sched/fair: Define sched_idle_cpu() only for SMP configurations
[linux-2.6-microblaze.git] kernel/sched/fair.c
index ba749f5..fe4e0d7 100644
@@ -801,7 +801,7 @@ void post_init_entity_util_avg(struct task_struct *p)
                 * For !fair tasks do:
                 *
                update_cfs_rq_load_avg(now, cfs_rq);
-               attach_entity_load_avg(cfs_rq, se, 0);
+               attach_entity_load_avg(cfs_rq, se);
                switched_from_fair(rq, p);
                 *
                 * such that the next switched_to_fair() has the
@@ -3114,7 +3114,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 {
        struct rq *rq = rq_of(cfs_rq);
 
-       if (&rq->cfs == cfs_rq || (flags & SCHED_CPUFREQ_MIGRATION)) {
+       if (&rq->cfs == cfs_rq) {
                /*
                 * There are a few boundary cases this might miss but it should
                 * get called often enough that that should (hopefully) not be
@@ -3366,16 +3366,17 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
 
        runnable_load_sum = (s64)se_runnable(se) * runnable_sum;
        runnable_load_avg = div_s64(runnable_load_sum, LOAD_AVG_MAX);
-       delta_sum = runnable_load_sum - se_weight(se) * se->avg.runnable_load_sum;
-       delta_avg = runnable_load_avg - se->avg.runnable_load_avg;
-
-       se->avg.runnable_load_sum = runnable_sum;
-       se->avg.runnable_load_avg = runnable_load_avg;
 
        if (se->on_rq) {
+               delta_sum = runnable_load_sum -
+                               se_weight(se) * se->avg.runnable_load_sum;
+               delta_avg = runnable_load_avg - se->avg.runnable_load_avg;
                add_positive(&cfs_rq->avg.runnable_load_avg, delta_avg);
                add_positive(&cfs_rq->avg.runnable_load_sum, delta_sum);
        }
+
+       se->avg.runnable_load_sum = runnable_sum;
+       se->avg.runnable_load_avg = runnable_load_avg;
 }
 
 static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
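
The reordering above matters because the group cfs_rq aggregates only account for enqueued entities: the deltas have to be computed against the entity's old contribution before it is overwritten, and applied only while se->on_rq. A minimal user-space sketch of that constraint (invented struct and field names, not kernel code):

#include <stdio.h>

struct child { long contrib; int on_rq; };
struct parent { long total; };

static void update_child(struct parent *p, struct child *c, long new_contrib)
{
	if (c->on_rq) {
		long delta = new_contrib - c->contrib;	/* old value still intact */
		p->total += delta;			/* keep aggregate in sync */
	}
	c->contrib = new_contrib;			/* now safe to overwrite */
}

int main(void)
{
	struct parent p = { .total = 10 };
	struct child c = { .contrib = 10, .on_rq = 1 };

	update_child(&p, &c, 4);
	printf("parent total: %ld\n", p.total);		/* 4, not 10 */

	c.on_rq = 0;					/* dequeued child: parent untouched */
	update_child(&p, &c, 8);
	printf("parent total: %ld\n", p.total);		/* still 4 */
	return 0;
}
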
@@ -3520,7 +3521,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
  * Must call update_cfs_rq_load_avg() before this, since we rely on
  * cfs_rq->avg.last_update_time being current.
  */
-static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
        u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
@@ -3556,7 +3557,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 
        add_tg_cfs_propagate(cfs_rq, se->avg.load_sum);
 
-       cfs_rq_util_change(cfs_rq, flags);
+       cfs_rq_util_change(cfs_rq, 0);
 
        trace_pelt_cfs_tp(cfs_rq);
 }
@@ -3614,7 +3615,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
                 *
                 * IOW we're enqueueing a task on a new CPU.
                 */
-               attach_entity_load_avg(cfs_rq, se, SCHED_CPUFREQ_MIGRATION);
+               attach_entity_load_avg(cfs_rq, se);
                update_tg_load_avg(cfs_rq, 0);
 
        } else if (decayed) {
@@ -3711,6 +3712,20 @@ static inline unsigned long task_util_est(struct task_struct *p)
        return max(task_util(p), _task_util_est(p));
 }
 
+#ifdef CONFIG_UCLAMP_TASK
+static inline unsigned long uclamp_task_util(struct task_struct *p)
+{
+       return clamp(task_util_est(p),
+                    uclamp_eff_value(p, UCLAMP_MIN),
+                    uclamp_eff_value(p, UCLAMP_MAX));
+}
+#else
+static inline unsigned long uclamp_task_util(struct task_struct *p)
+{
+       return task_util_est(p);
+}
+#endif
+
 static inline void util_est_enqueue(struct cfs_rq *cfs_rq,
                                    struct task_struct *p)
 {
@@ -3822,7 +3837,7 @@ done:
 
 static inline int task_fits_capacity(struct task_struct *p, long capacity)
 {
-       return fits_capacity(task_util_est(p), capacity);
+       return fits_capacity(uclamp_task_util(p), capacity);
 }
 
 static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
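
task_fits_capacity() now feeds a clamped utilization into fits_capacity(), so a task whose uclamp minimum exceeds a little CPU's capacity is flagged as misfit even when its PELT estimate is tiny. A user-space sketch of the arithmetic, assuming the ~80% headroom factor fits_capacity() uses elsewhere in fair.c; all utilization numbers below are invented:

#include <stdio.h>

#define clamp(val, lo, hi)	((val) < (lo) ? (lo) : (val) > (hi) ? (hi) : (val))
#define fits_capacity(cap, max)	((cap) * 1280 < (max) * 1024)	/* cap < ~80% of max */

int main(void)
{
	unsigned long util_est = 150;			/* raw PELT-style estimate */
	unsigned long uc_min = 400, uc_max = 1024;	/* task's uclamp range */
	unsigned long cpu_cap = 446;			/* e.g. a little CPU */

	unsigned long boosted = clamp(util_est, uc_min, uc_max);

	printf("raw estimate fits:     %d\n", fits_capacity(util_est, cpu_cap));	/* 1 */
	printf("clamped estimate fits: %d\n", fits_capacity(boosted, cpu_cap));	/* 0 */
	return 0;
}
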
@@ -3857,7 +3872,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 static inline void remove_entity_load_avg(struct sched_entity *se) {}
 
 static inline void
-attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) {}
+attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static inline void
 detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 
@@ -5196,6 +5211,20 @@ static inline void update_overutilized_status(struct rq *rq)
 static inline void update_overutilized_status(struct rq *rq) { }
 #endif
 
+/* Runqueue only has SCHED_IDLE tasks enqueued */
+static int sched_idle_rq(struct rq *rq)
+{
+       return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running &&
+                       rq->nr_running);
+}
+
+#ifdef CONFIG_SMP
+static int sched_idle_cpu(int cpu)
+{
+       return sched_idle_rq(cpu_rq(cpu));
+}
+#endif
+
 /*
  * The enqueue_task method is called before nr_running is
  * increased. Here we update the fair scheduling stats and
@@ -5310,6 +5339,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        struct sched_entity *se = &p->se;
        int task_sleep = flags & DEQUEUE_SLEEP;
        int idle_h_nr_running = task_has_idle_policy(p);
+       bool was_sched_idle = sched_idle_rq(rq);
 
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
@@ -5356,6 +5386,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        if (!se)
                sub_nr_running(rq, 1);
 
+       /* balance early to pull high priority tasks */
+       if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
+               rq->next_balance = jiffies;
+
        util_est_dequeue(&rq->cfs, p, task_sleep);
        hrtick_update(rq);
 }
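
The dequeue path samples sched_idle_rq() before removing the task so it can detect the moment a runqueue degrades to running only SCHED_IDLE tasks, and then pulls next_balance forward so the next tick rebalances immediately. An illustrative user-space model of that transition (fake_rq and its fields are invented):

#include <stdbool.h>
#include <stdio.h>

struct fake_rq {
	unsigned int nr_running;
	unsigned int idle_h_nr_running;
	unsigned long next_balance;
};

static bool rq_sched_idle(const struct fake_rq *rq)
{
	return rq->nr_running && rq->nr_running == rq->idle_h_nr_running;
}

static void dequeue_one(struct fake_rq *rq, bool idle_policy, unsigned long now)
{
	bool was_sched_idle = rq_sched_idle(rq);

	rq->nr_running--;
	if (idle_policy)
		rq->idle_h_nr_running--;

	/* rebalance early only on the !sched-idle -> sched-idle transition */
	if (!was_sched_idle && rq_sched_idle(rq))
		rq->next_balance = now;
}

int main(void)
{
	struct fake_rq rq = { .nr_running = 2, .idle_h_nr_running = 1,
			      .next_balance = 1000 };

	dequeue_one(&rq, false, 42);	/* the last non-idle-policy task leaves */
	printf("next_balance = %lu\n", rq.next_balance);	/* 42 */
	return 0;
}
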
@@ -5378,15 +5412,6 @@ static struct {
 
 #endif /* CONFIG_NO_HZ_COMMON */
 
-/* CPU only has SCHED_IDLE tasks enqueued */
-static int sched_idle_cpu(int cpu)
-{
-       struct rq *rq = cpu_rq(cpu);
-
-       return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running &&
-                       rq->nr_running);
-}
-
 static unsigned long cpu_load(struct rq *rq)
 {
        return cfs_rq_load_avg(&rq->cfs);
@@ -5588,7 +5613,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
        unsigned int min_exit_latency = UINT_MAX;
        u64 latest_idle_timestamp = 0;
        int least_loaded_cpu = this_cpu;
-       int shallowest_idle_cpu = -1, si_cpu = -1;
+       int shallowest_idle_cpu = -1;
        int i;
 
        /* Check if we have any choice: */
@@ -5597,6 +5622,9 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 
        /* Traverse only the allowed CPUs */
        for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+               if (sched_idle_cpu(i))
+                       return i;
+
                if (available_idle_cpu(i)) {
                        struct rq *rq = cpu_rq(i);
                        struct cpuidle_state *idle = idle_get_state(rq);
@@ -5619,12 +5647,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
                                latest_idle_timestamp = rq->idle_stamp;
                                shallowest_idle_cpu = i;
                        }
-               } else if (shallowest_idle_cpu == -1 && si_cpu == -1) {
-                       if (sched_idle_cpu(i)) {
-                               si_cpu = i;
-                               continue;
-                       }
-
+               } else if (shallowest_idle_cpu == -1) {
                        load = cpu_load(cpu_rq(i));
                        if (load < min_load) {
                                min_load = load;
@@ -5633,11 +5656,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
                }
        }
 
-       if (shallowest_idle_cpu != -1)
-               return shallowest_idle_cpu;
-       if (si_cpu != -1)
-               return si_cpu;
-       return least_loaded_cpu;
+       return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
 }
 
 static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
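
After this change the selection order in find_idlest_group_cpu() is: take a sched-idle CPU as soon as one is seen, otherwise prefer the shallowest-idle CPU, and fall back to the least-loaded CPU only when nothing is idle. A simplified user-space sketch of that ordering (it omits the idle-timestamp tie-break; all CPU data below is invented):

#include <stdio.h>

#define NCPUS 4

struct cpu_info {
	int sched_idle;			/* runs only SCHED_IDLE tasks */
	int idle;			/* available-idle */
	unsigned int exit_latency;	/* idle-state exit cost */
	unsigned long load;
};

static int pick_cpu(const struct cpu_info *c, int n)
{
	int shallowest_idle = -1, least_loaded = 0;
	unsigned int min_exit = ~0u;
	unsigned long min_load = ~0ul;

	for (int i = 0; i < n; i++) {
		if (c[i].sched_idle)
			return i;	/* preempts only idle-policy tasks */
		if (c[i].idle) {
			if (c[i].exit_latency < min_exit) {
				min_exit = c[i].exit_latency;
				shallowest_idle = i;
			}
		} else if (shallowest_idle == -1 && c[i].load < min_load) {
			min_load = c[i].load;
			least_loaded = i;
		}
	}
	return shallowest_idle != -1 ? shallowest_idle : least_loaded;
}

int main(void)
{
	struct cpu_info cpus[NCPUS] = {
		{ 0, 0, 0, 400 },
		{ 0, 1, 50, 0 },	/* idle, deep state */
		{ 0, 1, 10, 0 },	/* idle, shallow state */
		{ 1, 0, 0, 100 },	/* sched-idle */
	};

	printf("picked CPU %d\n", pick_cpu(cpus, NCPUS));	/* 3 */
	return 0;
}
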
@@ -5790,7 +5809,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
  */
 static int select_idle_smt(struct task_struct *p, int target)
 {
-       int cpu, si_cpu = -1;
+       int cpu;
 
        if (!static_branch_likely(&sched_smt_present))
                return -1;
@@ -5798,13 +5817,11 @@ static int select_idle_smt(struct task_struct *p, int target)
        for_each_cpu(cpu, cpu_smt_mask(target)) {
                if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                        continue;
-               if (available_idle_cpu(cpu))
+               if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
                        return cpu;
-               if (si_cpu == -1 && sched_idle_cpu(cpu))
-                       si_cpu = cpu;
        }
 
-       return si_cpu;
+       return -1;
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -5828,12 +5845,13 @@ static inline int select_idle_smt(struct task_struct *p, int target)
  */
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
+       struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
        struct sched_domain *this_sd;
        u64 avg_cost, avg_idle;
        u64 time, cost;
        s64 delta;
        int this = smp_processor_id();
-       int cpu, nr = INT_MAX, si_cpu = -1;
+       int cpu, nr = INT_MAX;
 
        this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
        if (!this_sd)
@@ -5859,15 +5877,13 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 
        time = cpu_clock(this);
 
-       for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+       cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+
+       for_each_cpu_wrap(cpu, cpus, target) {
                if (!--nr)
-                       return si_cpu;
-               if (!cpumask_test_cpu(cpu, p->cpus_ptr))
-                       continue;
-               if (available_idle_cpu(cpu))
+                       return -1;
+               if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
                        break;
-               if (si_cpu == -1 && sched_idle_cpu(cpu))
-                       si_cpu = cpu;
        }
 
        time = cpu_clock(this) - time;
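
select_idle_cpu() now intersects the LLC span with the task's affinity mask once, so the bounded scan no longer tests p->cpus_ptr on every iteration. A rough user-space sketch with plain bitmasks standing in for struct cpumask (the helper and its arguments are made up):

#include <stdio.h>

static int scan_for_idle(unsigned long llc_span, unsigned long affinity,
			 const int *cpu_is_idle, int nr_budget)
{
	unsigned long cpus = llc_span & affinity;	/* one AND up front */

	for (int cpu = 0; cpu < (int)(8 * sizeof(cpus)); cpu++) {
		if (!(cpus & (1ul << cpu)))
			continue;
		if (!--nr_budget)
			return -1;			/* scan cost stays capped */
		if (cpu_is_idle[cpu])
			return cpu;
	}
	return -1;
}

int main(void)
{
	int idle[64] = { [5] = 1, [9] = 1 };

	/* CPUs 0-15 share the LLC, but the task may only run on CPUs 8-11 */
	printf("%d\n", scan_for_idle(0xfffful, 0xf00ul, idle, 4));	/* 9 */
	return 0;
}
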
@@ -6268,9 +6284,18 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                        if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                                continue;
 
-                       /* Skip CPUs that will be overutilized. */
                        util = cpu_util_next(cpu, p, cpu);
                        cpu_cap = capacity_of(cpu);
+                       spare_cap = cpu_cap - util;
+
+                       /*
+                        * Skip CPUs that cannot satisfy the capacity request.
+                        * IOW, placing the task there would make the CPU
+                        * overutilized. Take uclamp into account to see how
+                        * much capacity we can get out of the CPU; this is
+                        * aligned with schedutil_cpu_util().
+                        */
+                       util = uclamp_rq_util_with(cpu_rq(cpu), util, p);
                        if (!fits_capacity(util, cpu_cap))
                                continue;
 
@@ -6285,7 +6310,6 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                         * Find the CPU with the maximum spare capacity in
                         * the performance domain
                         */
-                       spare_cap = cpu_cap - util;
                        if (spare_cap > max_spare_cap) {
                                max_spare_cap = spare_cap;
                                max_spare_cap_cpu = cpu;
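
In find_energy_efficient_cpu() the spare capacity is still derived from the raw utilization, but the fit test uses a uclamp-adjusted value, so a heavily boosted (or capped) workload is not parked on a CPU it would immediately overutilize. A worked user-space sketch of that asymmetry; it models only the min clamp and uses invented numbers:

#include <stdio.h>

#define fits_capacity(cap, max)	((cap) * 1280 < (max) * 1024)

int main(void)
{
	unsigned long cpu_cap = 446;		/* little CPU */
	unsigned long util = 120;		/* raw util with the task placed there */
	unsigned long uclamp_min = 512;		/* rq-wide clamp incl. the waking task */

	long spare_cap = cpu_cap - util;	/* still computed from raw util: 326 */
	unsigned long clamped = util < uclamp_min ? uclamp_min : util;

	printf("spare_cap=%ld fits=%d\n", spare_cap, fits_capacity(clamped, cpu_cap));
	/* spare_cap=326 fits=0 -> CPU skipped despite ample raw headroom */
	return 0;
}
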
@@ -7780,29 +7804,11 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
                 */
 
                for_each_cpu(cpu, sched_group_span(sdg)) {
-                       struct sched_group_capacity *sgc;
-                       struct rq *rq = cpu_rq(cpu);
+                       unsigned long cpu_cap = capacity_of(cpu);
 
-                       /*
-                        * build_sched_domains() -> init_sched_groups_capacity()
-                        * gets here before we've attached the domains to the
-                        * runqueues.
-                        *
-                        * Use capacity_of(), which is set irrespective of domains
-                        * in update_cpu_capacity().
-                        *
-                        * This avoids capacity from being 0 and
-                        * causing divide-by-zero issues on boot.
-                        */
-                       if (unlikely(!rq->sd)) {
-                               capacity += capacity_of(cpu);
-                       } else {
-                               sgc = rq->sd->groups->sgc;
-                               capacity += sgc->capacity;
-                       }
-
-                       min_capacity = min(capacity, min_capacity);
-                       max_capacity = max(capacity, max_capacity);
+                       capacity += cpu_cap;
+                       min_capacity = min(cpu_cap, min_capacity);
+                       max_capacity = max(cpu_cap, max_capacity);
                }
        } else  {
                /*
@@ -8168,14 +8174,18 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 
        case group_has_spare:
                /*
-                * Select not overloaded group with lowest number of
-                * idle cpus. We could also compare the spare capacity
-                * which is more stable but it can end up that the
-                * group has less spare capacity but finally more idle
+                * Select not overloaded group with lowest number of idle cpus
+                * and highest number of running tasks. We could also compare
+                * the spare capacity which is more stable but it can end up
+                * that the group has less spare capacity but finally more idle
                 * CPUs which means less opportunity to pull tasks.
                 */
-               if (sgs->idle_cpus >= busiest->idle_cpus)
+               if (sgs->idle_cpus > busiest->idle_cpus)
+                       return false;
+               else if ((sgs->idle_cpus == busiest->idle_cpus) &&
+                        (sgs->sum_nr_running <= busiest->sum_nr_running))
                        return false;
+
                break;
        }
 
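The group_has_spare case now breaks ties explicitly: a candidate replaces the current busiest group only if it has fewer idle CPUs, or the same number of idle CPUs but more running tasks. A small user-space sketch of that comparator (struct g_stats and the sample values are invented):

#include <stdio.h>

struct g_stats { unsigned int idle_cpus, sum_nr_running; };

/* nonzero when candidate "a" should replace the current busiest "b" */
static int pick_over(const struct g_stats *a, const struct g_stats *b)
{
	if (a->idle_cpus > b->idle_cpus)
		return 0;
	if (a->idle_cpus == b->idle_cpus &&
	    a->sum_nr_running <= b->sum_nr_running)
		return 0;
	return 1;
}

int main(void)
{
	struct g_stats busiest = { .idle_cpus = 2, .sum_nr_running = 4 };
	struct g_stats same_idle_more_tasks = { .idle_cpus = 2, .sum_nr_running = 6 };
	struct g_stats more_idle = { .idle_cpus = 3, .sum_nr_running = 9 };

	printf("%d %d\n", pick_over(&same_idle_more_tasks, &busiest),	/* 1 */
	       pick_over(&more_idle, &busiest));			/* 0 */
	return 0;
}
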
@@ -9529,6 +9539,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 {
        int continue_balancing = 1;
        int cpu = rq->cpu;
+       int busy = idle != CPU_IDLE && !sched_idle_cpu(cpu);
        unsigned long interval;
        struct sched_domain *sd;
        /* Earliest time when we have to do rebalance again */
@@ -9565,7 +9576,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
                        break;
                }
 
-               interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+               interval = get_sd_balance_interval(sd, busy);
 
                need_serialize = sd->flags & SD_SERIALIZE;
                if (need_serialize) {
@@ -9581,9 +9592,10 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
                                 * state even if we migrated tasks. Update it.
                                 */
                                idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
+                               busy = idle != CPU_IDLE && !sched_idle_cpu(cpu);
                        }
                        sd->last_balance = jiffies;
-                       interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+                       interval = get_sd_balance_interval(sd, busy);
                }
                if (need_serialize)
                        spin_unlock(&balancing);
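
Treating a CPU that runs only SCHED_IDLE tasks as not busy keeps its balance interval short, so it pulls higher-priority work about as eagerly as a fully idle CPU. A rough sketch of the assumed interval scaling (the busy-factor shape is an assumption, not the exact get_sd_balance_interval() code, which also converts to jiffies and clamps):

#include <stdio.h>

/* assumed shape: busy CPUs stretch the base interval by a busy factor */
static unsigned long balance_interval(unsigned long base_ms,
				      unsigned int busy_factor, int busy)
{
	return busy ? base_ms * busy_factor : base_ms;
}

int main(void)
{
	int cpu_idle = 0, cpu_sched_idle = 1;
	int busy = !cpu_idle && !cpu_sched_idle;	/* sched-idle => not busy */

	printf("interval=%lu ms\n", balance_interval(8, 32, busy));	/* 8, not 256 */
	return 0;
}
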
@@ -10333,6 +10345,9 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
        if (!task_on_rq_queued(p))
                return;
 
+       if (rq->cfs.nr_running == 1)
+               return;
+
        /*
         * Reschedule if we are currently running on this runqueue and
         * our priority decreased, or if we are not currently running on
@@ -10423,7 +10438,7 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
 
        /* Synchronize entity with its cfs_rq */
        update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
-       attach_entity_load_avg(cfs_rq, se, 0);
+       attach_entity_load_avg(cfs_rq, se);
        update_tg_load_avg(cfs_rq, false);
        propagate_entity_cfs_rq(se);
 }