*
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)))
*/
-enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
/*
* Minimal preemption granularity for CPU-bound tasks:
*/
#define fits_capacity(cap, max) ((cap) * 1280 < (max) * 1024)
+/*
+ * The margin used when comparing CPU capacities:
+ * is 'cap1' noticeably greater than 'cap2'?
+ *
+ * (default: ~5%)
+ */
+#define capacity_greater(cap1, cap2) ((cap1) * 1024 > (cap2) * 1078)
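+/*
+ * Example: capacity_greater(1024, 960) is true (1024 * 1024 = 1048576 >
+ * 960 * 1078 = 1034880), while capacity_greater(1024, 1000) is false:
+ * 1024 exceeds 1000 by less than the ~5% margin.
+ */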
#endif
#ifdef CONFIG_CFS_BANDWIDTH
* Scheduling class statistics methods:
*/
-int sched_proc_update_handler(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+int sched_update_scaling(void)
{
- int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
unsigned int factor = get_update_sysctl_factor();
- if (ret || !write)
- return ret;
-
sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
sysctl_sched_min_granularity);
*/
static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq);
+ unsigned int nr_running = cfs_rq->nr_running;
+ u64 slice;
+
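+	/*
+	 * With ALT_PERIOD, size the period from the runqueue's total
+	 * h_nr_running rather than this (possibly nested) cfs_rq's count.
+	 */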
+ if (sched_feat(ALT_PERIOD))
+ nr_running = rq_of(cfs_rq)->cfs.h_nr_running;
+
+ slice = __sched_period(nr_running + !se->on_rq);
for_each_sched_entity(se) {
struct load_weight *load;
}
slice = __calc_delta(slice, se->load.weight, load);
}
+
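+	/* With BASE_SLICE, never hand out less than the minimum granularity. */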
+ if (sched_feat(BASE_SLICE))
+ slice = max(slice, (u64)sysctl_sched_min_granularity);
+
return slice;
}
{
struct sched_domain_shared *sds;
- if (static_branch_likely(&sched_smt_present)) {
- sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
- if (sds)
- return READ_ONCE(sds->has_idle_cores);
- }
+ sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+ if (sds)
+ return READ_ONCE(sds->has_idle_cores);
return def;
}
return -1;
}
+/*
+ * Scan the local SMT mask for idle CPUs.
+ */
+static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
+{
+ int cpu;
+
+ for_each_cpu(cpu, cpu_smt_mask(target)) {
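+		/* Skip siblings outside the task's affinity mask or this sched domain. */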
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr) ||
+ !cpumask_test_cpu(cpu, sched_domain_span(sd)))
+ continue;
+ if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
+ return cpu;
+ }
+
+ return -1;
+}
+
#else /* CONFIG_SCHED_SMT */
static inline void set_idle_cores(int cpu, int val)
return __select_idle_cpu(core);
}
+static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
+{
+ return -1;
+}
+
#endif /* CONFIG_SCHED_SMT */
/*
* comparing the average scan cost (tracked in sd->avg_scan_cost) against the
* average idle time for this rq (as found in rq->avg_idle).
*/
-static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
+static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target)
{
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
int i, cpu, idle_cpu = -1, nr = INT_MAX;
- bool smt = test_idle_cores(target, false);
int this = smp_processor_id();
struct sched_domain *this_sd;
u64 time;
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
- if (sched_feat(SIS_PROP) && !smt) {
+ if (sched_feat(SIS_PROP) && !has_idle_core) {
u64 avg_cost, avg_idle, span_avg;
/*
}
for_each_cpu_wrap(cpu, cpus, target) {
- if (smt) {
+ if (has_idle_core) {
i = select_idle_core(p, cpu, cpus, &idle_cpu);
if ((unsigned int)i < nr_cpumask_bits)
return i;
}
}
- if (smt)
+ if (has_idle_core)
set_idle_cores(this, false);
- if (sched_feat(SIS_PROP) && !smt) {
+ if (sched_feat(SIS_PROP) && !has_idle_core) {
time = cpu_clock(this) - time;
update_avg(&this_sd->avg_scan_cost, time);
}
*/
static int select_idle_sibling(struct task_struct *p, int prev, int target)
{
+ bool has_idle_core = false;
struct sched_domain *sd;
unsigned long task_util;
int i, recent_used_cpu;
if (!sd)
return target;
- i = select_idle_cpu(p, sd, target);
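+	/*
+	 * When SMT is active and no whole core is known to be idle, fall back
+	 * to scanning the SMT siblings of the previous CPU, provided it shares
+	 * cache with the target.
+	 */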
+ if (sched_smt_active()) {
+ has_idle_core = test_idle_cores(target, false);
+
+ if (!has_idle_core && cpus_share_cache(prev, target)) {
+ i = select_idle_smt(p, sd, prev);
+ if ((unsigned int)i < nr_cpumask_bits)
+ return i;
+ }
+ }
+
+ i = select_idle_cpu(p, sd, has_idle_core, target);
if ((unsigned)i < nr_cpumask_bits)
return i;
#define LBF_NEED_BREAK 0x02
#define LBF_DST_PINNED 0x04
#define LBF_SOME_PINNED 0x08
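+/* The balance was initiated by active balancing (active_load_balance_cpu_stop()). */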
+#define LBF_ACTIVE_LB 0x10
struct lb_env {
struct sched_domain *sd;
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
return 0;
+ /* Disregard pcpu kthreads; they are where they need to be. */
+ if ((p->flags & PF_KTHREAD) && kthread_is_per_cpu(p))
+ return 0;
+
if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
int cpu;
* our sched_group. We may want to revisit it if we couldn't
* meet load balance goals by pulling other tasks on src_cpu.
*
- * Avoid computing new_dst_cpu for NEWLY_IDLE or if we have
- * already computed one in current iteration.
+ * Avoid computing new_dst_cpu
+ * - for NEWLY_IDLE
+ * - if we have already computed one in the current iteration
+ * - if it's an active balance
*/
- if (env->idle == CPU_NEWLY_IDLE || (env->flags & LBF_DST_PINNED))
+ if (env->idle == CPU_NEWLY_IDLE ||
+ env->flags & (LBF_DST_PINNED | LBF_ACTIVE_LB))
return 0;
/* Prevent dst_cpu from being re-selected via env's CPUs: */
/*
* Aggressive migration if:
- * 1) destination numa is preferred
- * 2) task is cache cold, or
- * 3) too many balance attempts have failed.
+ * 1) active balance
+ * 2) destination numa is preferred
+ * 3) task is cache cold, or
+ * 4) too many balance attempts have failed.
*/
+ if (env->flags & LBF_ACTIVE_LB)
+ return 1;
+
tsk_cache_hot = migrate_degrades_locality(p, env);
if (tsk_cache_hot == -1)
tsk_cache_hot = task_hot(p, env);
return false;
}
-/*
- * group_smaller_min_cpu_capacity: Returns true if sched_group sg has smaller
- * per-CPU capacity than sched_group ref.
- */
-static inline bool
-group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
-{
- return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity);
-}
-
-/*
- * group_smaller_max_cpu_capacity: Returns true if sched_group sg has smaller
- * per-CPU capacity_orig than sched_group ref.
- */
-static inline bool
-group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
-{
- return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity);
-}
-
static inline enum
group_type group_classify(unsigned int imbalance_pct,
struct sched_group *group,
return group_has_spare;
}
-static bool update_nohz_stats(struct rq *rq)
-{
-#ifdef CONFIG_NO_HZ_COMMON
- unsigned int cpu = rq->cpu;
-
- if (!rq->has_blocked_load)
- return false;
-
- if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
- return false;
-
- if (!time_after(jiffies, READ_ONCE(rq->last_blocked_load_update_tick)))
- return true;
-
- update_blocked_averages(cpu);
-
- return rq->has_blocked_load;
-#else
- return false;
-#endif
-}
-
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @env: The load balancing environment.
* internally or be covered by avg_load imbalance (eventually).
*/
if (sgs->group_type == group_misfit_task &&
- (!group_smaller_max_cpu_capacity(sg, sds->local) ||
+ (!capacity_greater(capacity_of(env->dst_cpu), sg->sgc->max_capacity) ||
sds->local_stat.group_type != group_has_spare))
return false;
*/
if ((env->sd->flags & SD_ASYM_CPUCAPACITY) &&
(sgs->group_type <= group_fully_busy) &&
- (group_smaller_min_cpu_capacity(sds->local, sg)))
+ (capacity_greater(sg->sgc->min_capacity, capacity_of(env->dst_cpu))))
return false;
return true;
* average load.
*/
if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
- capacity_of(env->dst_cpu) < capacity &&
+ !capacity_greater(capacity_of(env->dst_cpu), capacity) &&
nr_running == 1)
continue;
active_load_balance_cpu_stop, busiest,
&busiest->active_balance_work);
}
-
- /* We've kicked active balancing, force task migration. */
- sd->nr_balance_failed = sd->cache_nice_tries+1;
}
} else {
sd->nr_balance_failed = 0;
.src_cpu = busiest_rq->cpu,
.src_rq = busiest_rq,
.idle = CPU_IDLE,
- /*
- * can_migrate_task() doesn't need to compute new_dst_cpu
- * for active balancing. Since we have CPU_IDLE, but no
- * @dst_grpmask we need to make that test go away with lying
- * about DST_PINNED.
- */
- .flags = LBF_DST_PINNED,
+ .flags = LBF_ACTIVE_LB,
};
schedstat_inc(sd->alb_count);
WRITE_ONCE(nohz.has_blocked, 1);
}
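+/*
+ * Return true if @rq (a NO_HZ-idle CPU) may still carry blocked load;
+ * refresh its blocked averages when the last update has gone stale.
+ */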
+static bool update_nohz_stats(struct rq *rq)
+{
+ unsigned int cpu = rq->cpu;
+
+ if (!rq->has_blocked_load)
+ return false;
+
+ if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
+ return false;
+
+ if (!time_after(jiffies, READ_ONCE(rq->last_blocked_load_update_tick)))
+ return true;
+
+ update_blocked_averages(cpu);
+
+ return rq->has_blocked_load;
+}
+
/*
* Internal function that runs load balance for all idle cpus. The load balance
* can be a simple update of blocked load or a complete load balance with