sched/pelt: Check that *_avg are null when *_sum are

[linux-2.6-microblaze.git] / kernel / sched / fair.c
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 51d72ab..198514d 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -268,33 +268,11 @@ const struct sched_class fair_sched_class;
   */
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
-static inline struct task_struct *task_of(struct sched_entity *se)
-{
-       SCHED_WARN_ON(!entity_is_task(se));
-       return container_of(se, struct task_struct, se);
-}
  
  /* Walk up scheduling entities hierarchy */
  #define for_each_sched_entity(se) \
                 for (; se; se = se->parent)
  
-static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
-{
-       return p->se.cfs_rq;
-}
-
-/* runqueue on which this entity is (to be) queued */
-static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
-{
-       return se->cfs_rq;
-}
-
-/* runqueue "owned" by this group */
-static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
-{
-       return grp->my_q;
-}
-
  static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
  {
         if (!path)
@@ -455,33 +433,9 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
  
  #else  /* !CONFIG_FAIR_GROUP_SCHED */
  
-static inline struct task_struct *task_of(struct sched_entity *se)
-{
-       return container_of(se, struct task_struct, se);
-}
-
  #define for_each_sched_entity(se) \
                 for (; se; se = NULL)
  
-static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
-{
-       return &task_rq(p)->cfs;
-}
-
-static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
-{
-       struct task_struct *p = task_of(se);
-       struct rq *rq = task_rq(p);
-
-       return &rq->cfs;
-}
-
-/* runqueue "owned" by this group */
-static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
-{
-       return NULL;
-}
-
  static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
  {
         if (path)
@@ -3139,7 +3093,7 @@ void reweight_task(struct task_struct *p, int prio)
   *
   *                     tg->weight * grq->load.weight
   *   ge->load.weight = -----------------------------               (1)
- *                       \Sum grq->load.weight
+ *                       \Sum grq->load.weight
   *
   * Now, because computing that sum is prohibitively expensive to compute (been
   * there, done that) we approximate it with this average stuff. The average
@@ -3153,7 +3107,7 @@ void reweight_task(struct task_struct *p, int prio)
   *
   *                     tg->weight * grq->avg.load_avg
   *   ge->load.weight = ------------------------------              (3)
- *                             tg->load_avg
+ *                             tg->load_avg
   *
   * Where: tg->load_avg ~= \Sum grq->avg.load_avg
   *
@@ -3169,7 +3123,7 @@ void reweight_task(struct task_struct *p, int prio)
   *
   *                     tg->weight * grq->load.weight
   *   ge->load.weight = ----------------------------- = tg->weight   (4)
- *                         grp->load.weight
+ *                         grp->load.weight
   *
   * That is, the sum collapses because all other CPUs are idle; the UP scenario.
   *
@@ -3188,7 +3142,7 @@ void reweight_task(struct task_struct *p, int prio)
   *
   *                     tg->weight * grq->load.weight
   *   ge->load.weight = -----------------------------              (6)
- *                             tg_load_avg'
+ *                             tg_load_avg'
   *
   * Where:
   *
@@ -3499,10 +3453,9 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
  static inline void
  update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
  {
-       long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+       long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
         unsigned long load_avg;
         u64 load_sum = 0;
-       s64 delta_sum;
         u32 divider;
  
         if (!runnable_sum)
@@ -3549,13 +3502,13 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
         load_sum = (s64)se_weight(se) * runnable_sum;
         load_avg = div_s64(load_sum, divider);
  
-       delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
-       delta_avg = load_avg - se->avg.load_avg;
+       delta = load_avg - se->avg.load_avg;
  
         se->avg.load_sum = runnable_sum;
         se->avg.load_avg = load_avg;
-       add_positive(&cfs_rq->avg.load_avg, delta_avg);
-       add_positive(&cfs_rq->avg.load_sum, delta_sum);
+
+       add_positive(&cfs_rq->avg.load_avg, delta);
+       cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
  }
  
  static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
@@ -3766,11 +3719,17 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
   */
  static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
+       /*
+        * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+        * See ___update_load_avg() for details.
+        */
+       u32 divider = get_pelt_divider(&cfs_rq->avg);
+
         dequeue_load_avg(cfs_rq, se);
         sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
-       sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+       cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
         sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
-       sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
+       cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
  
         add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
  
@@ -5935,11 +5894,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
  
         /* Traverse only the allowed CPUs */
         for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+               struct rq *rq = cpu_rq(i);
+
+               if (!sched_core_cookie_match(rq, p))
+                       continue;
+
                 if (sched_idle_cpu(i))
                         return i;
  
                 if (available_idle_cpu(i)) {
-                       struct rq *rq = cpu_rq(i);
                         struct cpuidle_state *idle = idle_get_state(rq);
                         if (idle && idle->exit_latency < min_exit_latency) {
                                 /*
@@ -6025,9 +5988,10 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
         return new_cpu;
  }
  
-static inline int __select_idle_cpu(int cpu)
+static inline int __select_idle_cpu(int cpu, struct task_struct *p)
  {
-       if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
+       if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
+           sched_cpu_cookie_match(cpu_rq(cpu), p))
                 return cpu;
  
         return -1;
@@ -6097,7 +6061,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
         int cpu;
  
         if (!static_branch_likely(&sched_smt_present))
-               return __select_idle_cpu(core);
+               return __select_idle_cpu(core, p);
  
         for_each_cpu(cpu, cpu_smt_mask(core)) {
                 if (!available_idle_cpu(cpu)) {
@@ -6153,7 +6117,7 @@ static inline bool test_idle_cores(int cpu, bool def)
  
  static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
  {
-       return __select_idle_cpu(core);
+       return __select_idle_cpu(core, p);
  }
  
  static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
@@ -6210,7 +6174,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
                 } else {
                         if (!--nr)
                                 return -1;
-                       idle_cpu = __select_idle_cpu(cpu);
+                       idle_cpu = __select_idle_cpu(cpu, p);
                         if ((unsigned int)idle_cpu < nr_cpumask_bits)
                                 break;
                 }
@@ -7573,6 +7537,14 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
  
         if (sysctl_sched_migration_cost == -1)
                 return 1;
+
+       /*
+        * Don't migrate task if the task's cookie does not match
+        * with the destination CPU's core cookie.
+        */
+       if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
+               return 1;
+
         if (sysctl_sched_migration_cost == 0)
                 return 0;
  
@@ -8054,6 +8026,15 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
         if (cfs_rq->avg.runnable_sum)
                 return false;
  
+       /*
+        * _avg must be zero when _sum is zero, because _avg = _sum / divider.
+        * Make sure that rounding and/or propagation of PELT values never
+        * break this.
+        */
+       SCHED_WARN_ON(cfs_rq->avg.load_avg ||
+                     cfs_rq->avg.util_avg ||
+                     cfs_rq->avg.runnable_avg);
+
         return true;
  }
  
@@ -8080,7 +8061,7 @@ static bool __update_blocked_fair(struct rq *rq, bool *done)
                 /* Propagate pending load changes to the parent, if any: */
                 se = cfs_rq->tg->se[cpu];
                 if (se && !skip_blocked_update(se))
-                       update_load_avg(cfs_rq_of(se), se, 0);
+                       update_load_avg(cfs_rq_of(se), se, UPDATE_TG);
  
                 /*
                  * There can be a lot of idle CPU cgroups.  Don't let fully
@@ -8903,6 +8884,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
                                         p->cpus_ptr))
                         continue;
  
+               /* Skip over this group if no cookie matched */
+               if (!sched_group_cookie_match(cpu_rq(this_cpu), p, group))
+                       continue;
+
                 local_group = cpumask_test_cpu(this_cpu,
                                                sched_group_span(group));
  
@@ -10813,6 +10798,119 @@ static void rq_offline_fair(struct rq *rq)
  
  #endif /* CONFIG_SMP */
  
+#ifdef CONFIG_SCHED_CORE
+static inline bool
+__entity_slice_used(struct sched_entity *se, int min_nr_tasks)
+{
+       u64 slice = sched_slice(cfs_rq_of(se), se);
+       u64 rtime = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+
+       return (rtime * min_nr_tasks > slice);
+}
+
+#define MIN_NR_TASKS_DURING_FORCEIDLE  2
+static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
+{
+       if (!sched_core_enabled(rq))
+               return;
+
+       /*
+        * If the runqueue has only one task, that task has used up its
+        * slice, and the sibling is forced idle, then trigger a reschedule
+        * to give the forced-idle task a chance.
+        *
+        * sched_slice() considers only this active rq and it gets the
+        * whole slice. But during force idle, we have siblings acting
+        * like a single runqueue and hence we need to consider runnable
+        * tasks on this CPU and the forced idle CPU. Ideally, we should
+        * go through the forced idle rq, but that would be a perf hit.
+        * We can assume that the forced idle CPU has at least
+        * MIN_NR_TASKS_DURING_FORCEIDLE - 1 tasks and use that to check
+        * if we need to give up the CPU.
+        */
+       if (rq->core->core_forceidle && rq->cfs.nr_running == 1 &&
+           __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
+               resched_curr(rq);
+}
+
+/*
+ * se_fi_update - Update the cfs_rq->min_vruntime_fi in a CFS hierarchy if needed.
+ */
+static void se_fi_update(struct sched_entity *se, unsigned int fi_seq, bool forceidle)
+{
+       for_each_sched_entity(se) {
+               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+               if (forceidle) {
+                       if (cfs_rq->forceidle_seq == fi_seq)
+                               break;
+                       cfs_rq->forceidle_seq = fi_seq;
+               }
+
+               cfs_rq->min_vruntime_fi = cfs_rq->min_vruntime;
+       }
+}
+
+void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi)
+{
+       struct sched_entity *se = &p->se;
+
+       if (p->sched_class != &fair_sched_class)
+               return;
+
+       se_fi_update(se, rq->core->core_forceidle_seq, in_fi);
+}
+
+bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi)
+{
+       struct rq *rq = task_rq(a);
+       struct sched_entity *sea = &a->se;
+       struct sched_entity *seb = &b->se;
+       struct cfs_rq *cfs_rqa;
+       struct cfs_rq *cfs_rqb;
+       s64 delta;
+
+       SCHED_WARN_ON(task_rq(b)->core != rq->core);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+       /*
+        * Find an se in the hierarchy for tasks a and b, such that the se's
+        * are immediate siblings.
+        */
+       while (sea->cfs_rq->tg != seb->cfs_rq->tg) {
+               int sea_depth = sea->depth;
+               int seb_depth = seb->depth;
+
+               if (sea_depth >= seb_depth)
+                       sea = parent_entity(sea);
+               if (sea_depth <= seb_depth)
+                       seb = parent_entity(seb);
+       }
+
+       se_fi_update(sea, rq->core->core_forceidle_seq, in_fi);
+       se_fi_update(seb, rq->core->core_forceidle_seq, in_fi);
+
+       cfs_rqa = sea->cfs_rq;
+       cfs_rqb = seb->cfs_rq;
+#else
+       cfs_rqa = &task_rq(a)->cfs;
+       cfs_rqb = &task_rq(b)->cfs;
+#endif
+
+       /*
+        * Find delta after normalizing se's vruntime with its cfs_rq's
+        * min_vruntime_fi, which would have been updated in prior calls
+        * to se_fi_update().
+        */
+       delta = (s64)(sea->vruntime - seb->vruntime) +
+               (s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi);
+
+       return delta > 0;
+}
+#else
+static inline void task_tick_core(struct rq *rq, struct task_struct *curr) {}
+#endif
+
  /*
   * scheduler tick hitting a task of our scheduling class.
   *
@@ -10836,6 +10934,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
  
         update_misfit_status(curr, rq);
         update_overutilized_status(task_rq(curr));
+
+       task_tick_core(rq, curr);
  }
  
  /*