sched/core: Introduce set_curr_task() helper
[linux-2.6-microblaze.git] kernel/sched/core.c
index d7babcc..ce69fc7 100644
@@ -1109,10 +1109,10 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 
        p->sched_class->set_cpus_allowed(p, new_mask);
 
-       if (running)
-               p->sched_class->set_curr_task(rq);
        if (queued)
                enqueue_task(rq, p, ENQUEUE_RESTORE);
+       if (running)
+               set_curr_task(rq, p);
 }
 
 /*
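
(The set_curr_task() helper itself is introduced elsewhere in this file and sits outside these hunks; presumably it is a thin wrapper over the class callback, along the lines of the sketch below. Note the call is also deliberately reordered to run after enqueue_task(), presumably so the callback sees the task in its post-enqueue state.)

    static void set_curr_task(struct rq *rq, struct task_struct *curr)
    {
            curr->sched_class->set_curr_task(rq);
    }
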
@@ -3707,10 +3707,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
        p->prio = prio;
 
-       if (running)
-               p->sched_class->set_curr_task(rq);
        if (queued)
                enqueue_task(rq, p, queue_flag);
+       if (running)
+               set_curr_task(rq, p);
 
        check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
@@ -4263,8 +4263,6 @@ change:
        prev_class = p->sched_class;
        __setscheduler(rq, p, attr, pi);
 
-       if (running)
-               p->sched_class->set_curr_task(rq);
        if (queued) {
                /*
                 * We enqueue to tail when the priority of a task is
@@ -4275,6 +4273,8 @@ change:
 
                enqueue_task(rq, p, queue_flags);
        }
+       if (running)
+               set_curr_task(rq, p);
 
        check_class_changed(rq, p, prev_class, oldprio);
        preempt_disable(); /* avoid rq from going away on us */
@@ -5439,10 +5439,10 @@ void sched_setnuma(struct task_struct *p, int nid)
 
        p->numa_preferred_nid = nid;
 
-       if (running)
-               p->sched_class->set_curr_task(rq);
        if (queued)
                enqueue_task(rq, p, ENQUEUE_RESTORE);
+       if (running)
+               set_curr_task(rq, p);
        task_rq_unlock(rq, p, &rf);
 }
 #endif /* CONFIG_NUMA_BALANCING */
@@ -5739,6 +5739,8 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
        }
 }
 #else /* !CONFIG_SCHED_DEBUG */
+
+# define sched_debug_enabled 0
 # define sched_domain_debug(sd, cpu) do { } while (0)
 static inline bool sched_debug(void)
 {
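
(Note: sched_debug_enabled gets a stub value of 0 here so the new "if (rq && sched_debug_enabled)" check further down compiles away when CONFIG_SCHED_DEBUG is off. Under CONFIG_SCHED_DEBUG the flag is, as an assumption from context, driven by the sched_debug boot parameter roughly like this:)

    static __read_mostly int sched_debug_enabled;

    static int __init sched_debug_setup(char *str)
    {
            /* enabled via "sched_debug" on the kernel command line */
            sched_debug_enabled = 1;
            return 0;
    }
    early_param("sched_debug", sched_debug_setup);
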
@@ -5933,10 +5935,8 @@ static void free_sched_groups(struct sched_group *sg, int free_sgc)
        } while (sg != first);
 }
 
-static void free_sched_domain(struct rcu_head *rcu)
+static void destroy_sched_domain(struct sched_domain *sd)
 {
-       struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-
        /*
         * If it's an overlapping domain it has private groups, iterate and
         * nuke them all.
@@ -5947,18 +5947,26 @@ static void free_sched_domain(struct rcu_head *rcu)
                kfree(sd->groups->sgc);
                kfree(sd->groups);
        }
+       if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
+               kfree(sd->shared);
        kfree(sd);
 }
 
-static void destroy_sched_domain(struct sched_domain *sd, int cpu)
+static void destroy_sched_domains_rcu(struct rcu_head *rcu)
 {
-       call_rcu(&sd->rcu, free_sched_domain);
+       struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
+
+       while (sd) {
+               struct sched_domain *parent = sd->parent;
+               destroy_sched_domain(sd);
+               sd = parent;
+       }
 }
 
-static void destroy_sched_domains(struct sched_domain *sd, int cpu)
+static void destroy_sched_domains(struct sched_domain *sd)
 {
-       for (; sd; sd = sd->parent)
-               destroy_sched_domain(sd, cpu);
+       if (sd)
+               call_rcu(&sd->rcu, destroy_sched_domains_rcu);
 }
 
 /*
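
(destroy_sched_domains() now queues a single RCU callback on the lowest domain and frees the whole ->parent chain from that callback, instead of one call_rcu() per level; this is safe because rcu_assign_pointer(rq->sd, sd) below publishes the replacement hierarchy atomically, making the old chain unreachable as a unit. An abridged sketch of the struct sched_domain fields this relies on, which are not shown in this diff:)

    struct sched_domain {
            struct sched_domain *parent;        /* walked by destroy_sched_domains_rcu() */
            struct sched_domain_shared *shared; /* new in this series, refcounted */
            /* ... */
            struct rcu_head rcu;                /* only the bottom domain's rcu_head is used now */
    };
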
@@ -5973,14 +5981,14 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
 DEFINE_PER_CPU(struct sched_domain *, sd_llc);
 DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
+DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
-DEFINE_PER_CPU(struct sched_domain *, sd_busy);
 DEFINE_PER_CPU(struct sched_domain *, sd_asym);
 
 static void update_top_cache_domain(int cpu)
 {
+       struct sched_domain_shared *sds = NULL;
        struct sched_domain *sd;
-       struct sched_domain *busy_sd = NULL;
        int id = cpu;
        int size = 1;
 
@@ -5988,13 +5996,13 @@ static void update_top_cache_domain(int cpu)
        if (sd) {
                id = cpumask_first(sched_domain_span(sd));
                size = cpumask_weight(sched_domain_span(sd));
-               busy_sd = sd->parent; /* sd_busy */
+               sds = sd->shared;
        }
-       rcu_assign_pointer(per_cpu(sd_busy, cpu), busy_sd);
 
        rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
        per_cpu(sd_llc_size, cpu) = size;
        per_cpu(sd_llc_id, cpu) = id;
+       rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
 
        sd = lowest_flag_domain(cpu, SD_NUMA);
        rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
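
(Readers then pick up the LLC-shared state via RCU; a hedged sketch of the consumer side — the real users live in fair.c, e.g. the nohz busy-CPU accounting that previously went through sd_busy:)

    struct sched_domain_shared *sds;

    rcu_read_lock();
    sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
    if (sds)
            atomic_inc(&sds->nr_busy_cpus);     /* e.g. when the tick is restarted */
    rcu_read_unlock();
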
@@ -6030,7 +6038,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
                         */
                        if (parent->flags & SD_PREFER_SIBLING)
                                tmp->flags |= SD_PREFER_SIBLING;
-                       destroy_sched_domain(parent, cpu);
+                       destroy_sched_domain(parent);
                } else
                        tmp = tmp->parent;
        }
@@ -6038,7 +6046,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
        if (sd && sd_degenerate(sd)) {
                tmp = sd;
                sd = sd->parent;
-               destroy_sched_domain(tmp, cpu);
+               destroy_sched_domain(tmp);
                if (sd)
                        sd->child = NULL;
        }
@@ -6048,7 +6056,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
        rq_attach_root(rq, rd);
        tmp = rq->sd;
        rcu_assign_pointer(rq->sd, sd);
-       destroy_sched_domains(tmp, cpu);
+       destroy_sched_domains(tmp);
 
        update_top_cache_domain(cpu);
 }
@@ -6291,7 +6299,6 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
                return;
 
        update_group_capacity(sd, cpu);
-       atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
 }
 
 /*
@@ -6379,6 +6386,9 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
        WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
        *per_cpu_ptr(sdd->sd, cpu) = NULL;
 
+       if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
+               *per_cpu_ptr(sdd->sds, cpu) = NULL;
+
        if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
                *per_cpu_ptr(sdd->sg, cpu) = NULL;
 
@@ -6423,10 +6433,12 @@ static int sched_domains_curr_level;
 
 static struct sched_domain *
 sd_init(struct sched_domain_topology_level *tl,
+       const struct cpumask *cpu_map,
        struct sched_domain *child, int cpu)
 {
-       struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
-       int sd_weight, sd_flags = 0;
+       struct sd_data *sdd = &tl->data;
+       struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+       int sd_id, sd_weight, sd_flags = 0;
 
 #ifdef CONFIG_NUMA
        /*
@@ -6481,6 +6493,9 @@ sd_init(struct sched_domain_topology_level *tl,
 #endif
        };
 
+       cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
+       sd_id = cpumask_first(sched_domain_span(sd));
+
        /*
         * Convert topological properties into behaviour.
         */
@@ -6523,7 +6538,17 @@ sd_init(struct sched_domain_topology_level *tl,
                sd->idle_idx = 1;
        }
 
-       sd->private = &tl->data;
+       /*
+        * For all levels sharing cache, connect a sched_domain_shared
+        * instance.
+        */
+       if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+               sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
+               atomic_inc(&sd->shared->ref);
+               atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
+       }
+
+       sd->private = sdd;
 
        return sd;
 }
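
(Every CPU in a cache-sharing span computes the same sd_id — the first CPU of the span — so they all attach to the same sched_domain_shared instance; that is why the cpumask_and() moves from build_sched_domain() into sd_init() above. The structure itself lives in include/linux/sched.h and, at this point in the series, is roughly:)

    struct sched_domain_shared {
            atomic_t        ref;            /* one per attached sched_domain */
            atomic_t        nr_busy_cpus;   /* replaces sg->sgc->nr_busy_cpus */
            int             has_idle_cores; /* hint for the idle-core search */
    };
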
@@ -6550,6 +6575,9 @@ static struct sched_domain_topology_level *sched_domain_topology =
 
 void set_sched_topology(struct sched_domain_topology_level *tl)
 {
+       if (WARN_ON_ONCE(sched_smp_initialized))
+               return;
+
        sched_domain_topology = tl;
 }
 
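(The new WARN_ON_ONCE() makes the contract explicit: architectures must install a custom topology before SMP bring-up completes. A typical, here hypothetical, caller using the standard topology-level helpers:)

    static struct sched_domain_topology_level arch_topology[] = {
    #ifdef CONFIG_SCHED_SMT
            { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
    #endif
            { cpu_cpu_mask, SD_INIT_NAME(DIE) },
            { NULL, },
    };

    /* hypothetical arch init hook; must run before sched_init_smp() */
    set_sched_topology(arch_topology);
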
@@ -6830,6 +6858,10 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
                if (!sdd->sd)
                        return -ENOMEM;
 
+               sdd->sds = alloc_percpu(struct sched_domain_shared *);
+               if (!sdd->sds)
+                       return -ENOMEM;
+
                sdd->sg = alloc_percpu(struct sched_group *);
                if (!sdd->sg)
                        return -ENOMEM;
@@ -6840,6 +6872,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
                for_each_cpu(j, cpu_map) {
                        struct sched_domain *sd;
+                       struct sched_domain_shared *sds;
                        struct sched_group *sg;
                        struct sched_group_capacity *sgc;
 
@@ -6850,6 +6883,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
                        *per_cpu_ptr(sdd->sd, j) = sd;
 
+                       sds = kzalloc_node(sizeof(struct sched_domain_shared),
+                                       GFP_KERNEL, cpu_to_node(j));
+                       if (!sds)
+                               return -ENOMEM;
+
+                       *per_cpu_ptr(sdd->sds, j) = sds;
+
                        sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
                                        GFP_KERNEL, cpu_to_node(j));
                        if (!sg)
@@ -6889,6 +6929,8 @@ static void __sdt_free(const struct cpumask *cpu_map)
                                kfree(*per_cpu_ptr(sdd->sd, j));
                        }
 
+                       if (sdd->sds)
+                               kfree(*per_cpu_ptr(sdd->sds, j));
                        if (sdd->sg)
                                kfree(*per_cpu_ptr(sdd->sg, j));
                        if (sdd->sgc)
@@ -6896,6 +6938,8 @@ static void __sdt_free(const struct cpumask *cpu_map)
                }
                free_percpu(sdd->sd);
                sdd->sd = NULL;
+               free_percpu(sdd->sds);
+               sdd->sds = NULL;
                free_percpu(sdd->sg);
                sdd->sg = NULL;
                free_percpu(sdd->sgc);
@@ -6907,9 +6951,8 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
                const struct cpumask *cpu_map, struct sched_domain_attr *attr,
                struct sched_domain *child, int cpu)
 {
-       struct sched_domain *sd = sd_init(tl, child, cpu);
+       struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu);
 
-       cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
        if (child) {
                sd->level = child->level + 1;
                sched_domain_level_max = max(sched_domain_level_max, sd->level);
@@ -7006,7 +7049,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
        }
        rcu_read_unlock();
 
-       if (rq) {
+       if (rq && sched_debug_enabled) {
                pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
                        cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
        }
@@ -7369,6 +7412,22 @@ int sched_cpu_dying(unsigned int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_SMT
+DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+
+static void sched_init_smt(void)
+{
+       /*
+        * We've enumerated all CPUs and will assume that if any CPU
+        * has SMT siblings, CPU0 will too.
+        */
+       if (cpumask_weight(cpu_smt_mask(0)) > 1)
+               static_branch_enable(&sched_smt_present);
+}
+#else
+static inline void sched_init_smt(void) { }
+#endif
+
 void __init sched_init_smp(void)
 {
        cpumask_var_t non_isolated_cpus;
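
(Because sched_smt_present is a static key, SMT-only fast paths cost only a patched-out branch on machines without siblings. A hedged sketch of a consumer — the real call sites are in the select_idle_siblings() rework in fair.c:)

    if (static_branch_likely(&sched_smt_present)) {
            /* worth scanning for a fully idle core */
            i = select_idle_core(p, sd, target);    /* hypothetical call site */
    }
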
@@ -7398,6 +7457,9 @@ void __init sched_init_smp(void)
 
        init_sched_rt_class();
        init_sched_dl_class();
+
+       sched_init_smt();
+
        sched_smp_initialized = true;
 }
 
@@ -7435,6 +7497,7 @@ static struct kmem_cache *task_group_cache __read_mostly;
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
+DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
 
 void __init sched_init(void)
 {
@@ -7471,6 +7534,8 @@ void __init sched_init(void)
        for_each_possible_cpu(i) {
                per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
                        cpumask_size(), GFP_KERNEL, cpu_to_node(i));
+               per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
+                       cpumask_size(), GFP_KERNEL, cpu_to_node(i));
        }
 #endif /* CONFIG_CPUMASK_OFFSTACK */
 
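(select_idle_mask is per-CPU scratch space for the reworked idle-sibling search in fair.c; the CONFIG_CPUMASK_OFFSTACK allocation above mirrors load_balance_mask. Usage is presumably of this shape:)

    struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);

    cpumask_and(cpus, sched_domain_span(sd), tsk_cpus_allowed(p));
    /* ... iterate 'cpus' looking for an idle core/CPU ... */
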
@@ -7753,7 +7818,7 @@ struct task_struct *curr_task(int cpu)
  *
  * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
  */
-void set_curr_task(int cpu, struct task_struct *p)
+void ia64_set_curr_task(int cpu, struct task_struct *p)
 {
        cpu_curr(cpu) = p;
 }
@@ -7884,10 +7949,10 @@ void sched_move_task(struct task_struct *tsk)
 
        sched_change_group(tsk, TASK_MOVE_GROUP);
 
-       if (unlikely(running))
-               tsk->sched_class->set_curr_task(rq);
        if (queued)
                enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
+       if (unlikely(running))
+               set_curr_task(rq, tsk);
 
        task_rq_unlock(rq, tsk, &rf);
 }
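
(Taken together, every queued/running call site in this file now follows one canonical shape. The dequeue half sits outside these hunks, so this reconstruction is partly an assumption:)

    queued = task_on_rq_queued(p);
    running = task_current(rq, p);

    if (queued)
            dequeue_task(rq, p, DEQUEUE_SAVE);
    if (running)
            put_prev_task(rq, p);

    /* ... change the task's scheduling properties ... */

    if (queued)
            enqueue_task(rq, p, ENQUEUE_RESTORE);
    if (running)
            set_curr_task(rq, p);

(Restoring the task as rq->curr only after it is back on the runqueue is exactly what the reordering in each hunk above implements.)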