sched/core: Introduce set_curr_task() helper
[linux-2.6-microblaze.git] kernel/sched/core.c
index d7babcc..ce69fc7 100644
@@ -1109,10 +1109,10 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 
        p->sched_class->set_cpus_allowed(p, new_mask);
 
-       if (running)
-               p->sched_class->set_curr_task(rq);
        if (queued)
                enqueue_task(rq, p, ENQUEUE_RESTORE);
+       if (running)
+               set_curr_task(rq, p);
 }
 
 /*
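
(The set_curr_task() helper itself is introduced elsewhere in this file and sits outside these hunks; presumably it is a thin wrapper over the class callback, along the lines of the sketch below. Note the call is also deliberately reordered to run after enqueue_task(), presumably so the callback sees the task in its post-enqueue state.)

    static void set_curr_task(struct rq *rq, struct task_struct *curr)
    {
            curr->sched_class->set_curr_task(rq);
    }
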
@@ -3707,10 +3707,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
        p->prio = prio;
 
-       if (running)
-               p->sched_class->set_curr_task(rq);
        if (queued)
                enqueue_task(rq, p, queue_flag);
+       if (running)
+               set_curr_task(rq, p);
 
        check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
@@ -4263,8 +4263,6 @@ change:
        prev_class = p->sched_class;
        __setscheduler(rq, p, attr, pi);
 
-       if (running)
-               p->sched_class->set_curr_task(rq);
        if (queued) {
                /*
                 * We enqueue to tail when the priority of a task is
@@ -4275,6 +4273,8 @@ change:
 
                enqueue_task(rq, p, queue_flags);
        }
+       if (running)
+               set_curr_task(rq, p);
 
        check_class_changed(rq, p, prev_class, oldprio);
        preempt_disable(); /* avoid rq from going away on us */
@@ -5439,10 +5439,10 @@ void sched_setnuma(struct task_struct *p, int nid)
 
        p->numa_preferred_nid = nid;
 
-       if (running)
-               p->sched_class->set_curr_task(rq);
        if (queued)
                enqueue_task(rq, p, ENQUEUE_RESTORE);
+       if (running)
+               set_curr_task(rq, p);
        task_rq_unlock(rq, p, &rf);
 }
 #endif /* CONFIG_NUMA_BALANCING */
@@ -5739,6 +5739,8 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
        }
 }
 #else /* !CONFIG_SCHED_DEBUG */
+
+# define sched_debug_enabled 0
 # define sched_domain_debug(sd, cpu) do { } while (0)
 static inline bool sched_debug(void)
 {
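
(Note: sched_debug_enabled gets a stub value of 0 here so the new "if (rq && sched_debug_enabled)" check further down compiles away when CONFIG_SCHED_DEBUG is off. Under CONFIG_SCHED_DEBUG the flag is, as an assumption from context, driven by the sched_debug boot parameter roughly like this:)

    static __read_mostly int sched_debug_enabled;

    static int __init sched_debug_setup(char *str)
    {
            /* enabled via "sched_debug" on the kernel command line */
            sched_debug_enabled = 1;
            return 0;
    }
    early_param("sched_debug", sched_debug_setup);
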
@@ -5933,10 +5935,8 @@ static void free_sched_groups(struct sched_group *sg, int free_sgc)
        } while (sg != first);
 }
 
-static void free_sched_domain(struct rcu_head *rcu)
+static void destroy_sched_domain(struct sched_domain *sd)
 {
-       struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-
        /*
         * If it's an overlapping domain it has private groups, iterate and
         * nuke them all.
@@ -5947,18 +5947,26 @@ static void free_sched_domain(struct rcu_head *rcu)
                kfree(sd->groups->sgc);
                kfree(sd->groups);
        }
+       if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
+               kfree(sd->shared);
        kfree(sd);
 }
 
-static void destroy_sched_domain(struct sched_domain *sd, int cpu)
+static void destroy_sched_domains_rcu(struct rcu_head *rcu)
 {
-       call_rcu(&sd->rcu, free_sched_domain);
+       struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
+
+       while (sd) {
+               struct sched_domain *parent = sd->parent;
+               destroy_sched_domain(sd);
+               sd = parent;
+       }
 }
 
-static void destroy_sched_domains(struct sched_domain *sd, int cpu)
+static void destroy_sched_domains(struct sched_domain *sd)
 {
-       for (; sd; sd = sd->parent)
-               destroy_sched_domain(sd, cpu);
+       if (sd)
+               call_rcu(&sd->rcu, destroy_sched_domains_rcu);
 }
 
 /*
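
(destroy_sched_domains() now queues a single RCU callback on the lowest domain and frees the whole ->parent chain from that callback, instead of one call_rcu() per level; this is safe because rcu_assign_pointer(rq->sd, sd) below publishes the replacement hierarchy atomically, making the old chain unreachable as a unit. An abridged sketch of the struct sched_domain fields this relies on, which are not shown in this diff:)

    struct sched_domain {
            struct sched_domain *parent;        /* walked by destroy_sched_domains_rcu() */
            struct sched_domain_shared *shared; /* new in this series, refcounted */
            /* ... */
            struct rcu_head rcu;                /* only the bottom domain's rcu_head is used now */
    };
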
@@ -5973,14 +5981,14 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
 DEFINE_PER_CPU(struct sched_domain *, sd_llc);
 DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
+DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
-DEFINE_PER_CPU(struct sched_domain *, sd_busy);
 DEFINE_PER_CPU(struct sched_domain *, sd_asym);
 
 static void update_top_cache_domain(int cpu)
 {
+       struct sched_domain_shared *sds = NULL;
        struct sched_domain *sd;
-       struct sched_domain *busy_sd = NULL;
        int id = cpu;
        int size = 1;
 
@@ -5988,13 +5996,13 @@ static void update_top_cache_domain(int cpu)
        if (sd) {
                id = cpumask_first(sched_domain_span(sd));
                size = cpumask_weight(sched_domain_span(sd));
-               busy_sd = sd->parent; /* sd_busy */
+               sds = sd->shared;
        }
-       rcu_assign_pointer(per_cpu(sd_busy, cpu), busy_sd);
 
        rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
        per_cpu(sd_llc_size, cpu) = size;
        per_cpu(sd_llc_id, cpu) = id;
+       rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
 
        sd = lowest_flag_domain(cpu, SD_NUMA);
        rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
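
(Readers then pick up the LLC-shared state via RCU; a hedged sketch of the consumer side — the real users live in fair.c, e.g. the nohz busy-CPU accounting that previously went through sd_busy:)

    struct sched_domain_shared *sds;

    rcu_read_lock();
    sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
    if (sds)
            atomic_inc(&sds->nr_busy_cpus);     /* e.g. when the tick is restarted */
    rcu_read_unlock();
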
@@ -6030,7 +6038,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
                         */
                        if (parent->flags & SD_PREFER_SIBLING)
                                tmp->flags |= SD_PREFER_SIBLING;
-                       destroy_sched_domain(parent, cpu);
+                       destroy_sched_domain(parent);
                } else
                        tmp = tmp->parent;
        }
@@ -6038,7 +6046,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
        if (sd && sd_degenerate(sd)) {
                tmp = sd;
                sd = sd->parent;
-               destroy_sched_domain(tmp, cpu);
+               destroy_sched_domain(tmp);
                if (sd)
                        sd->child = NULL;
        }
@@ -6048,7 +6056,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
        rq_attach_root(rq, rd);
        tmp = rq->sd;
        rcu_assign_pointer(rq->sd, sd);
-       destroy_sched_domains(tmp, cpu);
+       destroy_sched_domains(tmp);
 
        update_top_cache_domain(cpu);
 }
@@ -6291,7 +6299,6 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
                return;
 
        update_group_capacity(sd, cpu);
-       atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
 }
 
 /*
@@ -6379,6 +6386,9 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
        WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
        *per_cpu_ptr(sdd->sd, cpu) = NULL;
 
+       if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
+               *per_cpu_ptr(sdd->sds, cpu) = NULL;
+
        if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
                *per_cpu_ptr(sdd->sg, cpu) = NULL;
 
@@ -6423,10 +6433,12 @@ static int sched_domains_curr_level;
 
 static struct sched_domain *
 sd_init(struct sched_domain_topology_level *tl,
+       const struct cpumask *cpu_map,
        struct sched_domain *child, int cpu)
 {
-       struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
-       int sd_weight, sd_flags = 0;
+       struct sd_data *sdd = &tl->data;
+       struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+       int sd_id, sd_weight, sd_flags = 0;
 
 #ifdef CONFIG_NUMA
        /*
@@ -6481,6 +6493,9 @@ sd_init(struct sched_domain_topology_level *tl,
 #endif
        };
 
+       cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
+       sd_id = cpumask_first(sched_domain_span(sd));
+
        /*
         * Convert topological properties into behaviour.
         */
@@ -6523,7 +6538,17 @@ sd_init(struct sched_domain_topology_level *tl,
                sd->idle_idx = 1;
        }
 
-       sd->private = &tl->data;
+       /*
+        * For all levels sharing cache, connect a sched_domain_shared
+        * instance.
+        */
+       if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+               sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
+               atomic_inc(&sd->shared->ref);
+               atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
+       }
+
+       sd->private = sdd;
 
        return sd;
 }
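
(Every CPU in a cache-sharing span computes the same sd_id — the first CPU of the span — so they all attach to the same sched_domain_shared instance; that is why the cpumask_and() moves from build_sched_domain() into sd_init() above. The structure itself lives in include/linux/sched.h and, at this point in the series, is roughly:)

    struct sched_domain_shared {
            atomic_t        ref;            /* one per attached sched_domain */
            atomic_t        nr_busy_cpus;   /* replaces sg->sgc->nr_busy_cpus */
            int             has_idle_cores; /* hint for the idle-core search */
    };
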
@@ -6550,6 +6575,9 @@ static struct sched_domain_topology_level *sched_domain_topology =
 
 void set_sched_topology(struct sched_domain_topology_level *tl)
 {
+       if (WARN_ON_ONCE(sched_smp_initialized))
+               return;
+
        sched_domain_topology = tl;
 }
 
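(The new WARN_ON_ONCE() makes the contract explicit: architectures must install a custom topology before SMP bring-up completes. A typical, here hypothetical, caller using the standard topology-level helpers:)

    static struct sched_domain_topology_level arch_topology[] = {
    #ifdef CONFIG_SCHED_SMT
            { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
    #endif
            { cpu_cpu_mask, SD_INIT_NAME(DIE) },
            { NULL, },
    };

    /* hypothetical arch init hook; must run before sched_init_smp() */
    set_sched_topology(arch_topology);
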
@@ -6830,6 +6858,10 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
                if (!sdd->sd)
                        return -ENOMEM;
 
+               sdd->sds = alloc_percpu(struct sched_domain_shared *);
+               if (!sdd->sds)
+                       return -ENOMEM;
+
                sdd->sg = alloc_percpu(struct sched_group *);
                if (!sdd->sg)
                        return -ENOMEM;
@@ -6840,6 +6872,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
                for_each_cpu(j, cpu_map) {
                        struct sched_domain *sd;
+                       struct sched_domain_shared *sds;
                        struct sched_group *sg;
                        struct sched_group_capacity *sgc;
 
@@ -6850,6 +6883,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
                        *per_cpu_ptr(sdd->sd, j) = sd;
 
+                       sds = kzalloc_node(sizeof(struct sched_domain_shared),
+                                       GFP_KERNEL, cpu_to_node(j));
+                       if (!sds)
+                               return -ENOMEM;
+
+                       *per_cpu_ptr(sdd->sds, j) = sds;
+
                        sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
                                        GFP_KERNEL, cpu_to_node(j));
                        if (!sg)
@@ -6889,6 +6929,8 @@ static void __sdt_free(const struct cpumask *cpu_map)
                                kfree(*per_cpu_ptr(sdd->sd, j));
                        }
 
+                       if (sdd->sds)
+                               kfree(*per_cpu_ptr(sdd->sds, j));
                        if (sdd->sg)
                                kfree(*per_cpu_ptr(sdd->sg, j));
                        if (sdd->sgc)
@@ -6896,6 +6938,8 @@ static void __sdt_free(const struct cpumask *cpu_map)
                }
                free_percpu(sdd->sd);
                sdd->sd = NULL;
+               free_percpu(sdd->sds);
+               sdd->sds = NULL;
                free_percpu(sdd->sg);
                sdd->sg = NULL;
                free_percpu(sdd->sgc);
@@ -6907,9 +6951,8 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
                const struct cpumask *cpu_map, struct sched_domain_attr *attr,
                struct sched_domain *child, int cpu)
 {
-       struct sched_domain *sd = sd_init(tl, child, cpu);
+       struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu);
 
-       cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
        if (child) {
                sd->level = child->level + 1;
                sched_domain_level_max = max(sched_domain_level_max, sd->level);
@@ -7006,7 +7049,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
        }
        rcu_read_unlock();
 
-       if (rq) {
+       if (rq && sched_debug_enabled) {
                pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
                        cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
        }
@@ -7369,6 +7412,22 @@ int sched_cpu_dying(unsigned int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_SMT
+DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+
+static void sched_init_smt(void)
+{
+       /*
+        * We've enumerated all CPUs and will assume that if any CPU
+        * has SMT siblings, CPU0 will too.
+        */
+       if (cpumask_weight(cpu_smt_mask(0)) > 1)
+               static_branch_enable(&sched_smt_present);
+}
+#else
+static inline void sched_init_smt(void) { }
+#endif
+
 void __init sched_init_smp(void)
 {
        cpumask_var_t non_isolated_cpus;
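
(Because sched_smt_present is a static key, SMT-only fast paths cost only a patched-out branch on machines without siblings. A hedged sketch of a consumer — the real call sites are in the select_idle_siblings() rework in fair.c:)

    if (static_branch_likely(&sched_smt_present)) {
            /* worth scanning for a fully idle core */
            i = select_idle_core(p, sd, target);    /* hypothetical call site */
    }
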
@@ -7398,6 +7457,9 @@ void __init sched_init_smp(void)
 
        init_sched_rt_class();
        init_sched_dl_class();
+
+       sched_init_smt();
+
        sched_smp_initialized = true;
 }
 
@@ -7435,6 +7497,7 @@ static struct kmem_cache *task_group_cache __read_mostly;
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
+DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
 
 void __init sched_init(void)
 {
@@ -7471,6 +7534,8 @@ void __init sched_init(void)
        for_each_possible_cpu(i) {
                per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
                        cpumask_size(), GFP_KERNEL, cpu_to_node(i));
+               per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
+                       cpumask_size(), GFP_KERNEL, cpu_to_node(i));
        }
 #endif /* CONFIG_CPUMASK_OFFSTACK */
 
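(select_idle_mask is per-CPU scratch space for the reworked idle-sibling search in fair.c; the CONFIG_CPUMASK_OFFSTACK allocation above mirrors load_balance_mask. Usage is presumably of this shape:)

    struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);

    cpumask_and(cpus, sched_domain_span(sd), tsk_cpus_allowed(p));
    /* ... iterate 'cpus' looking for an idle core/CPU ... */
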
@@ -7753,7 +7818,7 @@ struct task_struct *curr_task(int cpu)
  *
  * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
  */
-void set_curr_task(int cpu, struct task_struct *p)
+void ia64_set_curr_task(int cpu, struct task_struct *p)
 {
        cpu_curr(cpu) = p;
 }
@@ -7884,10 +7949,10 @@ void sched_move_task(struct task_struct *tsk)
 
        sched_change_group(tsk, TASK_MOVE_GROUP);
 
-       if (unlikely(running))
-               tsk->sched_class->set_curr_task(rq);
        if (queued)
                enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
+       if (unlikely(running))
+               set_curr_task(rq, tsk);
 
        task_rq_unlock(rq, tsk, &rf);
 }
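
(Taken together, every queued/running call site in this file now follows one canonical shape. The dequeue half sits outside these hunks, so this reconstruction is partly an assumption:)

    queued = task_on_rq_queued(p);
    running = task_current(rq, p);

    if (queued)
            dequeue_task(rq, p, DEQUEUE_SAVE);
    if (running)
            put_prev_task(rq, p);

    /* ... change the task's scheduling properties ... */

    if (queued)
            enqueue_task(rq, p, ENQUEUE_RESTORE);
    if (running)
            set_curr_task(rq, p);

(Restoring the task as rq->curr only after it is back on the runqueue is exactly what the reordering in each hunk above implements.)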