Merge tag 'char-misc-5.13-rc1-round2' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-microblaze.git] / kernel / sched / topology.c
index 09d3504..55a0a24 100644 (file)
@@ -14,15 +14,15 @@ static cpumask_var_t sched_domains_tmpmask2;
 
 static int __init sched_debug_setup(char *str)
 {
-       sched_debug_enabled = true;
+       sched_debug_verbose = true;
 
        return 0;
 }
-early_param("sched_debug", sched_debug_setup);
+early_param("sched_verbose", sched_debug_setup);
 
 static inline bool sched_debug(void)
 {
-       return sched_debug_enabled;
+       return sched_debug_verbose;
 }
 
 #define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name },
@@ -131,7 +131,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 {
        int level = 0;
 
-       if (!sched_debug_enabled)
+       if (!sched_debug_verbose)
                return;
 
        if (!sd) {
@@ -152,7 +152,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 }
 #else /* !CONFIG_SCHED_DEBUG */
 
-# define sched_debug_enabled 0
+# define sched_debug_verbose 0
 # define sched_domain_debug(sd, cpu) do { } while (0)
 static inline bool sched_debug(void)
 {
@@ -723,35 +723,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
        for (tmp = sd; tmp; tmp = tmp->parent)
                numa_distance += !!(tmp->flags & SD_NUMA);
 
-       /*
-        * FIXME: Diameter >=3 is misrepresented.
-        *
-        * Smallest diameter=3 topology is:
-        *
-        *   node   0   1   2   3
-        *     0:  10  20  30  40
-        *     1:  20  10  20  30
-        *     2:  30  20  10  20
-        *     3:  40  30  20  10
-        *
-        *   0 --- 1 --- 2 --- 3
-        *
-        * NUMA-3       0-3             N/A             N/A             0-3
-        *  groups:     {0-2},{1-3}                                     {1-3},{0-2}
-        *
-        * NUMA-2       0-2             0-3             0-3             1-3
-        *  groups:     {0-1},{1-3}     {0-2},{2-3}     {1-3},{0-1}     {2-3},{0-2}
-        *
-        * NUMA-1       0-1             0-2             1-3             2-3
-        *  groups:     {0},{1}         {1},{2},{0}     {2},{3},{1}     {3},{2}
-        *
-        * NUMA-0       0               1               2               3
-        *
-        * The NUMA-2 groups for nodes 0 and 3 are obviously buggered, as the
-        * group span isn't a subset of the domain span.
-        */
-       WARN_ONCE(numa_distance > 2, "Shortest NUMA path spans too many nodes\n");
-
        sched_domain_debug(sd, cpu);
 
        rq_attach_root(rq, rd);
@@ -963,7 +934,7 @@ static void init_overlap_sched_group(struct sched_domain *sd,
        int cpu;
 
        build_balance_mask(sd, sg, mask);
-       cpu = cpumask_first_and(sched_group_span(sg), mask);
+       cpu = cpumask_first(mask);
 
        sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
        if (atomic_inc_return(&sg->sgc->ref) == 1)
@@ -982,6 +953,31 @@ static void init_overlap_sched_group(struct sched_domain *sd,
        sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
 }
 
+static struct sched_domain *
+find_descended_sibling(struct sched_domain *sd, struct sched_domain *sibling)
+{
+       /*
+        * The proper descendant would be the one whose child won't span out
+        * of sd
+        */
+       while (sibling->child &&
+              !cpumask_subset(sched_domain_span(sibling->child),
+                              sched_domain_span(sd)))
+               sibling = sibling->child;
+
+       /*
+        * As we are referencing sgc across different topology levels, we need
+        * to go down to skip those sched_domains which don't contribute to
+        * scheduling because they will be degenerated in cpu_attach_domain
+        */
+       while (sibling->child &&
+              cpumask_equal(sched_domain_span(sibling->child),
+                            sched_domain_span(sibling)))
+               sibling = sibling->child;
+
+       return sibling;
+}
+
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
@@ -1015,6 +1011,41 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
                        continue;
 
+               /*
+                * Usually we build sched_group by sibling's child sched_domain
+                * But for machines whose NUMA diameter is 3 or above, we move
+                * to build sched_group by sibling's proper descendant's child
+                * domain because sibling's child sched_domain will span out of
+                * the sched_domain being built as below.
+                *
+                * Smallest diameter=3 topology is:
+                *
+                *   node   0   1   2   3
+                *     0:  10  20  30  40
+                *     1:  20  10  20  30
+                *     2:  30  20  10  20
+                *     3:  40  30  20  10
+                *
+                *   0 --- 1 --- 2 --- 3
+                *
+                * NUMA-3       0-3             N/A             N/A             0-3
+                *  groups:     {0-2},{1-3}                                     {1-3},{0-2}
+                *
+                * NUMA-2       0-2             0-3             0-3             1-3
+                *  groups:     {0-1},{1-3}     {0-2},{2-3}     {1-3},{0-1}     {2-3},{0-2}
+                *
+                * NUMA-1       0-1             0-2             1-3             2-3
+                *  groups:     {0},{1}         {1},{2},{0}     {2},{3},{1}     {3},{2}
+                *
+                * NUMA-0       0               1               2               3
+                *
+                * The NUMA-2 groups for nodes 0 and 3 are obviously buggered, as the
+                * group span isn't a subset of the domain span.
+                */
+               if (sibling->child &&
+                   !cpumask_subset(sched_domain_span(sibling->child), span))
+                       sibling = find_descended_sibling(sd, sibling);
+
                sg = build_group_from_child_sched_domain(sibling, cpu);
                if (!sg)
                        goto fail;
@@ -1022,7 +1053,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                sg_span = sched_group_span(sg);
                cpumask_or(covered, covered, sg_span);
 
-               init_overlap_sched_group(sd, sg);
+               init_overlap_sched_group(sibling, sg);
 
                if (!first)
                        first = sg;
@@ -2110,7 +2141,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
        if (has_asym)
                static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
 
-       if (rq && sched_debug_enabled) {
+       if (rq && sched_debug_verbose) {
                pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
                        cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
        }
@@ -2128,7 +2159,7 @@ static cpumask_var_t                      *doms_cur;
 /* Number of sched domains in 'doms_cur': */
 static int                             ndoms_cur;
 
-/* Attribues of custom domains in 'doms_cur' */
+/* Attributes of custom domains in 'doms_cur' */
 static struct sched_domain_attr                *dattr_cur;
 
 /*
@@ -2192,7 +2223,6 @@ int sched_init_domains(const struct cpumask *cpu_map)
                doms_cur = &fallback_doms;
        cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
        err = build_sched_domains(doms_cur[0], NULL);
-       register_sched_domain_sysctl();
 
        return err;
 }
@@ -2267,9 +2297,6 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
 
        lockdep_assert_held(&sched_domains_mutex);
 
-       /* Always unregister in case we don't destroy any domains: */
-       unregister_sched_domain_sysctl();
-
        /* Let the architecture update CPU core mappings: */
        new_topology = arch_update_cpu_topology();
 
@@ -2358,7 +2385,7 @@ match3:
        dattr_cur = dattr_new;
        ndoms_cur = ndoms_new;
 
-       register_sched_domain_sysctl();
+       update_sched_domain_debugfs();
 }
 
 /*