drivers/base/arch_topology.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Arch specific cpu topology information
   4  *
   5  * Copyright (C) 2016, ARM Ltd.
   6  * Written by: Juri Lelli, ARM Ltd.
   7  */
   8
   9 #include <linux/acpi.h>
  10 #include <linux/cpu.h>
  11 #include <linux/cpufreq.h>
  12 #include <linux/device.h>
  13 #include <linux/of.h>
  14 #include <linux/slab.h>
  15 #include <linux/string.h>
  16 #include <linux/sched/topology.h>
  17 #include <linux/cpuset.h>
  18 #include <linux/cpumask.h>
  19 #include <linux/init.h>
  20 #include <linux/percpu.h>
  21 #include <linux/sched.h>
  22 #include <linux/smp.h>
  23
  24 __weak bool arch_freq_counters_available(struct cpumask *cpus)
  25 {
  26         return false;
  27 }
  28 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
  29
  30 void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
  31                          unsigned long max_freq)
  32 {
  33         unsigned long scale;
  34         int i;
  35
  36         /*
  37          * If the use of counters for FIE is enabled, just return as we don't
  38          * want to update the scale factor with information from CPUFREQ.
  39          * Instead the scale factor will be updated from arch_scale_freq_tick.
  40          */
  41         if (arch_freq_counters_available(cpus))
  42                 return;
  43
  44         scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
  45
  46         for_each_cpu(i, cpus)
  47                 per_cpu(freq_scale, i) = scale;
  48 }
  49
  50 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
  51
  52 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
  53 {
  54         per_cpu(cpu_scale, cpu) = capacity;
  55 }
  56
  57 DEFINE_PER_CPU(unsigned long, thermal_pressure);
  58
  59 void topology_set_thermal_pressure(const struct cpumask *cpus,
  60                                unsigned long th_pressure)
  61 {
  62         int cpu;
  63
  64         for_each_cpu(cpu, cpus)
  65                 WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
  66 }
  67
  68 static ssize_t cpu_capacity_show(struct device *dev,
  69                                  struct device_attribute *attr,
  70                                  char *buf)
  71 {
  72         struct cpu *cpu = container_of(dev, struct cpu, dev);
  73
  74         return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
  75 }
  76
  77 static void update_topology_flags_workfn(struct work_struct *work);
  78 static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
  79
  80 static DEVICE_ATTR_RO(cpu_capacity);
  81
  82 static int register_cpu_capacity_sysctl(void)
  83 {
  84         int i;
  85         struct device *cpu;
  86
  87         for_each_possible_cpu(i) {
  88                 cpu = get_cpu_device(i);
  89                 if (!cpu) {
  90                         pr_err("%s: too early to get CPU%d device!\n",
  91                                __func__, i);
  92                         continue;
  93                 }
  94                 device_create_file(cpu, &dev_attr_cpu_capacity);
  95         }
  96
  97         return 0;
  98 }
  99 subsys_initcall(register_cpu_capacity_sysctl);
 100
 101 static int update_topology;
 102
 103 int topology_update_cpu_topology(void)
 104 {
 105         return update_topology;
 106 }
 107
 108 /*
 109  * Updating the sched_domains can't be done directly from cpufreq callbacks
 110  * due to locking, so queue the work for later.
 111  */
 112 static void update_topology_flags_workfn(struct work_struct *work)
 113 {
 114         update_topology = 1;
 115         rebuild_sched_domains();
 116         pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
 117         update_topology = 0;
 118 }
 119
 120 static DEFINE_PER_CPU(u32, freq_factor) = 1;
 121 static u32 *raw_capacity;
 122
 123 static int free_raw_capacity(void)
 124 {
 125         kfree(raw_capacity);
 126         raw_capacity = NULL;
 127
 128         return 0;
 129 }
 130
 131 void topology_normalize_cpu_scale(void)
 132 {
 133         u64 capacity;
 134         u64 capacity_scale;
 135         int cpu;
 136
 137         if (!raw_capacity)
 138                 return;
 139
 140         capacity_scale = 1;
 141         for_each_possible_cpu(cpu) {
 142                 capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
 143                 capacity_scale = max(capacity, capacity_scale);
 144         }
 145
 146         pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
 147         for_each_possible_cpu(cpu) {
 148                 capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
 149                 capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
 150                         capacity_scale);
 151                 topology_set_cpu_scale(cpu, capacity);
 152                 pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
 153                         cpu, topology_get_cpu_scale(cpu));
 154         }
 155 }
 156
 157 bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
 158 {
 159         struct clk *cpu_clk;
 160         static bool cap_parsing_failed;
 161         int ret;
 162         u32 cpu_capacity;
 163
 164         if (cap_parsing_failed)
 165                 return false;
 166
 167         ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
 168                                    &cpu_capacity);
 169         if (!ret) {
 170                 if (!raw_capacity) {
 171                         raw_capacity = kcalloc(num_possible_cpus(),
 172                                                sizeof(*raw_capacity),
 173                                                GFP_KERNEL);
 174                         if (!raw_capacity) {
 175                                 cap_parsing_failed = true;
 176                                 return false;
 177                         }
 178                 }
 179                 raw_capacity[cpu] = cpu_capacity;
 180                 pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
 181                         cpu_node, raw_capacity[cpu]);
 182
 183                 /*
 184                  * Update freq_factor for calculating early boot cpu capacities.
 185                  * For non-clk CPU DVFS mechanism, there's no way to get the
 186                  * frequency value now, assuming they are running at the same
 187                  * frequency (by keeping the initial freq_factor value).
 188                  */
 189                 cpu_clk = of_clk_get(cpu_node, 0);
 190                 if (!PTR_ERR_OR_ZERO(cpu_clk)) {
 191                         per_cpu(freq_factor, cpu) =
 192                                 clk_get_rate(cpu_clk) / 1000;
 193                         clk_put(cpu_clk);
 194                 }
 195         } else {
 196                 if (raw_capacity) {
 197                         pr_err("cpu_capacity: missing %pOF raw capacity\n",
 198                                 cpu_node);
 199                         pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
 200                 }
 201                 cap_parsing_failed = true;
 202                 free_raw_capacity();
 203         }
 204
 205         return !ret;
 206 }
 207
 208 #ifdef CONFIG_CPU_FREQ
 209 static cpumask_var_t cpus_to_visit;
 210 static void parsing_done_workfn(struct work_struct *work);
 211 static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
 212
 213 static int
 214 init_cpu_capacity_callback(struct notifier_block *nb,
 215                            unsigned long val,
 216                            void *data)
 217 {
 218         struct cpufreq_policy *policy = data;
 219         int cpu;
 220
 221         if (!raw_capacity)
 222                 return 0;
 223
 224         if (val != CPUFREQ_CREATE_POLICY)
 225                 return 0;
 226
 227         pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
 228                  cpumask_pr_args(policy->related_cpus),
 229                  cpumask_pr_args(cpus_to_visit));
 230
 231         cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
 232
 233         for_each_cpu(cpu, policy->related_cpus)
 234                 per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;
 235
 236         if (cpumask_empty(cpus_to_visit)) {
 237                 topology_normalize_cpu_scale();
 238                 schedule_work(&update_topology_flags_work);
 239                 free_raw_capacity();
 240                 pr_debug("cpu_capacity: parsing done\n");
 241                 schedule_work(&parsing_done_work);
 242         }
 243
 244         return 0;
 245 }
 246
 247 static struct notifier_block init_cpu_capacity_notifier = {
 248         .notifier_call = init_cpu_capacity_callback,
 249 };
 250
 251 static int __init register_cpufreq_notifier(void)
 252 {
 253         int ret;
 254
 255         /*
 256          * on ACPI-based systems we need to use the default cpu capacity
 257          * until we have the necessary code to parse the cpu capacity, so
 258          * skip registering cpufreq notifier.
 259          */
 260         if (!acpi_disabled || !raw_capacity)
 261                 return -EINVAL;
 262
 263         if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
 264                 return -ENOMEM;
 265
 266         cpumask_copy(cpus_to_visit, cpu_possible_mask);
 267
 268         ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
 269                                         CPUFREQ_POLICY_NOTIFIER);
 270
 271         if (ret)
 272                 free_cpumask_var(cpus_to_visit);
 273
 274         return ret;
 275 }
 276 core_initcall(register_cpufreq_notifier);
 277
 278 static void parsing_done_workfn(struct work_struct *work)
 279 {
 280         cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
 281                                          CPUFREQ_POLICY_NOTIFIER);
 282         free_cpumask_var(cpus_to_visit);
 283 }
 284
 285 #else
/*
 * Without CONFIG_CPU_FREQ the cpufreq notifier path that frees raw_capacity
 * is not built, so release it from a core_initcall instead.
 */
core_initcall(free_raw_capacity);
 287 #endif
 288
 289 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
/*
 * This function returns the logical cpu number of the node.
 * There are basically three kinds of return values:
 * (1) the logical cpu number, which is >= 0.
 * (2) -ENODEV when the device tree (DT) node is valid and found in the DT but
 * there is no possible logical CPU in the kernel to match. This happens
 * when CONFIG_NR_CPUS is configured to be smaller than the number of
 * CPU nodes in the DT. We need to just ignore this case.
 * (3) -1 if the node does not exist in the device tree.
 */
 300 static int __init get_cpu_for_node(struct device_node *node)
 301 {
 302         struct device_node *cpu_node;
 303         int cpu;
 304
 305         cpu_node = of_parse_phandle(node, "cpu", 0);
 306         if (!cpu_node)
 307                 return -1;
 308
 309         cpu = of_cpu_node_to_id(cpu_node);
 310         if (cpu >= 0)
 311                 topology_parse_cpu_capacity(cpu_node, cpu);
 312         else
 313                 pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n",
 314                         cpu_node, cpumask_pr_args(cpu_possible_mask));
 315
 316         of_node_put(cpu_node);
 317         return cpu;
 318 }
 319
 320 static int __init parse_core(struct device_node *core, int package_id,
 321                              int core_id)
 322 {
 323         char name[20];
 324         bool leaf = true;
 325         int i = 0;
 326         int cpu;
 327         struct device_node *t;
 328
 329         do {
 330                 snprintf(name, sizeof(name), "thread%d", i);
 331                 t = of_get_child_by_name(core, name);
 332                 if (t) {
 333                         leaf = false;
 334                         cpu = get_cpu_for_node(t);
 335                         if (cpu >= 0) {
 336                                 cpu_topology[cpu].package_id = package_id;
 337                                 cpu_topology[cpu].core_id = core_id;
 338                                 cpu_topology[cpu].thread_id = i;
 339                         } else if (cpu != -ENODEV) {
 340                                 pr_err("%pOF: Can't get CPU for thread\n", t);
 341                                 of_node_put(t);
 342                                 return -EINVAL;
 343                         }
 344                         of_node_put(t);
 345                 }
 346                 i++;
 347         } while (t);
 348
 349         cpu = get_cpu_for_node(core);
 350         if (cpu >= 0) {
 351                 if (!leaf) {
 352                         pr_err("%pOF: Core has both threads and CPU\n",
 353                                core);
 354                         return -EINVAL;
 355                 }
 356
 357                 cpu_topology[cpu].package_id = package_id;
 358                 cpu_topology[cpu].core_id = core_id;
 359         } else if (leaf && cpu != -ENODEV) {
 360                 pr_err("%pOF: Can't get CPU for leaf core\n", core);
 361                 return -EINVAL;
 362         }
 363
 364         return 0;
 365 }
 366
 367 static int __init parse_cluster(struct device_node *cluster, int depth)
 368 {
 369         char name[20];
 370         bool leaf = true;
 371         bool has_cores = false;
 372         struct device_node *c;
 373         static int package_id __initdata;
 374         int core_id = 0;
 375         int i, ret;
 376
 377         /*
 378          * First check for child clusters; we currently ignore any
 379          * information about the nesting of clusters and present the
 380          * scheduler with a flat list of them.
 381          */
 382         i = 0;
 383         do {
 384                 snprintf(name, sizeof(name), "cluster%d", i);
 385                 c = of_get_child_by_name(cluster, name);
 386                 if (c) {
 387                         leaf = false;
 388                         ret = parse_cluster(c, depth + 1);
 389                         of_node_put(c);
 390                         if (ret != 0)
 391                                 return ret;
 392                 }
 393                 i++;
 394         } while (c);
 395
 396         /* Now check for cores */
 397         i = 0;
 398         do {
 399                 snprintf(name, sizeof(name), "core%d", i);
 400                 c = of_get_child_by_name(cluster, name);
 401                 if (c) {
 402                         has_cores = true;
 403
 404                         if (depth == 0) {
 405                                 pr_err("%pOF: cpu-map children should be clusters\n",
 406                                        c);
 407                                 of_node_put(c);
 408                                 return -EINVAL;
 409                         }
 410
 411                         if (leaf) {
 412                                 ret = parse_core(c, package_id, core_id++);
 413                         } else {
 414                                 pr_err("%pOF: Non-leaf cluster with core %s\n",
 415                                        cluster, name);
 416                                 ret = -EINVAL;
 417                         }
 418
 419                         of_node_put(c);
 420                         if (ret != 0)
 421                                 return ret;
 422                 }
 423                 i++;
 424         } while (c);
 425
 426         if (leaf && !has_cores)
 427                 pr_warn("%pOF: empty cluster\n", cluster);
 428
 429         if (leaf)
 430                 package_id++;
 431
 432         return 0;
 433 }
 434
 435 static int __init parse_dt_topology(void)
 436 {
 437         struct device_node *cn, *map;
 438         int ret = 0;
 439         int cpu;
 440
 441         cn = of_find_node_by_path("/cpus");
 442         if (!cn) {
 443                 pr_err("No CPU information found in DT\n");
 444                 return 0;
 445         }
 446
 447         /*
 448          * When topology is provided cpu-map is essentially a root
 449          * cluster with restricted subnodes.
 450          */
 451         map = of_get_child_by_name(cn, "cpu-map");
 452         if (!map)
 453                 goto out;
 454
 455         ret = parse_cluster(map, 0);
 456         if (ret != 0)
 457                 goto out_map;
 458
 459         topology_normalize_cpu_scale();
 460
 461         /*
 462          * Check that all cores are in the topology; the SMP code will
 463          * only mark cores described in the DT as possible.
 464          */
 465         for_each_possible_cpu(cpu)
 466                 if (cpu_topology[cpu].package_id == -1)
 467                         ret = -EINVAL;
 468
 469 out_map:
 470         of_node_put(map);
 471 out:
 472         of_node_put(cn);
 473         return ret;
 474 }
 475 #endif
 476
/*
 * cpu topology table: one struct cpu_topology entry per CPU, indexed by CPU
 * number (NR_CPUS entries). Exported to GPL-compatible users.
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
 482
 483 const struct cpumask *cpu_coregroup_mask(int cpu)
 484 {
 485         const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
 486
 487         /* Find the smaller of NUMA, core or LLC siblings */
 488         if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
 489                 /* not numa in package, lets use the package siblings */
 490                 core_mask = &cpu_topology[cpu].core_sibling;
 491         }
 492         if (cpu_topology[cpu].llc_id != -1) {
 493                 if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
 494                         core_mask = &cpu_topology[cpu].llc_sibling;
 495         }
 496
 497         return core_mask;
 498 }
 499
 500 void update_siblings_masks(unsigned int cpuid)
 501 {
 502         struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
 503         int cpu;
 504
 505         /* update core and thread sibling masks */
 506         for_each_online_cpu(cpu) {
 507                 cpu_topo = &cpu_topology[cpu];
 508
 509                 if (cpuid_topo->llc_id == cpu_topo->llc_id) {
 510                         cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
 511                         cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
 512                 }
 513
 514                 if (cpuid_topo->package_id != cpu_topo->package_id)
 515                         continue;
 516
 517                 cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
 518                 cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
 519
 520                 if (cpuid_topo->core_id != cpu_topo->core_id)
 521                         continue;
 522
 523                 cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
 524                 cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
 525         }
 526 }
 527
 528 static void clear_cpu_topology(int cpu)
 529 {
 530         struct cpu_topology *cpu_topo = &cpu_topology[cpu];
 531
 532         cpumask_clear(&cpu_topo->llc_sibling);
 533         cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
 534
 535         cpumask_clear(&cpu_topo->core_sibling);
 536         cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
 537         cpumask_clear(&cpu_topo->thread_sibling);
 538         cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
 539 }
 540
 541 void __init reset_cpu_topology(void)
 542 {
 543         unsigned int cpu;
 544
 545         for_each_possible_cpu(cpu) {
 546                 struct cpu_topology *cpu_topo = &cpu_topology[cpu];
 547
 548                 cpu_topo->thread_id = -1;
 549                 cpu_topo->core_id = -1;
 550                 cpu_topo->package_id = -1;
 551                 cpu_topo->llc_id = -1;
 552
 553                 clear_cpu_topology(cpu);
 554         }
 555 }
 556
 557 void remove_cpu_topology(unsigned int cpu)
 558 {
 559         int sibling;
 560
 561         for_each_cpu(sibling, topology_core_cpumask(cpu))
 562                 cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
 563         for_each_cpu(sibling, topology_sibling_cpumask(cpu))
 564                 cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
 565         for_each_cpu(sibling, topology_llc_cpumask(cpu))
 566                 cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
 567
 568         clear_cpu_topology(cpu);
 569 }
 570
 571 __weak int __init parse_acpi_topology(void)
 572 {
 573         return 0;
 574 }
 575
 576 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
 577 void __init init_cpu_topology(void)
 578 {
 579         reset_cpu_topology();
 580
 581         /*
 582          * Discard anything that was parsed if we hit an error so we
 583          * don't use partial information.
 584          */
 585         if (parse_acpi_topology())
 586                 reset_cpu_topology();
 587         else if (of_have_populated_dt() && parse_dt_topology())
 588                 reset_cpu_topology();
 589 }
 590 #endif