drivers/base/arch_topology.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Arch specific cpu topology information
   4  *
   5  * Copyright (C) 2016, ARM Ltd.
   6  * Written by: Juri Lelli, ARM Ltd.
   7  */
   8
   9 #include <linux/acpi.h>
  10 #include <linux/cpu.h>
  11 #include <linux/cpufreq.h>
  12 #include <linux/device.h>
  13 #include <linux/of.h>
  14 #include <linux/slab.h>
  15 #include <linux/string.h>
  16 #include <linux/sched/topology.h>
  17 #include <linux/cpuset.h>
  18 #include <linux/cpumask.h>
  19 #include <linux/init.h>
  20 #include <linux/percpu.h>
  21 #include <linux/sched.h>
  22 #include <linux/smp.h>
  23
  24 __weak bool arch_freq_counters_available(struct cpumask *cpus)
  25 {
  26         return false;
  27 }
  28 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
  29
  30 void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
  31                          unsigned long max_freq)
  32 {
  33         unsigned long scale;
  34         int i;
  35
  36         /*
  37          * If the use of counters for FIE is enabled, just return as we don't
  38          * want to update the scale factor with information from CPUFREQ.
  39          * Instead the scale factor will be updated from arch_scale_freq_tick.
  40          */
  41         if (arch_freq_counters_available(cpus))
  42                 return;
  43
  44         scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
  45
  46         for_each_cpu(i, cpus)
  47                 per_cpu(freq_scale, i) = scale;
  48 }
  49
  50 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
  51
  52 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
  53 {
  54         per_cpu(cpu_scale, cpu) = capacity;
  55 }
  56
  57 static ssize_t cpu_capacity_show(struct device *dev,
  58                                  struct device_attribute *attr,
  59                                  char *buf)
  60 {
  61         struct cpu *cpu = container_of(dev, struct cpu, dev);
  62
  63         return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
  64 }
  65
  66 static void update_topology_flags_workfn(struct work_struct *work);
  67 static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
  68
  69 static DEVICE_ATTR_RO(cpu_capacity);
  70
  71 static int register_cpu_capacity_sysctl(void)
  72 {
  73         int i;
  74         struct device *cpu;
  75
  76         for_each_possible_cpu(i) {
  77                 cpu = get_cpu_device(i);
  78                 if (!cpu) {
  79                         pr_err("%s: too early to get CPU%d device!\n",
  80                                __func__, i);
  81                         continue;
  82                 }
  83                 device_create_file(cpu, &dev_attr_cpu_capacity);
  84         }
  85
  86         return 0;
  87 }
  88 subsys_initcall(register_cpu_capacity_sysctl);
  89
  /* Set to 1 by update_topology_flags_workfn() around its rebuild, 0 otherwise. */
  static int update_topology;

  /*
   * Returns the current value of the update_topology flag.
   */
  int topology_update_cpu_topology(void)
  {
          return update_topology;
  }
  96
  97 /*
  98  * Updating the sched_domains can't be done directly from cpufreq callbacks
  99  * due to locking, so queue the work for later.
 100  */
 101 static void update_topology_flags_workfn(struct work_struct *work)
 102 {
 103         update_topology = 1;
 104         rebuild_sched_domains();
 105         pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
 106         update_topology = 0;
 107 }
 108
 109 static DEFINE_PER_CPU(u32, freq_factor) = 1;
 110 static u32 *raw_capacity;
 111
 112 static int free_raw_capacity(void)
 113 {
 114         kfree(raw_capacity);
 115         raw_capacity = NULL;
 116
 117         return 0;
 118 }
 119
 120 void topology_normalize_cpu_scale(void)
 121 {
 122         u64 capacity;
 123         u64 capacity_scale;
 124         int cpu;
 125
 126         if (!raw_capacity)
 127                 return;
 128
 129         capacity_scale = 1;
 130         for_each_possible_cpu(cpu) {
 131                 capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
 132                 capacity_scale = max(capacity, capacity_scale);
 133         }
 134
 135         pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
 136         for_each_possible_cpu(cpu) {
 137                 capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
 138                 capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
 139                         capacity_scale);
 140                 topology_set_cpu_scale(cpu, capacity);
 141                 pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
 142                         cpu, topology_get_cpu_scale(cpu));
 143         }
 144 }
 145
 146 bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
 147 {
 148         struct clk *cpu_clk;
 149         static bool cap_parsing_failed;
 150         int ret;
 151         u32 cpu_capacity;
 152
 153         if (cap_parsing_failed)
 154                 return false;
 155
 156         ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
 157                                    &cpu_capacity);
 158         if (!ret) {
 159                 if (!raw_capacity) {
 160                         raw_capacity = kcalloc(num_possible_cpus(),
 161                                                sizeof(*raw_capacity),
 162                                                GFP_KERNEL);
 163                         if (!raw_capacity) {
 164                                 cap_parsing_failed = true;
 165                                 return false;
 166                         }
 167                 }
 168                 raw_capacity[cpu] = cpu_capacity;
 169                 pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
 170                         cpu_node, raw_capacity[cpu]);
 171
 172                 /*
 173                  * Update freq_factor for calculating early boot cpu capacities.
 174                  * For non-clk CPU DVFS mechanism, there's no way to get the
 175                  * frequency value now, assuming they are running at the same
 176                  * frequency (by keeping the initial freq_factor value).
 177                  */
 178                 cpu_clk = of_clk_get(cpu_node, 0);
 179                 if (!PTR_ERR_OR_ZERO(cpu_clk)) {
 180                         per_cpu(freq_factor, cpu) =
 181                                 clk_get_rate(cpu_clk) / 1000;
 182                         clk_put(cpu_clk);
 183                 }
 184         } else {
 185                 if (raw_capacity) {
 186                         pr_err("cpu_capacity: missing %pOF raw capacity\n",
 187                                 cpu_node);
 188                         pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
 189                 }
 190                 cap_parsing_failed = true;
 191                 free_raw_capacity();
 192         }
 193
 194         return !ret;
 195 }
 196
 197 #ifdef CONFIG_CPU_FREQ
 198 static cpumask_var_t cpus_to_visit;
 199 static void parsing_done_workfn(struct work_struct *work);
 200 static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
 201
 202 static int
 203 init_cpu_capacity_callback(struct notifier_block *nb,
 204                            unsigned long val,
 205                            void *data)
 206 {
 207         struct cpufreq_policy *policy = data;
 208         int cpu;
 209
 210         if (!raw_capacity)
 211                 return 0;
 212
 213         if (val != CPUFREQ_CREATE_POLICY)
 214                 return 0;
 215
 216         pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
 217                  cpumask_pr_args(policy->related_cpus),
 218                  cpumask_pr_args(cpus_to_visit));
 219
 220         cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
 221
 222         for_each_cpu(cpu, policy->related_cpus)
 223                 per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;
 224
 225         if (cpumask_empty(cpus_to_visit)) {
 226                 topology_normalize_cpu_scale();
 227                 schedule_work(&update_topology_flags_work);
 228                 free_raw_capacity();
 229                 pr_debug("cpu_capacity: parsing done\n");
 230                 schedule_work(&parsing_done_work);
 231         }
 232
 233         return 0;
 234 }
 235
 236 static struct notifier_block init_cpu_capacity_notifier = {
 237         .notifier_call = init_cpu_capacity_callback,
 238 };
 239
 240 static int __init register_cpufreq_notifier(void)
 241 {
 242         int ret;
 243
 244         /*
 245          * on ACPI-based systems we need to use the default cpu capacity
 246          * until we have the necessary code to parse the cpu capacity, so
 247          * skip registering cpufreq notifier.
 248          */
 249         if (!acpi_disabled || !raw_capacity)
 250                 return -EINVAL;
 251
 252         if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
 253                 return -ENOMEM;
 254
 255         cpumask_copy(cpus_to_visit, cpu_possible_mask);
 256
 257         ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
 258                                         CPUFREQ_POLICY_NOTIFIER);
 259
 260         if (ret)
 261                 free_cpumask_var(cpus_to_visit);
 262
 263         return ret;
 264 }
 265 core_initcall(register_cpufreq_notifier);
 266
 267 static void parsing_done_workfn(struct work_struct *work)
 268 {
 269         cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
 270                                          CPUFREQ_POLICY_NOTIFIER);
 271         free_cpumask_var(cpus_to_visit);
 272 }
 273
 274 #else
 275 core_initcall(free_raw_capacity);
 276 #endif
 277
 278 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
 279 /*
 280  * This function returns the logic cpu number of the node.
 281  * There are basically three kinds of return values:
 282  * (1) logic cpu number which is > 0.
 283  * (2) -ENODEV when the device tree(DT) node is valid and found in the DT but
 284  * there is no possible logical CPU in the kernel to match. This happens
 285  * when CONFIG_NR_CPUS is configure to be smaller than the number of
 286  * CPU nodes in DT. We need to just ignore this case.
 287  * (3) -1 if the node does not exist in the device tree
 288  */
 289 static int __init get_cpu_for_node(struct device_node *node)
 290 {
 291         struct device_node *cpu_node;
 292         int cpu;
 293
 294         cpu_node = of_parse_phandle(node, "cpu", 0);
 295         if (!cpu_node)
 296                 return -1;
 297
 298         cpu = of_cpu_node_to_id(cpu_node);
 299         if (cpu >= 0)
 300                 topology_parse_cpu_capacity(cpu_node, cpu);
 301         else
 302                 pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n",
 303                         cpu_node, cpumask_pr_args(cpu_possible_mask));
 304
 305         of_node_put(cpu_node);
 306         return cpu;
 307 }
 308
 309 static int __init parse_core(struct device_node *core, int package_id,
 310                              int core_id)
 311 {
 312         char name[20];
 313         bool leaf = true;
 314         int i = 0;
 315         int cpu;
 316         struct device_node *t;
 317
 318         do {
 319                 snprintf(name, sizeof(name), "thread%d", i);
 320                 t = of_get_child_by_name(core, name);
 321                 if (t) {
 322                         leaf = false;
 323                         cpu = get_cpu_for_node(t);
 324                         if (cpu >= 0) {
 325                                 cpu_topology[cpu].package_id = package_id;
 326                                 cpu_topology[cpu].core_id = core_id;
 327                                 cpu_topology[cpu].thread_id = i;
 328                         } else if (cpu != -ENODEV) {
 329                                 pr_err("%pOF: Can't get CPU for thread\n", t);
 330                                 of_node_put(t);
 331                                 return -EINVAL;
 332                         }
 333                         of_node_put(t);
 334                 }
 335                 i++;
 336         } while (t);
 337
 338         cpu = get_cpu_for_node(core);
 339         if (cpu >= 0) {
 340                 if (!leaf) {
 341                         pr_err("%pOF: Core has both threads and CPU\n",
 342                                core);
 343                         return -EINVAL;
 344                 }
 345
 346                 cpu_topology[cpu].package_id = package_id;
 347                 cpu_topology[cpu].core_id = core_id;
 348         } else if (leaf && cpu != -ENODEV) {
 349                 pr_err("%pOF: Can't get CPU for leaf core\n", core);
 350                 return -EINVAL;
 351         }
 352
 353         return 0;
 354 }
 355
 356 static int __init parse_cluster(struct device_node *cluster, int depth)
 357 {
 358         char name[20];
 359         bool leaf = true;
 360         bool has_cores = false;
 361         struct device_node *c;
 362         static int package_id __initdata;
 363         int core_id = 0;
 364         int i, ret;
 365
 366         /*
 367          * First check for child clusters; we currently ignore any
 368          * information about the nesting of clusters and present the
 369          * scheduler with a flat list of them.
 370          */
 371         i = 0;
 372         do {
 373                 snprintf(name, sizeof(name), "cluster%d", i);
 374                 c = of_get_child_by_name(cluster, name);
 375                 if (c) {
 376                         leaf = false;
 377                         ret = parse_cluster(c, depth + 1);
 378                         of_node_put(c);
 379                         if (ret != 0)
 380                                 return ret;
 381                 }
 382                 i++;
 383         } while (c);
 384
 385         /* Now check for cores */
 386         i = 0;
 387         do {
 388                 snprintf(name, sizeof(name), "core%d", i);
 389                 c = of_get_child_by_name(cluster, name);
 390                 if (c) {
 391                         has_cores = true;
 392
 393                         if (depth == 0) {
 394                                 pr_err("%pOF: cpu-map children should be clusters\n",
 395                                        c);
 396                                 of_node_put(c);
 397                                 return -EINVAL;
 398                         }
 399
 400                         if (leaf) {
 401                                 ret = parse_core(c, package_id, core_id++);
 402                         } else {
 403                                 pr_err("%pOF: Non-leaf cluster with core %s\n",
 404                                        cluster, name);
 405                                 ret = -EINVAL;
 406                         }
 407
 408                         of_node_put(c);
 409                         if (ret != 0)
 410                                 return ret;
 411                 }
 412                 i++;
 413         } while (c);
 414
 415         if (leaf && !has_cores)
 416                 pr_warn("%pOF: empty cluster\n", cluster);
 417
 418         if (leaf)
 419                 package_id++;
 420
 421         return 0;
 422 }
 423
 424 static int __init parse_dt_topology(void)
 425 {
 426         struct device_node *cn, *map;
 427         int ret = 0;
 428         int cpu;
 429
 430         cn = of_find_node_by_path("/cpus");
 431         if (!cn) {
 432                 pr_err("No CPU information found in DT\n");
 433                 return 0;
 434         }
 435
 436         /*
 437          * When topology is provided cpu-map is essentially a root
 438          * cluster with restricted subnodes.
 439          */
 440         map = of_get_child_by_name(cn, "cpu-map");
 441         if (!map)
 442                 goto out;
 443
 444         ret = parse_cluster(map, 0);
 445         if (ret != 0)
 446                 goto out_map;
 447
 448         topology_normalize_cpu_scale();
 449
 450         /*
 451          * Check that all cores are in the topology; the SMP code will
 452          * only mark cores described in the DT as possible.
 453          */
 454         for_each_possible_cpu(cpu)
 455                 if (cpu_topology[cpu].package_id == -1)
 456                         ret = -EINVAL;
 457
 458 out_map:
 459         of_node_put(map);
 460 out:
 461         of_node_put(cn);
 462         return ret;
 463 }
 464 #endif
 465
 466 /*
 467  * cpu topology table
 468  */
 469 struct cpu_topology cpu_topology[NR_CPUS];
 470 EXPORT_SYMBOL_GPL(cpu_topology);
 471
 472 const struct cpumask *cpu_coregroup_mask(int cpu)
 473 {
 474         const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
 475
 476         /* Find the smaller of NUMA, core or LLC siblings */
 477         if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
 478                 /* not numa in package, lets use the package siblings */
 479                 core_mask = &cpu_topology[cpu].core_sibling;
 480         }
 481         if (cpu_topology[cpu].llc_id != -1) {
 482                 if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
 483                         core_mask = &cpu_topology[cpu].llc_sibling;
 484         }
 485
 486         return core_mask;
 487 }
 488
 489 void update_siblings_masks(unsigned int cpuid)
 490 {
 491         struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
 492         int cpu;
 493
 494         /* update core and thread sibling masks */
 495         for_each_online_cpu(cpu) {
 496                 cpu_topo = &cpu_topology[cpu];
 497
 498                 if (cpuid_topo->llc_id == cpu_topo->llc_id) {
 499                         cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
 500                         cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
 501                 }
 502
 503                 if (cpuid_topo->package_id != cpu_topo->package_id)
 504                         continue;
 505
 506                 cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
 507                 cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
 508
 509                 if (cpuid_topo->core_id != cpu_topo->core_id)
 510                         continue;
 511
 512                 cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
 513                 cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
 514         }
 515 }
 516
 517 static void clear_cpu_topology(int cpu)
 518 {
 519         struct cpu_topology *cpu_topo = &cpu_topology[cpu];
 520
 521         cpumask_clear(&cpu_topo->llc_sibling);
 522         cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
 523
 524         cpumask_clear(&cpu_topo->core_sibling);
 525         cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
 526         cpumask_clear(&cpu_topo->thread_sibling);
 527         cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
 528 }
 529
 530 void __init reset_cpu_topology(void)
 531 {
 532         unsigned int cpu;
 533
 534         for_each_possible_cpu(cpu) {
 535                 struct cpu_topology *cpu_topo = &cpu_topology[cpu];
 536
 537                 cpu_topo->thread_id = -1;
 538                 cpu_topo->core_id = -1;
 539                 cpu_topo->package_id = -1;
 540                 cpu_topo->llc_id = -1;
 541
 542                 clear_cpu_topology(cpu);
 543         }
 544 }
 545
 546 void remove_cpu_topology(unsigned int cpu)
 547 {
 548         int sibling;
 549
 550         for_each_cpu(sibling, topology_core_cpumask(cpu))
 551                 cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
 552         for_each_cpu(sibling, topology_sibling_cpumask(cpu))
 553                 cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
 554         for_each_cpu(sibling, topology_llc_cpumask(cpu))
 555                 cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
 556
 557         clear_cpu_topology(cpu);
 558 }
 559
 560 __weak int __init parse_acpi_topology(void)
 561 {
 562         return 0;
 563 }
 564
 565 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
 566 void __init init_cpu_topology(void)
 567 {
 568         reset_cpu_topology();
 569
 570         /*
 571          * Discard anything that was parsed if we hit an error so we
 572          * don't use partial information.
 573          */
 574         if (parse_acpi_topology())
 575                 reset_cpu_topology();
 576         else if (of_have_populated_dt() && parse_dt_topology())
 577                 reset_cpu_topology();
 578 }
 579 #endif