// SPDX-License-Identifier: GPL-2.0
/*
 * Arch specific cpu topology information
 *
 * Copyright (C) 2016, ARM Ltd.
 * Written by: Juri Lelli, ARM Ltd.
 */
#include <linux/acpi.h>
#include <linux/clk.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched/topology.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>
DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
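
/*
 * arch_set_freq_scale() records the current/max frequency ratio as a
 * factor in [0, SCHED_CAPACITY_SCALE] so the scheduler's load tracking
 * stays frequency invariant. Illustrative numbers (not from this file):
 * cur_freq = 1000 MHz on a max_freq = 2000 MHz CPU gives
 * (1000 << 10) / 2000 = 512, i.e. half of SCHED_CAPACITY_SCALE (1024).
 */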
void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
			 unsigned long max_freq)
{
	unsigned long scale;
	int i;

	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

	for_each_cpu(i, cpus)
		per_cpu(freq_scale, i) = scale;
}

DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;

void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
{
	per_cpu(cpu_scale, cpu) = capacity;
}
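
/*
 * cpu_capacity_show() backs the read-only sysfs attribute
 * /sys/devices/system/cpu/cpuX/cpu_capacity, created by
 * register_cpu_capacity_sysctl() below, which reports the CPU's
 * normalized capacity (1024 for the most capable CPU in the system).
 */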
static ssize_t cpu_capacity_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct cpu *cpu = container_of(dev, struct cpu, dev);

	return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
}

static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

static DEVICE_ATTR_RO(cpu_capacity);

static int register_cpu_capacity_sysctl(void)
{
	int i;
	struct device *cpu;

	for_each_possible_cpu(i) {
		cpu = get_cpu_device(i);
		if (!cpu) {
			pr_err("%s: too early to get CPU%d device!\n",
			       __func__, i);
			continue;
		}
		device_create_file(cpu, &dev_attr_cpu_capacity);
	}

	return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);

static int update_topology;

int topology_update_cpu_topology(void)
{
	return update_topology;
}
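
/*
 * topology_update_cpu_topology() is read by the scheduler (via
 * arch_update_cpu_topology()) while sched domains are rebuilt; it is
 * nonzero only while the work item below runs rebuild_sched_domains(),
 * signalling that the topology flags have changed.
 */
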
/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
	update_topology = 1;
	rebuild_sched_domains();
	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
	update_topology = 0;
}

static DEFINE_PER_CPU(u32, freq_factor) = 1;
static u32 *raw_capacity;

static int free_raw_capacity(void)
{
	kfree(raw_capacity);
	raw_capacity = NULL;

	return 0;
}
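
/*
 * Scale raw capacities so that the most capable CPU lands exactly on
 * SCHED_CAPACITY_SCALE. Illustrative numbers (not from this file): with
 * capacity-dmips-mhz values of 1024 (big) and 446 (LITTLE) and equal
 * freq_factor values, the result is 1024 and (446 << 10) / 1024 = 446.
 */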
void topology_normalize_cpu_scale(void)
{
	u64 capacity;
	u64 capacity_scale;
	int cpu;

	if (!raw_capacity)
		return;

	capacity_scale = 1;
	for_each_possible_cpu(cpu) {
		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
		capacity_scale = max(capacity, capacity_scale);
	}

	pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
	for_each_possible_cpu(cpu) {
		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
		capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
				     capacity_scale);
		topology_set_cpu_scale(cpu, capacity);
		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
			 cpu, topology_get_cpu_scale(cpu));
	}
}
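
/*
 * Read the optional "capacity-dmips-mhz" property from a CPU's
 * device-tree node. A minimal illustrative node (values made up, not
 * from any real platform):
 *
 *	cpu@0 {
 *		device_type = "cpu";
 *		compatible = "arm,cortex-a53";
 *		reg = <0x0>;
 *		capacity-dmips-mhz = <578>;
 *	};
 *
 * Parsing is all-or-nothing: if any CPU lacks the property, the raw
 * capacities are discarded and every CPU keeps the default of 1024.
 */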
bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
	struct clk *cpu_clk;
	static bool cap_parsing_failed;
	int ret;
	u32 cpu_capacity;

	if (cap_parsing_failed)
		return false;

	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
				   &cpu_capacity);
	if (!ret) {
		if (!raw_capacity) {
			raw_capacity = kcalloc(num_possible_cpus(),
					       sizeof(*raw_capacity),
					       GFP_KERNEL);
			if (!raw_capacity) {
				cap_parsing_failed = true;
				return false;
			}
		}
		raw_capacity[cpu] = cpu_capacity;
		pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
			 cpu_node, raw_capacity[cpu]);

		/*
		 * Update freq_factor for calculating early boot CPU
		 * capacities. For non-clk CPU DVFS mechanisms there is no
		 * way to get the frequency value now, so assume the CPUs
		 * are running at the same frequency (by keeping the
		 * initial freq_factor value).
		 */
		cpu_clk = of_clk_get(cpu_node, 0);
		if (!PTR_ERR_OR_ZERO(cpu_clk)) {
			per_cpu(freq_factor, cpu) =
				clk_get_rate(cpu_clk) / 1000;
			clk_put(cpu_clk);
		}
	} else {
		if (raw_capacity) {
			pr_err("cpu_capacity: missing %pOF raw capacity\n",
			       cpu_node);
			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
		}
		cap_parsing_failed = true;
		free_raw_capacity();
	}

	return !ret;
}

#ifdef CONFIG_CPU_FREQ
static cpumask_var_t cpus_to_visit;
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
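
/*
 * Early boot capacity parsing can run before cpufreq knows each CPU's
 * maximum frequency. This notifier watches cpufreq policy creation:
 * each new policy's CPUs are removed from cpus_to_visit and their
 * freq_factor is set from cpuinfo.max_freq; once the mask drains empty,
 * capacities are re-normalized and the notifier unregisters itself.
 */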
static int
init_cpu_capacity_callback(struct notifier_block *nb,
			   unsigned long val,
			   void *data)
{
	struct cpufreq_policy *policy = data;
	int cpu;

	if (!raw_capacity)
		return 0;

	if (val != CPUFREQ_CREATE_POLICY)
		return 0;

	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
		 cpumask_pr_args(policy->related_cpus),
		 cpumask_pr_args(cpus_to_visit));

	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

	for_each_cpu(cpu, policy->related_cpus)
		per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;

	if (cpumask_empty(cpus_to_visit)) {
		topology_normalize_cpu_scale();
		schedule_work(&update_topology_flags_work);
		free_raw_capacity();
		pr_debug("cpu_capacity: parsing done\n");
		schedule_work(&parsing_done_work);
	}

	return 0;
}

static struct notifier_block init_cpu_capacity_notifier = {
	.notifier_call = init_cpu_capacity_callback,
};

static int __init register_cpufreq_notifier(void)
{
	int ret;

	/*
	 * On ACPI-based systems we need to use the default cpu capacity
	 * until we have the necessary code to parse the cpu capacity, so
	 * skip registering the cpufreq notifier.
	 */
	if (!acpi_disabled || !raw_capacity)
		return -EINVAL;

	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(cpus_to_visit, cpu_possible_mask);

	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
					CPUFREQ_POLICY_NOTIFIER);
	if (ret)
		free_cpumask_var(cpus_to_visit);

	return ret;
}
core_initcall(register_cpufreq_notifier);

static void parsing_done_workfn(struct work_struct *work)
{
	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
				    CPUFREQ_POLICY_NOTIFIER);
	free_cpumask_var(cpus_to_visit);
}

#else
core_initcall(free_raw_capacity);
#endif

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
/*
 * This function returns the logical CPU number of the given node.
 * There are basically three kinds of return values:
 * (1) logical CPU number, which is >= 0.
 * (2) -ENODEV when the device tree (DT) node is valid and found in the DT,
 *     but there is no possible logical CPU in the kernel to match. This
 *     happens when CONFIG_NR_CPUS is configured to be smaller than the
 *     number of CPU nodes in the DT. We need to just ignore this case.
 * (3) -1 if the node does not exist in the device tree.
 */
static int __init get_cpu_for_node(struct device_node *node)
{
	struct device_node *cpu_node;
	int cpu;

	cpu_node = of_parse_phandle(node, "cpu", 0);
	if (!cpu_node)
		return -1;

	cpu = of_cpu_node_to_id(cpu_node);
	if (cpu >= 0)
		topology_parse_cpu_capacity(cpu_node, cpu);
	else
		pr_info("CPU node for %pOF exists but the possible cpu range is: %*pbl\n",
			cpu_node, cpumask_pr_args(cpu_possible_mask));

	of_node_put(cpu_node);
	return cpu;
}

static int __init parse_core(struct device_node *core, int package_id,
			     int core_id)
{
	char name[10];
	bool leaf = true;
	int i = 0;
	int cpu;
	struct device_node *t;

	do {
		snprintf(name, sizeof(name), "thread%d", i);
		t = of_get_child_by_name(core, name);
		if (t) {
			leaf = false;
			cpu = get_cpu_for_node(t);
			if (cpu >= 0) {
				cpu_topology[cpu].package_id = package_id;
				cpu_topology[cpu].core_id = core_id;
				cpu_topology[cpu].thread_id = i;
			} else if (cpu != -ENODEV) {
				pr_err("%pOF: Can't get CPU for thread\n", t);
				of_node_put(t);
				return -EINVAL;
			}
			of_node_put(t);
		}
		i++;
	} while (t);

	cpu = get_cpu_for_node(core);
	if (cpu >= 0) {
		if (!leaf) {
			pr_err("%pOF: Core has both threads and CPU\n", core);
			return -EINVAL;
		}

		cpu_topology[cpu].package_id = package_id;
		cpu_topology[cpu].core_id = core_id;
	} else if (leaf && cpu != -ENODEV) {
		pr_err("%pOF: Can't get CPU for leaf core\n", core);
		return -EINVAL;
	}

	return 0;
}

static int __init parse_cluster(struct device_node *cluster, int depth)
{
	char name[10];
	bool leaf = true;
	bool has_cores = false;
	struct device_node *c;
	static int package_id __initdata;
	int core_id = 0;
	int i, ret;

	/*
	 * First check for child clusters; we currently ignore any
	 * information about the nesting of clusters and present the
	 * scheduler with a flat list of them.
	 */
	i = 0;
	do {
		snprintf(name, sizeof(name), "cluster%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			leaf = false;
			ret = parse_cluster(c, depth + 1);
			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	/* Now check for cores */
	i = 0;
	do {
		snprintf(name, sizeof(name), "core%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			has_cores = true;

			if (depth == 0) {
				pr_err("%pOF: cpu-map children should be clusters\n",
				       c);
				of_node_put(c);
				return -EINVAL;
			}

			if (leaf) {
				ret = parse_core(c, package_id, core_id++);
			} else {
				pr_err("%pOF: Non-leaf cluster with core %s\n",
				       cluster, name);
				ret = -EINVAL;
			}

			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	if (leaf && !has_cores)
		pr_warn("%pOF: empty cluster\n", cluster);

	if (leaf)
		package_id++;

	return 0;
}
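
/*
 * parse_dt_topology() walks the /cpus/cpu-map device-tree node. An
 * illustrative (made-up) two-cluster layout:
 *
 *	cpu-map {
 *		cluster0 {
 *			core0 { cpu = <&cpu0>; };
 *			core1 { cpu = <&cpu1>; };
 *		};
 *		cluster1 {
 *			core0 { cpu = <&cpu2>; };
 *		};
 *	};
 */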
static int __init parse_dt_topology(void)
{
	struct device_node *cn, *map;
	int ret = 0;
	int cpu;

	cn = of_find_node_by_path("/cpus");
	if (!cn) {
		pr_err("No CPU information found in DT\n");
		return 0;
	}

	/*
	 * When topology is provided cpu-map is essentially a root
	 * cluster with restricted subnodes.
	 */
	map = of_get_child_by_name(cn, "cpu-map");
	if (!map)
		goto out;

	ret = parse_cluster(map, 0);
	if (ret != 0)
		goto out_map;

	topology_normalize_cpu_scale();

	/*
	 * Check that all cores are in the topology; the SMP code will
	 * only mark cores described in the DT as possible.
	 */
	for_each_possible_cpu(cpu)
		if (cpu_topology[cpu].package_id == -1)
			ret = -EINVAL;

out_map:
	of_node_put(map);
out:
	of_node_put(cn);
	return ret;
}
#endif

/*
 * cpu topology table
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
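
/*
 * cpu_coregroup_mask() chooses the span used for the scheduler's MC
 * level: the smallest of the NUMA node mask, the package (core_sibling)
 * mask and, when known, the last-level-cache sibling mask.
 */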
const struct cpumask *cpu_coregroup_mask(int cpu)
{
	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

	/* Find the smaller of NUMA, core or LLC siblings */
	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
		/* no NUMA boundary within the package, use the package siblings */
		core_mask = &cpu_topology[cpu].core_sibling;
	}
	if (cpu_topology[cpu].llc_id != -1) {
		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
			core_mask = &cpu_topology[cpu].llc_sibling;
	}

	return core_mask;
}
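
/*
 * Called when a CPU comes online: add it to the llc/core/thread sibling
 * masks of every online CPU it shares the respective topology level
 * with, and vice versa, keeping the masks symmetric.
 */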
void update_siblings_masks(unsigned int cpuid)
{
	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
	int cpu;

	/* update core and thread sibling masks */
	for_each_online_cpu(cpu) {
		cpu_topo = &cpu_topology[cpu];

		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
		}

		if (cpuid_topo->package_id != cpu_topo->package_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

		if (cpuid_topo->core_id != cpu_topo->core_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
	}
}

static void clear_cpu_topology(int cpu)
{
	struct cpu_topology *cpu_topo = &cpu_topology[cpu];

	cpumask_clear(&cpu_topo->llc_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);

	cpumask_clear(&cpu_topo->core_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
	cpumask_clear(&cpu_topo->thread_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
}

void __init reset_cpu_topology(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_topology *cpu_topo = &cpu_topology[cpu];

		cpu_topo->thread_id = -1;
		cpu_topo->core_id = -1;
		cpu_topo->package_id = -1;
		cpu_topo->llc_id = -1;

		clear_cpu_topology(cpu);
	}
}
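
/*
 * Inverse of update_siblings_masks(): on hot-unplug, drop the departing
 * CPU from every sibling's masks, then reset its own masks to self-only.
 */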
void remove_cpu_topology(unsigned int cpu)
{
	int sibling;

	for_each_cpu(sibling, topology_core_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
	for_each_cpu(sibling, topology_llc_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));

	clear_cpu_topology(cpu);
}

__weak int __init parse_acpi_topology(void)
{
	return 0;
}

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
	reset_cpu_topology();

	/*
	 * Discard anything that was parsed if we hit an error so we
	 * don't use partial information.
	 */
	if (parse_acpi_topology())
		reset_cpu_topology();
	else if (of_have_populated_dt() && parse_dt_topology())
		reset_cpu_topology();
}
#endif