drivers/base/arch_topology.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Arch specific cpu topology information
 *
 * Copyright (C) 2016, ARM Ltd.
 * Written by: Juri Lelli, ARM Ltd.
 */

#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched/topology.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>

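/*
 * Frequency invariance is supported when either cpufreq can provide it or
 * architected frequency counters are available for the online CPUs.
 */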
bool topology_scale_freq_invariant(void)
{
        return cpufreq_supports_freq_invariance() ||
               arch_freq_counters_available(cpu_online_mask);
}

__weak bool arch_freq_counters_available(const struct cpumask *cpus)
{
        return false;
}

DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;

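/*
 * Update the per-CPU frequency scale factor for every CPU in @cpus:
 * scale = (cur_freq / max_freq) * SCHED_CAPACITY_SCALE. Ignored when
 * architected counters drive the scale factor instead (see below).
 */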
void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
                             unsigned long max_freq)
{
        unsigned long scale;
        int i;

        if (WARN_ON_ONCE(!cur_freq || !max_freq))
                return;

        /*
         * If the use of counters for FIE is enabled, just return as we don't
         * want to update the scale factor with information from cpufreq.
         * Instead the scale factor will be updated from arch_scale_freq_tick().
         */
        if (arch_freq_counters_available(cpus))
                return;

        scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

        for_each_cpu(i, cpus)
                per_cpu(freq_scale, i) = scale;
}

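/*
 * Per-CPU compute capacity as seen by the scheduler, normalized so that the
 * most capable CPU in the system has SCHED_CAPACITY_SCALE (1024).
 */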
DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;

void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
{
        per_cpu(cpu_scale, cpu) = capacity;
}

DEFINE_PER_CPU(unsigned long, thermal_pressure);

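/*
 * Record the capacity currently lost to thermal throttling (the "thermal
 * pressure") for every CPU in @cpus. WRITE_ONCE() avoids store tearing for
 * readers that sample thermal_pressure without locking.
 */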
void topology_set_thermal_pressure(const struct cpumask *cpus,
                                   unsigned long th_pressure)
{
        int cpu;

        for_each_cpu(cpu, cpus)
                WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
}

static ssize_t cpu_capacity_show(struct device *dev,
                                 struct device_attribute *attr,
                                 char *buf)
{
        struct cpu *cpu = container_of(dev, struct cpu, dev);

        return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
}

static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

static DEVICE_ATTR_RO(cpu_capacity);

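/*
 * Expose the capacity of each possible CPU as a read-only "cpu_capacity"
 * sysfs attribute (despite the name, this registers sysfs files, not
 * sysctls).
 */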
static int register_cpu_capacity_sysctl(void)
{
        int i;
        struct device *cpu;

        for_each_possible_cpu(i) {
                cpu = get_cpu_device(i);
                if (!cpu) {
                        pr_err("%s: too early to get CPU%d device!\n",
                               __func__, i);
                        continue;
                }
                device_create_file(cpu, &dev_attr_cpu_capacity);
        }

        return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);

static int update_topology;

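/*
 * Non-zero while update_topology_flags_workfn() is rebuilding the sched
 * domains, so that the rebuild picks up the updated topology flags.
 */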
int topology_update_cpu_topology(void)
{
        return update_topology;
}

/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
        update_topology = 1;
        rebuild_sched_domains();
        pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
        update_topology = 0;
}

static DEFINE_PER_CPU(u32, freq_factor) = 1;
static u32 *raw_capacity;

static int free_raw_capacity(void)
{
        kfree(raw_capacity);
        raw_capacity = NULL;

        return 0;
}

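/*
 * Scale the raw capacities so that the largest raw_capacity * freq_factor
 * in the system becomes SCHED_CAPACITY_SCALE and every other CPU is
 * expressed relative to it.
 */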
void topology_normalize_cpu_scale(void)
{
        u64 capacity;
        u64 capacity_scale;
        int cpu;

        if (!raw_capacity)
                return;

        capacity_scale = 1;
        for_each_possible_cpu(cpu) {
                capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
                capacity_scale = max(capacity, capacity_scale);
        }

        pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
        for_each_possible_cpu(cpu) {
                capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
                capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
                        capacity_scale);
                topology_set_cpu_scale(cpu, capacity);
                pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
                        cpu, topology_get_cpu_scale(cpu));
        }
}

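/*
 * Read the optional "capacity-dmips-mhz" property of @cpu_node into
 * raw_capacity[]; returns true if a value was found. If a CPU lacks the
 * property after others provided one, discard the partial data and fall
 * back to the default capacity for all CPUs.
 */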
bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
        struct clk *cpu_clk;
        static bool cap_parsing_failed;
        int ret;
        u32 cpu_capacity;

        if (cap_parsing_failed)
                return false;

        ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
                                   &cpu_capacity);
        if (!ret) {
                if (!raw_capacity) {
                        raw_capacity = kcalloc(num_possible_cpus(),
                                               sizeof(*raw_capacity),
                                               GFP_KERNEL);
                        if (!raw_capacity) {
                                cap_parsing_failed = true;
                                return false;
                        }
                }
                raw_capacity[cpu] = cpu_capacity;
                pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
                        cpu_node, raw_capacity[cpu]);

                /*
                 * Update freq_factor for calculating early boot CPU capacities.
                 * For non-clk CPU DVFS mechanisms there is no way to get the
                 * frequency value at this point, so assume those CPUs run at
                 * the same frequency (by keeping the initial freq_factor value).
                 */
                cpu_clk = of_clk_get(cpu_node, 0);
                if (!PTR_ERR_OR_ZERO(cpu_clk)) {
                        per_cpu(freq_factor, cpu) =
                                clk_get_rate(cpu_clk) / 1000;
                        clk_put(cpu_clk);
                }
        } else {
                if (raw_capacity) {
                        pr_err("cpu_capacity: missing %pOF raw capacity\n",
                                cpu_node);
                        pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
                }
                cap_parsing_failed = true;
                free_raw_capacity();
        }

        return !ret;
}

#ifdef CONFIG_CPU_FREQ
static cpumask_var_t cpus_to_visit;
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);

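/*
 * cpufreq policy notifier: as each policy is created, record its
 * cpuinfo.max_freq as the freq_factor for the covered CPUs. Once every
 * possible CPU has been visited, normalize the capacities and schedule a
 * sched domain rebuild.
 */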
static int
init_cpu_capacity_callback(struct notifier_block *nb,
                           unsigned long val,
                           void *data)
{
        struct cpufreq_policy *policy = data;
        int cpu;

        if (!raw_capacity)
                return 0;

        if (val != CPUFREQ_CREATE_POLICY)
                return 0;

        pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
                 cpumask_pr_args(policy->related_cpus),
                 cpumask_pr_args(cpus_to_visit));

        cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

        for_each_cpu(cpu, policy->related_cpus)
                per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;

        if (cpumask_empty(cpus_to_visit)) {
                topology_normalize_cpu_scale();
                schedule_work(&update_topology_flags_work);
                free_raw_capacity();
                pr_debug("cpu_capacity: parsing done\n");
                schedule_work(&parsing_done_work);
        }

        return 0;
}

static struct notifier_block init_cpu_capacity_notifier = {
        .notifier_call = init_cpu_capacity_callback,
};

static int __init register_cpufreq_notifier(void)
{
        int ret;

        /*
         * On ACPI-based systems we need to use the default cpu capacity
         * until we have the necessary code to parse the cpu capacity, so
         * skip registering the cpufreq notifier.
         */
        if (!acpi_disabled || !raw_capacity)
                return -EINVAL;

        if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
                return -ENOMEM;

        cpumask_copy(cpus_to_visit, cpu_possible_mask);

        ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
                                        CPUFREQ_POLICY_NOTIFIER);

        if (ret)
                free_cpumask_var(cpus_to_visit);

        return ret;
}
core_initcall(register_cpufreq_notifier);

static void parsing_done_workfn(struct work_struct *work)
{
        cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
                                    CPUFREQ_POLICY_NOTIFIER);
        free_cpumask_var(cpus_to_visit);
}

#else
core_initcall(free_raw_capacity);
#endif

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
/*
 * This function returns the logical CPU number of the given node.
 * There are basically three kinds of return values:
 * (1) logical CPU number, which is >= 0.
 * (2) -ENODEV when the device tree (DT) node is valid and found in the DT
 * but there is no possible logical CPU in the kernel to match. This happens
 * when CONFIG_NR_CPUS is configured to be smaller than the number of
 * CPU nodes in the DT. We need to just ignore this case.
 * (3) -1 if the node does not exist in the device tree.
 */
static int __init get_cpu_for_node(struct device_node *node)
{
        struct device_node *cpu_node;
        int cpu;

        cpu_node = of_parse_phandle(node, "cpu", 0);
        if (!cpu_node)
                return -1;

        cpu = of_cpu_node_to_id(cpu_node);
        if (cpu >= 0)
                topology_parse_cpu_capacity(cpu_node, cpu);
        else
                pr_info("CPU node for %pOF exists but the possible cpu range is: %*pbl\n",
                        cpu_node, cpumask_pr_args(cpu_possible_mask));

        of_node_put(cpu_node);
        return cpu;
}

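/*
 * Parse a "coreN" node of the cpu-map. A core either contains "threadN"
 * children (SMT) or references a CPU directly; fill in the package, core
 * and thread IDs of the CPUs found.
 */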
static int __init parse_core(struct device_node *core, int package_id,
                             int core_id)
{
        char name[20];
        bool leaf = true;
        int i = 0;
        int cpu;
        struct device_node *t;

        do {
                snprintf(name, sizeof(name), "thread%d", i);
                t = of_get_child_by_name(core, name);
                if (t) {
                        leaf = false;
                        cpu = get_cpu_for_node(t);
                        if (cpu >= 0) {
                                cpu_topology[cpu].package_id = package_id;
                                cpu_topology[cpu].core_id = core_id;
                                cpu_topology[cpu].thread_id = i;
                        } else if (cpu != -ENODEV) {
                                pr_err("%pOF: Can't get CPU for thread\n", t);
                                of_node_put(t);
                                return -EINVAL;
                        }
                        of_node_put(t);
                }
                i++;
        } while (t);

        cpu = get_cpu_for_node(core);
        if (cpu >= 0) {
                if (!leaf) {
                        pr_err("%pOF: Core has both threads and CPU\n",
                               core);
                        return -EINVAL;
                }

                cpu_topology[cpu].package_id = package_id;
                cpu_topology[cpu].core_id = core_id;
        } else if (leaf && cpu != -ENODEV) {
                pr_err("%pOF: Can't get CPU for leaf core\n", core);
                return -EINVAL;
        }

        return 0;
}

static int __init parse_cluster(struct device_node *cluster, int depth)
{
        char name[20];
        bool leaf = true;
        bool has_cores = false;
        struct device_node *c;
        static int package_id __initdata;
        int core_id = 0;
        int i, ret;

        /*
         * First check for child clusters; we currently ignore any
         * information about the nesting of clusters and present the
         * scheduler with a flat list of them.
         */
        i = 0;
        do {
                snprintf(name, sizeof(name), "cluster%d", i);
                c = of_get_child_by_name(cluster, name);
                if (c) {
                        leaf = false;
                        ret = parse_cluster(c, depth + 1);
                        of_node_put(c);
                        if (ret != 0)
                                return ret;
                }
                i++;
        } while (c);

        /* Now check for cores */
        i = 0;
        do {
                snprintf(name, sizeof(name), "core%d", i);
                c = of_get_child_by_name(cluster, name);
                if (c) {
                        has_cores = true;

                        if (depth == 0) {
                                pr_err("%pOF: cpu-map children should be clusters\n",
                                       c);
                                of_node_put(c);
                                return -EINVAL;
                        }

                        if (leaf) {
                                ret = parse_core(c, package_id, core_id++);
                        } else {
                                pr_err("%pOF: Non-leaf cluster with core %s\n",
                                       cluster, name);
                                ret = -EINVAL;
                        }

                        of_node_put(c);
                        if (ret != 0)
                                return ret;
                }
                i++;
        } while (c);

        if (leaf && !has_cores)
                pr_warn("%pOF: empty cluster\n", cluster);

        if (leaf)
                package_id++;

        return 0;
}

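/*
 * Walk /cpus/cpu-map and populate cpu_topology[]. Returns -EINVAL if any
 * possible CPU is left without a package ID, i.e. it was not described in
 * the map.
 */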
static int __init parse_dt_topology(void)
{
        struct device_node *cn, *map;
        int ret = 0;
        int cpu;

        cn = of_find_node_by_path("/cpus");
        if (!cn) {
                pr_err("No CPU information found in DT\n");
                return 0;
        }

        /*
         * When topology is provided, cpu-map is essentially a root
         * cluster with restricted subnodes.
         */
        map = of_get_child_by_name(cn, "cpu-map");
        if (!map)
                goto out;

        ret = parse_cluster(map, 0);
        if (ret != 0)
                goto out_map;

        topology_normalize_cpu_scale();

        /*
         * Check that all cores are in the topology; the SMP code will
         * only mark cores described in the DT as possible.
         */
        for_each_possible_cpu(cpu)
                if (cpu_topology[cpu].package_id == -1)
                        ret = -EINVAL;

out_map:
        of_node_put(map);
out:
        of_node_put(cn);
        return ret;
}
#endif

/*
 * cpu topology table
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

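/*
 * Return the mask of CPUs @cpu shares its core group with: the smallest of
 * the NUMA node mask, the package (core_sibling) mask and the LLC sibling
 * mask that still contains @cpu.
 */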
const struct cpumask *cpu_coregroup_mask(int cpu)
{
        const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

        /* Find the smallest of NUMA, core or LLC siblings */
        if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
                /* not NUMA in package, let's use the package siblings */
                core_mask = &cpu_topology[cpu].core_sibling;
        }
        if (cpu_topology[cpu].llc_id != -1) {
                if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
                        core_mask = &cpu_topology[cpu].llc_sibling;
        }

        return core_mask;
}

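/*
 * Update the sibling masks for @cpuid against all online CPUs: add it to
 * the LLC, core and thread sibling masks of every CPU it shares an LLC,
 * package or core with, and vice versa.
 */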
void update_siblings_masks(unsigned int cpuid)
{
        struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
        int cpu;

        /* update core and thread sibling masks */
        for_each_online_cpu(cpu) {
                cpu_topo = &cpu_topology[cpu];

                if (cpuid_topo->llc_id == cpu_topo->llc_id) {
                        cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
                        cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
                }

                if (cpuid_topo->package_id != cpu_topo->package_id)
                        continue;

                cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
                cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

                if (cpuid_topo->core_id != cpu_topo->core_id)
                        continue;

                cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
                cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
        }
}

static void clear_cpu_topology(int cpu)
{
        struct cpu_topology *cpu_topo = &cpu_topology[cpu];

        cpumask_clear(&cpu_topo->llc_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);

        cpumask_clear(&cpu_topo->core_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
        cpumask_clear(&cpu_topo->thread_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
}

void __init reset_cpu_topology(void)
{
        unsigned int cpu;

        for_each_possible_cpu(cpu) {
                struct cpu_topology *cpu_topo = &cpu_topology[cpu];

                cpu_topo->thread_id = -1;
                cpu_topo->core_id = -1;
                cpu_topo->package_id = -1;
                cpu_topo->llc_id = -1;

                clear_cpu_topology(cpu);
        }
}

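/*
 * Remove @cpu from every sibling mask it appears in and reset its own
 * masks to contain only itself (used when the CPU is unplugged).
 */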
void remove_cpu_topology(unsigned int cpu)
{
        int sibling;

        for_each_cpu(sibling, topology_core_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
        for_each_cpu(sibling, topology_sibling_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
        for_each_cpu(sibling, topology_llc_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));

        clear_cpu_topology(cpu);
}

__weak int __init parse_acpi_topology(void)
{
        return 0;
}

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
        reset_cpu_topology();

        /*
         * Discard anything that was parsed if we hit an error so we
         * don't use partial information.
         */
        if (parse_acpi_topology())
                reset_cpu_topology();
        else if (of_have_populated_dt() && parse_dt_topology())
                reset_cpu_topology();
}
#endif