1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *    Copyright IBM Corp. 2007, 2011
4  */
5
6 #define KMSG_COMPONENT "cpu"
7 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
8
9 #include <linux/workqueue.h>
10 #include <linux/memblock.h>
11 #include <linux/uaccess.h>
12 #include <linux/sysctl.h>
13 #include <linux/cpuset.h>
14 #include <linux/device.h>
15 #include <linux/export.h>
16 #include <linux/kernel.h>
17 #include <linux/sched.h>
18 #include <linux/sched/topology.h>
19 #include <linux/delay.h>
20 #include <linux/init.h>
21 #include <linux/slab.h>
22 #include <linux/cpu.h>
23 #include <linux/smp.h>
24 #include <linux/mm.h>
25 #include <linux/nodemask.h>
26 #include <linux/node.h>
27 #include <asm/sysinfo.h>
28
29 #define PTF_HORIZONTAL  (0UL)
30 #define PTF_VERTICAL    (1UL)
31 #define PTF_CHECK       (2UL)
32
33 enum {
34         TOPOLOGY_MODE_HW,
35         TOPOLOGY_MODE_SINGLE,
36         TOPOLOGY_MODE_PACKAGE,
37         TOPOLOGY_MODE_UNINITIALIZED
38 };
39
40 struct mask_info {
41         struct mask_info *next;
42         unsigned char id;
43         cpumask_t mask;
44 };
45
46 static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
47 static void set_topology_timer(void);
48 static void topology_work_fn(struct work_struct *work);
49 static struct sysinfo_15_1_x *tl_info;
50
51 static DECLARE_WORK(topology_work, topology_work_fn);
52
53 /*
54  * Socket/book/drawer linked lists and cpu_topology updates are
55  * protected by "sched_domains_mutex".
56  */
57 static struct mask_info socket_info;
58 static struct mask_info book_info;
59 static struct mask_info drawer_info;
60
61 struct cpu_topology_s390 cpu_topology[NR_CPUS];
62 EXPORT_SYMBOL_GPL(cpu_topology);
63
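/*
 * Build the mask of all set-up CPUs that share the container (socket,
 * book or drawer) described by @info with @cpu.  In SINGLE mode the
 * result contains only @cpu, in PACKAGE mode all set-up CPUs.  The
 * static scratch mask is fine here since updates are serialized (see
 * the comment above socket_info).
 */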
64 static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
65 {
66         static cpumask_t mask;
67
68         cpumask_clear(&mask);
69         if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
70                 goto out;
71         cpumask_set_cpu(cpu, &mask);
72         switch (topology_mode) {
73         case TOPOLOGY_MODE_HW:
74                 while (info) {
75                         if (cpumask_test_cpu(cpu, &info->mask)) {
76                                 cpumask_copy(&mask, &info->mask);
77                                 break;
78                         }
79                         info = info->next;
80                 }
81                 break;
82         case TOPOLOGY_MODE_PACKAGE:
83                 cpumask_copy(&mask, cpu_present_mask);
84                 break;
85         default:
86                 fallthrough;
87         case TOPOLOGY_MODE_SINGLE:
88                 break;
89         }
90         cpumask_and(&mask, &mask, &cpu_setup_mask);
91 out:
92         cpumask_copy(dst, &mask);
93 }
94
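/*
 * Build the SMT sibling mask for @cpu: all set-up CPUs of the same core,
 * based on smp_cpu_mtid (the highest thread id within a core).  Outside
 * of TOPOLOGY_MODE_HW the mask contains only @cpu itself.
 */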
95 static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
96 {
97         static cpumask_t mask;
98         unsigned int max_cpu;
99
100         cpumask_clear(&mask);
101         if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
102                 goto out;
103         cpumask_set_cpu(cpu, &mask);
104         if (topology_mode != TOPOLOGY_MODE_HW)
105                 goto out;
106         cpu -= cpu % (smp_cpu_mtid + 1);
107         max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
108         for (; cpu <= max_cpu; cpu++) {
109                 if (cpumask_test_cpu(cpu, &cpu_setup_mask))
110                         cpumask_set_cpu(cpu, &mask);
111         }
112 out:
113         cpumask_copy(dst, &mask);
114 }
115
116 #define TOPOLOGY_CORE_BITS      64
117
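/*
 * Add all CPUs of one core entry to the drawer, book and socket masks
 * and fill in the per-CPU topology data.  for_each_set_bit() numbers
 * bits from the least significant end, while the architected core mask
 * numbers cores from the leftmost bit, hence the mirroring with
 * TOPOLOGY_CORE_BITS - 1 before adding the entry's origin.
 */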
118 static void add_cpus_to_mask(struct topology_core *tl_core,
119                              struct mask_info *drawer,
120                              struct mask_info *book,
121                              struct mask_info *socket)
122 {
123         struct cpu_topology_s390 *topo;
124         unsigned int core;
125
126         for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
127                 unsigned int max_cpu, rcore;
128                 int cpu;
129
130                 rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
131                 cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
132                 if (cpu < 0)
133                         continue;
134                 max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
135                 for (; cpu <= max_cpu; cpu++) {
136                         topo = &cpu_topology[cpu];
137                         topo->drawer_id = drawer->id;
138                         topo->book_id = book->id;
139                         topo->socket_id = socket->id;
140                         topo->core_id = rcore;
141                         topo->thread_id = cpu;
142                         topo->dedicated = tl_core->d;
143                         cpumask_set_cpu(cpu, &drawer->mask);
144                         cpumask_set_cpu(cpu, &book->mask);
145                         cpumask_set_cpu(cpu, &socket->mask);
146                         smp_cpu_set_polarization(cpu, tl_core->pp);
147                 }
148         }
149 }
150
151 static void clear_masks(void)
152 {
153         struct mask_info *info;
154
155         info = &socket_info;
156         while (info) {
157                 cpumask_clear(&info->mask);
158                 info = info->next;
159         }
160         info = &book_info;
161         while (info) {
162                 cpumask_clear(&info->mask);
163                 info = info->next;
164         }
165         info = &drawer_info;
166         while (info) {
167                 cpumask_clear(&info->mask);
168                 info = info->next;
169         }
170 }
171
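/*
 * Advance to the next topology-list entry.  Core entries (nl == 0) and
 * container entries have different sizes, so the stride depends on the
 * type of the current entry.
 */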
172 static union topology_entry *next_tle(union topology_entry *tle)
173 {
174         if (!tle->nl)
175                 return (union topology_entry *)((struct topology_core *)tle + 1);
176         return (union topology_entry *)((struct topology_container *)tle + 1);
177 }
178
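/*
 * Convert a SYSIB 15.1.x topology list into the socket/book/drawer
 * mask_info lists.  Container entries arrive top-down (drawer, book,
 * socket) followed by the core entries that belong to them; an unknown
 * nesting level invalidates the whole result.
 */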
179 static void tl_to_masks(struct sysinfo_15_1_x *info)
180 {
181         struct mask_info *socket = &socket_info;
182         struct mask_info *book = &book_info;
183         struct mask_info *drawer = &drawer_info;
184         union topology_entry *tle, *end;
185
186         clear_masks();
187         tle = info->tle;
188         end = (union topology_entry *)((unsigned long)info + info->length);
189         while (tle < end) {
190                 switch (tle->nl) {
191                 case 3:
192                         drawer = drawer->next;
193                         drawer->id = tle->container.id;
194                         break;
195                 case 2:
196                         book = book->next;
197                         book->id = tle->container.id;
198                         break;
199                 case 1:
200                         socket = socket->next;
201                         socket->id = tle->container.id;
202                         break;
203                 case 0:
204                         add_cpus_to_mask(&tle->cpu, drawer, book, socket);
205                         break;
206                 default:
207                         clear_masks();
208                         return;
209                 }
210                 tle = next_tle(tle);
211         }
212 }
213
214 static void topology_update_polarization_simple(void)
215 {
216         int cpu;
217
218         for_each_possible_cpu(cpu)
219                 smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
220 }
221
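/*
 * Issue the PTF (Perform Topology Function) instruction with function
 * code @fc and return its condition code.  PTF_HORIZONTAL and
 * PTF_VERTICAL request the corresponding polarization, PTF_CHECK tests
 * whether a topology-change report is pending.
 */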
222 static int ptf(unsigned long fc)
223 {
224         int rc;
225
226         asm volatile(
227                 "       .insn   rre,0xb9a20000,%1,%1\n"
228                 "       ipm     %0\n"
229                 "       srl     %0,28\n"
230                 : "=d" (rc)
231                 : "d" (fc)  : "cc");
232         return rc;
233 }
234
235 int topology_set_cpu_management(int fc)
236 {
237         int cpu, rc;
238
239         if (!MACHINE_HAS_TOPOLOGY)
240                 return -EOPNOTSUPP;
241         if (fc)
242                 rc = ptf(PTF_VERTICAL);
243         else
244                 rc = ptf(PTF_HORIZONTAL);
245         if (rc)
246                 return -EBUSY;
247         for_each_possible_cpu(cpu)
248                 smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
249         return rc;
250 }
251
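/*
 * Rebuild all per-CPU topology masks and recalculate the number of
 * booted cores per package.  In the non-HW modes the IDs are
 * synthesized: one package for all CPUs (PACKAGE) or one package per
 * CPU (SINGLE).
 */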
252 void update_cpu_masks(void)
253 {
254         struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
255         int cpu, sibling, pkg_first, smt_first, id;
256
257         for_each_possible_cpu(cpu) {
258                 topo = &cpu_topology[cpu];
259                 cpu_thread_map(&topo->thread_mask, cpu);
260                 cpu_group_map(&topo->core_mask, &socket_info, cpu);
261                 cpu_group_map(&topo->book_mask, &book_info, cpu);
262                 cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
263                 topo->booted_cores = 0;
264                 if (topology_mode != TOPOLOGY_MODE_HW) {
265                         id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
266                         topo->thread_id = cpu;
267                         topo->core_id = cpu;
268                         topo->socket_id = id;
269                         topo->book_id = id;
270                         topo->drawer_id = id;
271                 }
272         }
273         for_each_online_cpu(cpu) {
274                 topo = &cpu_topology[cpu];
275                 pkg_first = cpumask_first(&topo->core_mask);
276                 topo_package = &cpu_topology[pkg_first];
277                 if (cpu == pkg_first) {
278                         for_each_cpu(sibling, &topo->core_mask) {
279                                 topo_sibling = &cpu_topology[sibling];
280                                 smt_first = cpumask_first(&topo_sibling->thread_mask);
281                                 if (sibling == smt_first)
282                                         topo_package->booted_cores++;
283                         }
284                 } else {
285                         topo->booted_cores = topo_package->booted_cores;
286                 }
287         }
288 }
289
290 void store_topology(struct sysinfo_15_1_x *info)
291 {
292         stsi(info, 15, 1, topology_mnest_limit());
293 }
294
295 static void __arch_update_dedicated_flag(void *arg)
296 {
297         if (topology_cpu_dedicated(smp_processor_id()))
298                 set_cpu_flag(CIF_DEDICATED_CPU);
299         else
300                 clear_cpu_flag(CIF_DEDICATED_CPU);
301 }
302
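/*
 * Re-read the machine topology (SYSIB 15.1.x) if available and rebuild
 * the container and per-CPU masks; without hardware topology support
 * only the masks are rebuilt and all CPUs are marked horizontally
 * polarized.  Returns nonzero when hardware topology information was
 * read, i.e. when the masks may actually have changed.
 */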
303 static int __arch_update_cpu_topology(void)
304 {
305         struct sysinfo_15_1_x *info = tl_info;
306         int rc = 0;
307
308         mutex_lock(&smp_cpu_state_mutex);
309         if (MACHINE_HAS_TOPOLOGY) {
310                 rc = 1;
311                 store_topology(info);
312                 tl_to_masks(info);
313         }
314         update_cpu_masks();
315         if (!MACHINE_HAS_TOPOLOGY)
316                 topology_update_polarization_simple();
317         mutex_unlock(&smp_cpu_state_mutex);
318         return rc;
319 }
320
321 int arch_update_cpu_topology(void)
322 {
323         struct device *dev;
324         int cpu, rc;
325
326         rc = __arch_update_cpu_topology();
327         on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
328         for_each_online_cpu(cpu) {
329                 dev = get_cpu_device(cpu);
330                 if (dev)
331                         kobject_uevent(&dev->kobj, KOBJ_CHANGE);
332         }
333         return rc;
334 }
335
336 static void topology_work_fn(struct work_struct *work)
337 {
338         rebuild_sched_domains();
339 }
340
341 void topology_schedule_update(void)
342 {
343         schedule_work(&topology_work);
344 }
345
346 static void topology_flush_work(void)
347 {
348         flush_work(&topology_work);
349 }
350
351 static void topology_timer_fn(struct timer_list *unused)
352 {
353         if (ptf(PTF_CHECK))
354                 topology_schedule_update();
355         set_topology_timer();
356 }
357
358 static struct timer_list topology_timer;
359
360 static atomic_t topology_poll = ATOMIC_INIT(0);
361
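/*
 * Re-arm the topology check timer: while topology_poll is positive
 * (topology_expect_change() announced a change) poll every 100 ms,
 * otherwise only once per minute.
 */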
362 static void set_topology_timer(void)
363 {
364         if (atomic_add_unless(&topology_poll, -1, 0))
365                 mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
366         else
367                 mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
368 }
369
370 void topology_expect_change(void)
371 {
372         if (!MACHINE_HAS_TOPOLOGY)
373                 return;
374         /* This is racy, but it doesn't matter since it is just a heuristic.
375          * Worst case is that we poll in a higher frequency for a bit longer.
376          */
377         if (atomic_read(&topology_poll) > 60)
378                 return;
379         atomic_add(60, &topology_poll);
380         set_topology_timer();
381 }
382
383 static int cpu_management;
384
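/*
 * "dispatching" attribute below /sys/devices/system/cpu: reading
 * returns the current cpu_management setting, writing 0 or 1 (for
 * example "echo 1 > /sys/devices/system/cpu/dispatching") switches
 * between horizontal and vertical polarization.
 */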
385 static ssize_t dispatching_show(struct device *dev,
386                                 struct device_attribute *attr,
387                                 char *buf)
388 {
389         ssize_t count;
390
391         mutex_lock(&smp_cpu_state_mutex);
392         count = sprintf(buf, "%d\n", cpu_management);
393         mutex_unlock(&smp_cpu_state_mutex);
394         return count;
395 }
396
397 static ssize_t dispatching_store(struct device *dev,
398                                  struct device_attribute *attr,
399                                  const char *buf,
400                                  size_t count)
401 {
402         int val, rc;
403         char delim;
404
405         if (sscanf(buf, "%d %c", &val, &delim) != 1)
406                 return -EINVAL;
407         if (val != 0 && val != 1)
408                 return -EINVAL;
409         rc = 0;
410         cpus_read_lock();
411         mutex_lock(&smp_cpu_state_mutex);
412         if (cpu_management == val)
413                 goto out;
414         rc = topology_set_cpu_management(val);
415         if (rc)
416                 goto out;
417         cpu_management = val;
418         topology_expect_change();
419 out:
420         mutex_unlock(&smp_cpu_state_mutex);
421         cpus_read_unlock();
422         return rc ? rc : count;
423 }
424 static DEVICE_ATTR_RW(dispatching);
425
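/*
 * Per-CPU "polarization" attribute: reports whether the CPU is
 * horizontally polarized or vertically polarized with high, medium or
 * low entitlement.
 */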
426 static ssize_t cpu_polarization_show(struct device *dev,
427                                      struct device_attribute *attr, char *buf)
428 {
429         int cpu = dev->id;
430         ssize_t count;
431
432         mutex_lock(&smp_cpu_state_mutex);
433         switch (smp_cpu_get_polarization(cpu)) {
434         case POLARIZATION_HRZ:
435                 count = sprintf(buf, "horizontal\n");
436                 break;
437         case POLARIZATION_VL:
438                 count = sprintf(buf, "vertical:low\n");
439                 break;
440         case POLARIZATION_VM:
441                 count = sprintf(buf, "vertical:medium\n");
442                 break;
443         case POLARIZATION_VH:
444                 count = sprintf(buf, "vertical:high\n");
445                 break;
446         default:
447                 count = sprintf(buf, "unknown\n");
448                 break;
449         }
450         mutex_unlock(&smp_cpu_state_mutex);
451         return count;
452 }
453 static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);
454
455 static struct attribute *topology_cpu_attrs[] = {
456         &dev_attr_polarization.attr,
457         NULL,
458 };
459
460 static struct attribute_group topology_cpu_attr_group = {
461         .attrs = topology_cpu_attrs,
462 };
463
464 static ssize_t cpu_dedicated_show(struct device *dev,
465                                   struct device_attribute *attr, char *buf)
466 {
467         int cpu = dev->id;
468         ssize_t count;
469
470         mutex_lock(&smp_cpu_state_mutex);
471         count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
472         mutex_unlock(&smp_cpu_state_mutex);
473         return count;
474 }
475 static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);
476
477 static struct attribute *topology_extra_cpu_attrs[] = {
478         &dev_attr_dedicated.attr,
479         NULL,
480 };
481
482 static struct attribute_group topology_extra_cpu_attr_group = {
483         .attrs = topology_extra_cpu_attrs,
484 };
485
486 int topology_cpu_init(struct cpu *cpu)
487 {
488         int rc;
489
490         rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
491         if (rc || !MACHINE_HAS_TOPOLOGY)
492                 return rc;
493         rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
494         if (rc)
495                 sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
496         return rc;
497 }
498
499 static const struct cpumask *cpu_thread_mask(int cpu)
500 {
501         return &cpu_topology[cpu].thread_mask;
502 }

503
505 const struct cpumask *cpu_coregroup_mask(int cpu)
506 {
507         return &cpu_topology[cpu].core_mask;
508 }
509
510 static const struct cpumask *cpu_book_mask(int cpu)
511 {
512         return &cpu_topology[cpu].book_mask;
513 }
514
515 static const struct cpumask *cpu_drawer_mask(int cpu)
516 {
517         return &cpu_topology[cpu].drawer_mask;
518 }
519
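/*
 * Scheduler domain hierarchy used on s390: SMT siblings, cores of a
 * socket (MC), sockets of a book, books of a drawer, and finally the
 * whole machine (PKG).
 */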
520 static struct sched_domain_topology_level s390_topology[] = {
521         { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
522         { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
523         { cpu_book_mask, SD_INIT_NAME(BOOK) },
524         { cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
525         { cpu_cpu_mask, SD_INIT_NAME(PKG) },
526         { NULL, },
527 };
528
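/*
 * Pre-allocate the mask_info list for one nesting level (socket, book
 * or drawer).  The number of entries is an upper bound derived from the
 * SYSIB 15.1.x magnitude array, with a minimum of one.
 */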
529 static void __init alloc_masks(struct sysinfo_15_1_x *info,
530                                struct mask_info *mask, int offset)
531 {
532         int i, nr_masks;
533
534         nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
535         for (i = 0; i < info->mnest - offset; i++)
536                 nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
537         nr_masks = max(nr_masks, 1);
538         for (i = 0; i < nr_masks; i++) {
539                 mask->next = memblock_alloc(sizeof(*mask->next), 8);
540                 if (!mask->next)
541                         panic("%s: Failed to allocate %zu bytes align=0x%x\n",
542                               __func__, sizeof(*mask->next), 8);
543                 mask = mask->next;
544         }
545 }
546
547 void __init topology_init_early(void)
548 {
549         struct sysinfo_15_1_x *info;
550
551         set_sched_topology(s390_topology);
552         if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
553                 if (MACHINE_HAS_TOPOLOGY)
554                         topology_mode = TOPOLOGY_MODE_HW;
555                 else
556                         topology_mode = TOPOLOGY_MODE_SINGLE;
557         }
558         if (!MACHINE_HAS_TOPOLOGY)
559                 goto out;
560         tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
561         if (!tl_info)
562                 panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
563                       __func__, PAGE_SIZE, PAGE_SIZE);
564         info = tl_info;
565         store_topology(info);
566         pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
567                 info->mag[0], info->mag[1], info->mag[2], info->mag[3],
568                 info->mag[4], info->mag[5], info->mnest);
569         alloc_masks(info, &socket_info, 1);
570         alloc_masks(info, &book_info, 2);
571         alloc_masks(info, &drawer_info, 3);
572 out:
573         cpumask_set_cpu(0, &cpu_setup_mask);
574         __arch_update_cpu_topology();
575         __arch_update_dedicated_flag(NULL);
576 }
577
578 static inline int topology_get_mode(int enabled)
579 {
580         if (!enabled)
581                 return TOPOLOGY_MODE_SINGLE;
582         return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
583 }
584
585 static inline int topology_is_enabled(void)
586 {
587         return topology_mode != TOPOLOGY_MODE_SINGLE;
588 }
589
590 static int __init topology_setup(char *str)
591 {
592         bool enabled;
593         int rc;
594
595         rc = kstrtobool(str, &enabled);
596         if (rc)
597                 return rc;
598         topology_mode = topology_get_mode(enabled);
599         return 0;
600 }
601 early_param("topology", topology_setup);
602
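/*
 * sysctl handler for /proc/sys/s390/topology: writing 0 disables the
 * topology (TOPOLOGY_MODE_SINGLE), writing 1 re-enables HW or PACKAGE
 * mode; a mode change schedules a scheduler domain rebuild and waits
 * for it to complete.
 */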
603 static int topology_ctl_handler(struct ctl_table *ctl, int write,
604                                 void *buffer, size_t *lenp, loff_t *ppos)
605 {
606         int enabled = topology_is_enabled();
607         int new_mode;
608         int rc;
609         struct ctl_table ctl_entry = {
610                 .procname       = ctl->procname,
611                 .data           = &enabled,
612                 .maxlen         = sizeof(int),
613                 .extra1         = SYSCTL_ZERO,
614                 .extra2         = SYSCTL_ONE,
615         };
616
617         rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
618         if (rc < 0 || !write)
619                 return rc;
620
621         mutex_lock(&smp_cpu_state_mutex);
622         new_mode = topology_get_mode(enabled);
623         if (topology_mode != new_mode) {
624                 topology_mode = new_mode;
625                 topology_schedule_update();
626         }
627         mutex_unlock(&smp_cpu_state_mutex);
628         topology_flush_work();
629
630         return rc;
631 }
632
633 static struct ctl_table topology_ctl_table[] = {
634         {
635                 .procname       = "topology",
636                 .mode           = 0644,
637                 .proc_handler   = topology_ctl_handler,
638         },
639 };
640
641 static int __init topology_init(void)
642 {
643         struct device *dev_root;
644         int rc = 0;
645
646         timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
647         if (MACHINE_HAS_TOPOLOGY)
648                 set_topology_timer();
649         else
650                 topology_update_polarization_simple();
651         register_sysctl("s390", topology_ctl_table);
652
653         dev_root = bus_get_dev_root(&cpu_subsys);
654         if (dev_root) {
655                 rc = device_create_file(dev_root, &dev_attr_dispatching);
656                 put_device(dev_root);
657         }
658         return rc;
659 }
660 device_initcall(topology_init);