arch/x86/events/intel/uncore.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
3
4 #include <asm/cpu_device_id.h>
5 #include <asm/intel-family.h>
6 #include "uncore.h"
7 #include "uncore_discovery.h"
8
9 static bool uncore_no_discover;
10 module_param(uncore_no_discover, bool, 0);
11 MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
12                                      "(default: enable the discovery mechanism).");
13 struct intel_uncore_type *empty_uncore[] = { NULL, };
14 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
15 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
16 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
17
18 static bool pcidrv_registered;
19 struct pci_driver *uncore_pci_driver;
20 /* The PCI driver for devices which the uncore doesn't own. */
21 struct pci_driver *uncore_pci_sub_driver;
22 /* pci bus to die mapping */
23 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
24 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
25 struct pci_extra_dev *uncore_extra_pci_dev;
26 int __uncore_max_dies;
27
28 /* mask of cpus that collect uncore events */
29 static cpumask_t uncore_cpu_mask;
30
31 /* constraint for the fixed counter */
32 static struct event_constraint uncore_constraint_fixed =
33         EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
34 struct event_constraint uncore_constraint_empty =
35         EVENT_CONSTRAINT(0, 0, 0);
36
37 MODULE_LICENSE("GPL");
38
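/*
 * Map a PCI bus to the logical die id recorded in the pci2phy map.
 * Returns -1 when no mapping exists for the bus's segment and number.
 */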
39 int uncore_pcibus_to_dieid(struct pci_bus *bus)
40 {
41         struct pci2phy_map *map;
42         int die_id = -1;
43
44         raw_spin_lock(&pci2phy_map_lock);
45         list_for_each_entry(map, &pci2phy_map_head, list) {
46                 if (map->segment == pci_domain_nr(bus)) {
47                         die_id = map->pbus_to_dieid[bus->number];
48                         break;
49                 }
50         }
51         raw_spin_unlock(&pci2phy_map_lock);
52
53         return die_id;
54 }
55
56 int uncore_die_to_segment(int die)
57 {
58         struct pci_bus *bus = NULL;
59
60         /* Find the first pci bus which belongs to the specified die. */
61         while ((bus = pci_find_next_bus(bus)) &&
62                (die != uncore_pcibus_to_dieid(bus)))
63                 ;
64
65         return bus ? pci_domain_nr(bus) : -EINVAL;
66 }
67
68 static void uncore_free_pcibus_map(void)
69 {
70         struct pci2phy_map *map, *tmp;
71
72         list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
73                 list_del(&map->list);
74                 kfree(map);
75         }
76 }
77
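/*
 * Look up the pci2phy map for a PCI segment, allocating a fresh entry
 * (with every bus slot initialized to -1) when none exists.  Must be
 * called with pci2phy_map_lock held; the lock is dropped around the
 * allocation and the list is searched again afterwards.
 */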
78 struct pci2phy_map *__find_pci2phy_map(int segment)
79 {
80         struct pci2phy_map *map, *alloc = NULL;
81         int i;
82
83         lockdep_assert_held(&pci2phy_map_lock);
84
85 lookup:
86         list_for_each_entry(map, &pci2phy_map_head, list) {
87                 if (map->segment == segment)
88                         goto end;
89         }
90
91         if (!alloc) {
92                 raw_spin_unlock(&pci2phy_map_lock);
93                 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
94                 raw_spin_lock(&pci2phy_map_lock);
95
96                 if (!alloc)
97                         return NULL;
98
99                 goto lookup;
100         }
101
102         map = alloc;
103         alloc = NULL;
104         map->segment = segment;
105         for (i = 0; i < 256; i++)
106                 map->pbus_to_dieid[i] = -1;
107         list_add_tail(&map->list, &pci2phy_map_head);
108
109 end:
110         kfree(alloc);
111         return map;
112 }
113
114 ssize_t uncore_event_show(struct device *dev,
115                           struct device_attribute *attr, char *buf)
116 {
117         struct uncore_event_desc *event =
118                 container_of(attr, struct uncore_event_desc, attr);
119         return sprintf(buf, "%s", event->config);
120 }
121
122 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
123 {
124         unsigned int dieid = topology_logical_die_id(cpu);
125
126         /*
127          * The unsigned check also catches the '-1' return value for
128          * non-existent mappings in the topology map.
129          */
130         return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
131 }
132
133 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
134 {
135         u64 count;
136
137         rdmsrl(event->hw.event_base, count);
138
139         return count;
140 }
141
142 void uncore_mmio_exit_box(struct intel_uncore_box *box)
143 {
144         if (box->io_addr)
145                 iounmap(box->io_addr);
146 }
147
148 u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
149                              struct perf_event *event)
150 {
151         if (!box->io_addr)
152                 return 0;
153
154         if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
155                 return 0;
156
157         return readq(box->io_addr + event->hw.event_base);
158 }
159
160 /*
161  * generic get constraint function for shared match/mask registers.
162  */
163 struct event_constraint *
164 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
165 {
166         struct intel_uncore_extra_reg *er;
167         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
168         struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
169         unsigned long flags;
170         bool ok = false;
171
172         /*
173          * reg->alloc can be set due to existing state, so for a fake box
174          * we need to ignore this, otherwise we might fail to allocate proper
175          * fake state for this extra reg constraint.
176          */
177         if (reg1->idx == EXTRA_REG_NONE ||
178             (!uncore_box_is_fake(box) && reg1->alloc))
179                 return NULL;
180
181         er = &box->shared_regs[reg1->idx];
182         raw_spin_lock_irqsave(&er->lock, flags);
183         if (!atomic_read(&er->ref) ||
184             (er->config1 == reg1->config && er->config2 == reg2->config)) {
185                 atomic_inc(&er->ref);
186                 er->config1 = reg1->config;
187                 er->config2 = reg2->config;
188                 ok = true;
189         }
190         raw_spin_unlock_irqrestore(&er->lock, flags);
191
192         if (ok) {
193                 if (!uncore_box_is_fake(box))
194                         reg1->alloc = 1;
195                 return NULL;
196         }
197
198         return &uncore_constraint_empty;
199 }
200
201 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
202 {
203         struct intel_uncore_extra_reg *er;
204         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
205
206         /*
207          * Only put the constraint if the extra reg was actually allocated. Also
208          * takes care of events which do not use an extra shared reg.
209          *
210          * Also, if this is a fake box we shouldn't touch any event state
211          * (reg->alloc) and we don't care about leaving inconsistent box
212          * state either since it will be thrown out.
213          */
214         if (uncore_box_is_fake(box) || !reg1->alloc)
215                 return;
216
217         er = &box->shared_regs[reg1->idx];
218         atomic_dec(&er->ref);
219         reg1->alloc = 0;
220 }
221
222 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
223 {
224         struct intel_uncore_extra_reg *er;
225         unsigned long flags;
226         u64 config;
227
228         er = &box->shared_regs[idx];
229
230         raw_spin_lock_irqsave(&er->lock, flags);
231         config = er->config;
232         raw_spin_unlock_irqrestore(&er->lock, flags);
233
234         return config;
235 }
236
237 static void uncore_assign_hw_event(struct intel_uncore_box *box,
238                                    struct perf_event *event, int idx)
239 {
240         struct hw_perf_event *hwc = &event->hw;
241
242         hwc->idx = idx;
243         hwc->last_tag = ++box->tags[idx];
244
245         if (uncore_pmc_fixed(hwc->idx)) {
246                 hwc->event_base = uncore_fixed_ctr(box);
247                 hwc->config_base = uncore_fixed_ctl(box);
248                 return;
249         }
250
251         hwc->config_base = uncore_event_ctl(box, hwc->idx);
252         hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
253 }
254
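/*
 * Fold the latest hardware count into event->count.  The raw values are
 * shifted up to 64 bits and back so the delta is truncated to the real
 * counter width (free-running, fixed or generic).  The prev_count
 * exchange loop guards against a concurrent update from the hrtimer.
 */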
255 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
256 {
257         u64 prev_count, new_count, delta;
258         int shift;
259
260         if (uncore_pmc_freerunning(event->hw.idx))
261                 shift = 64 - uncore_freerunning_bits(box, event);
262         else if (uncore_pmc_fixed(event->hw.idx))
263                 shift = 64 - uncore_fixed_ctr_bits(box);
264         else
265                 shift = 64 - uncore_perf_ctr_bits(box);
266
267         /* the hrtimer might modify the previous event value */
268 again:
269         prev_count = local64_read(&event->hw.prev_count);
270         new_count = uncore_read_counter(box, event);
271         if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
272                 goto again;
273
274         delta = (new_count << shift) - (prev_count << shift);
275         delta >>= shift;
276
277         local64_add(delta, &event->count);
278 }
279
280 /*
281  * The overflow interrupt is unavailable for SandyBridge-EP and broken
282  * for SandyBridge, so we use an hrtimer to periodically poll the
283  * counters to avoid overflow.
284  */
285 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
286 {
287         struct intel_uncore_box *box;
288         struct perf_event *event;
289         unsigned long flags;
290         int bit;
291
292         box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
293         if (!box->n_active || box->cpu != smp_processor_id())
294                 return HRTIMER_NORESTART;
295         /*
296          * Disable local interrupts to prevent uncore_pmu_event_start/stop
297          * from interrupting the update process.
298          */
299         local_irq_save(flags);
300
301         /*
302          * handle boxes with an active event list as opposed to active
303          * counters
304          */
305         list_for_each_entry(event, &box->active_list, active_entry) {
306                 uncore_perf_event_update(box, event);
307         }
308
309         for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
310                 uncore_perf_event_update(box, box->events[bit]);
311
312         local_irq_restore(flags);
313
314         hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
315         return HRTIMER_RESTART;
316 }
317
318 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
319 {
320         hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
321                       HRTIMER_MODE_REL_PINNED);
322 }
323
324 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
325 {
326         hrtimer_cancel(&box->hrtimer);
327 }
328
329 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
330 {
331         hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
332         box->hrtimer.function = uncore_pmu_hrtimer;
333 }
334
335 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
336                                                  int node)
337 {
338         int i, size, numshared = type->num_shared_regs;
339         struct intel_uncore_box *box;
340
341         size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
342
343         box = kzalloc_node(size, GFP_KERNEL, node);
344         if (!box)
345                 return NULL;
346
347         for (i = 0; i < numshared; i++)
348                 raw_spin_lock_init(&box->shared_regs[i].lock);
349
350         uncore_pmu_init_hrtimer(box);
351         box->cpu = -1;
352         box->dieid = -1;
353
354         /* set default hrtimer timeout */
355         box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
356
357         INIT_LIST_HEAD(&box->active_list);
358
359         return box;
360 }
361
362 /*
363  * The uncore_pmu_event_init() pmu event_init callback is used
364  * as a detection point for uncore events.
365  */
366 static int uncore_pmu_event_init(struct perf_event *event);
367
368 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
369 {
370         return &box->pmu->pmu == event->pmu;
371 }
372
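/*
 * Collect the leader and, when dogrp is set, its active sibling events
 * into box->event_list.  The total is bounded by the number of generic
 * counters plus an optional fixed counter; returns the new event count
 * or -EINVAL when the box is already full.
 */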
373 static int
374 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
375                       bool dogrp)
376 {
377         struct perf_event *event;
378         int n, max_count;
379
380         max_count = box->pmu->type->num_counters;
381         if (box->pmu->type->fixed_ctl)
382                 max_count++;
383
384         if (box->n_events >= max_count)
385                 return -EINVAL;
386
387         n = box->n_events;
388
389         if (is_box_event(box, leader)) {
390                 box->event_list[n] = leader;
391                 n++;
392         }
393
394         if (!dogrp)
395                 return n;
396
397         for_each_sibling_event(event, leader) {
398                 if (!is_box_event(box, event) ||
399                     event->state <= PERF_EVENT_STATE_OFF)
400                         continue;
401
402                 if (n >= max_count)
403                         return -EINVAL;
404
405                 box->event_list[n] = event;
406                 n++;
407         }
408         return n;
409 }
410
411 static struct event_constraint *
412 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
413 {
414         struct intel_uncore_type *type = box->pmu->type;
415         struct event_constraint *c;
416
417         if (type->ops->get_constraint) {
418                 c = type->ops->get_constraint(box, event);
419                 if (c)
420                         return c;
421         }
422
423         if (event->attr.config == UNCORE_FIXED_EVENT)
424                 return &uncore_constraint_fixed;
425
426         if (type->constraints) {
427                 for_each_event_constraint(c, type->constraints) {
428                         if ((event->hw.config & c->cmask) == c->code)
429                                 return c;
430                 }
431         }
432
433         return &type->unconstrainted;
434 }
435
436 static void uncore_put_event_constraint(struct intel_uncore_box *box,
437                                         struct perf_event *event)
438 {
439         if (box->pmu->type->ops->put_constraint)
440                 box->pmu->type->ops->put_constraint(box, event);
441 }
442
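/*
 * Assign counters to the first n events of the box.  The fast path keeps
 * every event on the counter it already occupies; if any constraint is no
 * longer satisfied, fall back to perf_assign_events() for a complete
 * reschedule.  Constraints are released again on failure or when only
 * validating (assign == NULL).
 */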
443 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
444 {
445         unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
446         struct event_constraint *c;
447         int i, wmin, wmax, ret = 0;
448         struct hw_perf_event *hwc;
449
450         bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
451
452         for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
453                 c = uncore_get_event_constraint(box, box->event_list[i]);
454                 box->event_constraint[i] = c;
455                 wmin = min(wmin, c->weight);
456                 wmax = max(wmax, c->weight);
457         }
458
459         /* fastpath, try to reuse previous register */
460         for (i = 0; i < n; i++) {
461                 hwc = &box->event_list[i]->hw;
462                 c = box->event_constraint[i];
463
464                 /* never assigned */
465                 if (hwc->idx == -1)
466                         break;
467
468                 /* constraint still honored */
469                 if (!test_bit(hwc->idx, c->idxmsk))
470                         break;
471
472                 /* not already used */
473                 if (test_bit(hwc->idx, used_mask))
474                         break;
475
476                 __set_bit(hwc->idx, used_mask);
477                 if (assign)
478                         assign[i] = hwc->idx;
479         }
480         /* slow path */
481         if (i != n)
482                 ret = perf_assign_events(box->event_constraint, n,
483                                          wmin, wmax, n, assign);
484
485         if (!assign || ret) {
486                 for (i = 0; i < n; i++)
487                         uncore_put_event_constraint(box, box->event_list[i]);
488         }
489         return ret ? -EINVAL : 0;
490 }
491
492 void uncore_pmu_event_start(struct perf_event *event, int flags)
493 {
494         struct intel_uncore_box *box = uncore_event_to_box(event);
495         int idx = event->hw.idx;
496
497         if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
498                 return;
499
500         /*
501          * A free running counter is read-only and always active.
502          * Use the current counter value as the start point.
503          * There is no overflow interrupt for a free running counter.
504          * Use an hrtimer to periodically poll the counter to avoid overflow.
505          */
506         if (uncore_pmc_freerunning(event->hw.idx)) {
507                 list_add_tail(&event->active_entry, &box->active_list);
508                 local64_set(&event->hw.prev_count,
509                             uncore_read_counter(box, event));
510                 if (box->n_active++ == 0)
511                         uncore_pmu_start_hrtimer(box);
512                 return;
513         }
514
515         if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
516                 return;
517
518         event->hw.state = 0;
519         box->events[idx] = event;
520         box->n_active++;
521         __set_bit(idx, box->active_mask);
522
523         local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
524         uncore_enable_event(box, event);
525
526         if (box->n_active == 1)
527                 uncore_pmu_start_hrtimer(box);
528 }
529
530 void uncore_pmu_event_stop(struct perf_event *event, int flags)
531 {
532         struct intel_uncore_box *box = uncore_event_to_box(event);
533         struct hw_perf_event *hwc = &event->hw;
534
535         /* Cannot disable a free running counter, which is read-only */
536         if (uncore_pmc_freerunning(hwc->idx)) {
537                 list_del(&event->active_entry);
538                 if (--box->n_active == 0)
539                         uncore_pmu_cancel_hrtimer(box);
540                 uncore_perf_event_update(box, event);
541                 return;
542         }
543
544         if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
545                 uncore_disable_event(box, event);
546                 box->n_active--;
547                 box->events[hwc->idx] = NULL;
548                 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
549                 hwc->state |= PERF_HES_STOPPED;
550
551                 if (box->n_active == 0)
552                         uncore_pmu_cancel_hrtimer(box);
553         }
554
555         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
556                 /*
557                  * Drain the remaining delta count out of an event
558                  * that we are disabling:
559                  */
560                 uncore_perf_event_update(box, event);
561                 hwc->state |= PERF_HES_UPTODATE;
562         }
563 }
564
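/*
 * Add an event to the box.  Free running counters bypass the scheduling
 * logic since they are 1:1 mapped to their events; all other events are
 * collected, (re)assigned to counters and restarted unless they were
 * added in a stopped state (PERF_HES_ARCH).
 */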
565 int uncore_pmu_event_add(struct perf_event *event, int flags)
566 {
567         struct intel_uncore_box *box = uncore_event_to_box(event);
568         struct hw_perf_event *hwc = &event->hw;
569         int assign[UNCORE_PMC_IDX_MAX];
570         int i, n, ret;
571
572         if (!box)
573                 return -ENODEV;
574
575         /*
576          * The free running counter is assigned in event_init().
577          * The free running counter event and free running counter
578          * are 1:1 mapped. It doesn't need to be tracked in event_list.
579          */
580         if (uncore_pmc_freerunning(hwc->idx)) {
581                 if (flags & PERF_EF_START)
582                         uncore_pmu_event_start(event, 0);
583                 return 0;
584         }
585
586         ret = n = uncore_collect_events(box, event, false);
587         if (ret < 0)
588                 return ret;
589
590         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
591         if (!(flags & PERF_EF_START))
592                 hwc->state |= PERF_HES_ARCH;
593
594         ret = uncore_assign_events(box, assign, n);
595         if (ret)
596                 return ret;
597
598         /* save events moving to new counters */
599         for (i = 0; i < box->n_events; i++) {
600                 event = box->event_list[i];
601                 hwc = &event->hw;
602
603                 if (hwc->idx == assign[i] &&
604                         hwc->last_tag == box->tags[assign[i]])
605                         continue;
606                 /*
607                  * Ensure we don't accidentally enable a stopped
608                  * counter simply because we rescheduled.
609                  */
610                 if (hwc->state & PERF_HES_STOPPED)
611                         hwc->state |= PERF_HES_ARCH;
612
613                 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
614         }
615
616         /* reprogram moved events into new counters */
617         for (i = 0; i < n; i++) {
618                 event = box->event_list[i];
619                 hwc = &event->hw;
620
621                 if (hwc->idx != assign[i] ||
622                         hwc->last_tag != box->tags[assign[i]])
623                         uncore_assign_hw_event(box, event, assign[i]);
624                 else if (i < box->n_events)
625                         continue;
626
627                 if (hwc->state & PERF_HES_ARCH)
628                         continue;
629
630                 uncore_pmu_event_start(event, 0);
631         }
632         box->n_events = n;
633
634         return 0;
635 }
636
637 void uncore_pmu_event_del(struct perf_event *event, int flags)
638 {
639         struct intel_uncore_box *box = uncore_event_to_box(event);
640         int i;
641
642         uncore_pmu_event_stop(event, PERF_EF_UPDATE);
643
644         /*
645          * The event for a free running counter is not tracked by event_list.
646          * There is no need to force event->hw.idx = -1 to reassign the counter,
647          * because the event and the free running counter are 1:1 mapped.
648          */
649         if (uncore_pmc_freerunning(event->hw.idx))
650                 return;
651
652         for (i = 0; i < box->n_events; i++) {
653                 if (event == box->event_list[i]) {
654                         uncore_put_event_constraint(box, event);
655
656                         for (++i; i < box->n_events; i++)
657                                 box->event_list[i - 1] = box->event_list[i];
658
659                         --box->n_events;
660                         break;
661                 }
662         }
663
664         event->hw.idx = -1;
665         event->hw.last_tag = ~0ULL;
666 }
667
668 void uncore_pmu_event_read(struct perf_event *event)
669 {
670         struct intel_uncore_box *box = uncore_event_to_box(event);
671         uncore_perf_event_update(box, event);
672 }
673
674 /*
675  * validation ensures the group can be loaded onto the
676  * PMU if it was the only group available.
677  */
678 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
679                                 struct perf_event *event)
680 {
681         struct perf_event *leader = event->group_leader;
682         struct intel_uncore_box *fake_box;
683         int ret = -EINVAL, n;
684
685         /* The free running counter is always active. */
686         if (uncore_pmc_freerunning(event->hw.idx))
687                 return 0;
688
689         fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
690         if (!fake_box)
691                 return -ENOMEM;
692
693         fake_box->pmu = pmu;
694         /*
695          * The event is not yet connected with its
696          * siblings, therefore we must first collect the
697          * existing siblings, then add the new event
698          * before we can simulate the scheduling.
699          */
700         n = uncore_collect_events(fake_box, leader, true);
701         if (n < 0)
702                 goto out;
703
704         fake_box->n_events = n;
705         n = uncore_collect_events(fake_box, event, false);
706         if (n < 0)
707                 goto out;
708
709         fake_box->n_events = n;
710
711         ret = uncore_assign_events(fake_box, NULL, n);
712 out:
713         kfree(fake_box);
714         return ret;
715 }
716
717 static int uncore_pmu_event_init(struct perf_event *event)
718 {
719         struct intel_uncore_pmu *pmu;
720         struct intel_uncore_box *box;
721         struct hw_perf_event *hwc = &event->hw;
722         int ret;
723
724         if (event->attr.type != event->pmu->type)
725                 return -ENOENT;
726
727         pmu = uncore_event_to_pmu(event);
728         /* no device found for this pmu */
729         if (pmu->func_id < 0)
730                 return -ENOENT;
731
732         /* Sampling not supported yet */
733         if (hwc->sample_period)
734                 return -EINVAL;
735
736         /*
737          * Place all uncore events for a particular physical package
738          * onto a single cpu
739          */
740         if (event->cpu < 0)
741                 return -EINVAL;
742         box = uncore_pmu_to_box(pmu, event->cpu);
743         if (!box || box->cpu < 0)
744                 return -EINVAL;
745         event->cpu = box->cpu;
746         event->pmu_private = box;
747
748         event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
749
750         event->hw.idx = -1;
751         event->hw.last_tag = ~0ULL;
752         event->hw.extra_reg.idx = EXTRA_REG_NONE;
753         event->hw.branch_reg.idx = EXTRA_REG_NONE;
754
755         if (event->attr.config == UNCORE_FIXED_EVENT) {
756                 /* no fixed counter */
757                 if (!pmu->type->fixed_ctl)
758                         return -EINVAL;
759                 /*
760                  * if there is only one fixed counter, only the first pmu
761                  * can access the fixed counter
762                  */
763                 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
764                         return -EINVAL;
765
766                 /* fixed counters have event field hardcoded to zero */
767                 hwc->config = 0ULL;
768         } else if (is_freerunning_event(event)) {
769                 hwc->config = event->attr.config;
770                 if (!check_valid_freerunning_event(box, event))
771                         return -EINVAL;
772                 event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
773                 /*
774                  * The free running counter event and free running counter
775                  * are always 1:1 mapped.
776                  * The free running counter is always active.
777                  * Assign the free running counter here.
778                  */
779                 event->hw.event_base = uncore_freerunning_counter(box, event);
780         } else {
781                 hwc->config = event->attr.config &
782                               (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
783                 if (pmu->type->ops->hw_config) {
784                         ret = pmu->type->ops->hw_config(box, event);
785                         if (ret)
786                                 return ret;
787                 }
788         }
789
790         if (event->group_leader != event)
791                 ret = uncore_validate_group(pmu, event);
792         else
793                 ret = 0;
794
795         return ret;
796 }
797
798 static void uncore_pmu_enable(struct pmu *pmu)
799 {
800         struct intel_uncore_pmu *uncore_pmu;
801         struct intel_uncore_box *box;
802
803         uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
804         if (!uncore_pmu)
805                 return;
806
807         box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
808         if (!box)
809                 return;
810
811         if (uncore_pmu->type->ops->enable_box)
812                 uncore_pmu->type->ops->enable_box(box);
813 }
814
815 static void uncore_pmu_disable(struct pmu *pmu)
816 {
817         struct intel_uncore_pmu *uncore_pmu;
818         struct intel_uncore_box *box;
819
820         uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
821         if (!uncore_pmu)
822                 return;
823
824         box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
825         if (!box)
826                 return;
827
828         if (uncore_pmu->type->ops->disable_box)
829                 uncore_pmu->type->ops->disable_box(box);
830 }
831
832 static ssize_t uncore_get_attr_cpumask(struct device *dev,
833                                 struct device_attribute *attr, char *buf)
834 {
835         return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
836 }
837
838 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
839
840 static struct attribute *uncore_pmu_attrs[] = {
841         &dev_attr_cpumask.attr,
842         NULL,
843 };
844
845 static const struct attribute_group uncore_pmu_attr_group = {
846         .attrs = uncore_pmu_attrs,
847 };
848
849 static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
850 {
851         struct intel_uncore_type *type = pmu->type;
852
853         /*
854          * No uncore block name in the discovery table.
855          * Use uncore_type_<typeid>_<boxid> as the name.
856          */
857         if (!type->name) {
858                 if (type->num_boxes == 1)
859                         sprintf(pmu->name, "uncore_type_%u", type->type_id);
860                 else {
861                         sprintf(pmu->name, "uncore_type_%u_%d",
862                                 type->type_id, type->box_ids[pmu->pmu_idx]);
863                 }
864                 return;
865         }
866
867         if (type->num_boxes == 1) {
868                 if (strlen(type->name) > 0)
869                         sprintf(pmu->name, "uncore_%s", type->name);
870                 else
871                         sprintf(pmu->name, "uncore");
872         } else
873                 sprintf(pmu->name, "uncore_%s_%d", type->name, pmu->pmu_idx);
874
875 }
876
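/*
 * Register the perf PMU for an uncore unit.  When the type does not
 * supply its own pmu template, a default one wired to the common uncore
 * callbacks is used; the attribute groups always come from the type.
 */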
877 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
878 {
879         int ret;
880
881         if (!pmu->type->pmu) {
882                 pmu->pmu = (struct pmu) {
883                         .attr_groups    = pmu->type->attr_groups,
884                         .task_ctx_nr    = perf_invalid_context,
885                         .pmu_enable     = uncore_pmu_enable,
886                         .pmu_disable    = uncore_pmu_disable,
887                         .event_init     = uncore_pmu_event_init,
888                         .add            = uncore_pmu_event_add,
889                         .del            = uncore_pmu_event_del,
890                         .start          = uncore_pmu_event_start,
891                         .stop           = uncore_pmu_event_stop,
892                         .read           = uncore_pmu_event_read,
893                         .module         = THIS_MODULE,
894                         .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
895                         .attr_update    = pmu->type->attr_update,
896                 };
897         } else {
898                 pmu->pmu = *pmu->type->pmu;
899                 pmu->pmu.attr_groups = pmu->type->attr_groups;
900                 pmu->pmu.attr_update = pmu->type->attr_update;
901         }
902
903         uncore_get_pmu_name(pmu);
904
905         ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
906         if (!ret)
907                 pmu->registered = true;
908         return ret;
909 }
910
911 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
912 {
913         if (!pmu->registered)
914                 return;
915         perf_pmu_unregister(&pmu->pmu);
916         pmu->registered = false;
917 }
918
919 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
920 {
921         int die;
922
923         for (die = 0; die < uncore_max_dies(); die++)
924                 kfree(pmu->boxes[die]);
925         kfree(pmu->boxes);
926 }
927
928 static void uncore_type_exit(struct intel_uncore_type *type)
929 {
930         struct intel_uncore_pmu *pmu = type->pmus;
931         int i;
932
933         if (type->cleanup_mapping)
934                 type->cleanup_mapping(type);
935
936         if (pmu) {
937                 for (i = 0; i < type->num_boxes; i++, pmu++) {
938                         uncore_pmu_unregister(pmu);
939                         uncore_free_boxes(pmu);
940                 }
941                 kfree(type->pmus);
942                 type->pmus = NULL;
943         }
944         if (type->box_ids) {
945                 kfree(type->box_ids);
946                 type->box_ids = NULL;
947         }
948         kfree(type->events_group);
949         type->events_group = NULL;
950 }
951
952 static void uncore_types_exit(struct intel_uncore_type **types)
953 {
954         for (; *types; types++)
955                 uncore_type_exit(*types);
956 }
957
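/*
 * Set up an uncore type: allocate one PMU per box plus the per-die box
 * pointer arrays, build the "events" attribute group from the type's
 * event descriptors and attach the common cpumask attribute group.
 */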
958 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
959 {
960         struct intel_uncore_pmu *pmus;
961         size_t size;
962         int i, j;
963
964         pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
965         if (!pmus)
966                 return -ENOMEM;
967
968         size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
969
970         for (i = 0; i < type->num_boxes; i++) {
971                 pmus[i].func_id = setid ? i : -1;
972                 pmus[i].pmu_idx = i;
973                 pmus[i].type    = type;
974                 pmus[i].boxes   = kzalloc(size, GFP_KERNEL);
975                 if (!pmus[i].boxes)
976                         goto err;
977         }
978
979         type->pmus = pmus;
980         type->unconstrainted = (struct event_constraint)
981                 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
982                                 0, type->num_counters, 0, 0);
983
984         if (type->event_descs) {
985                 struct {
986                         struct attribute_group group;
987                         struct attribute *attrs[];
988                 } *attr_group;
989                 for (i = 0; type->event_descs[i].attr.attr.name; i++);
990
991                 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
992                                                                 GFP_KERNEL);
993                 if (!attr_group)
994                         goto err;
995
996                 attr_group->group.name = "events";
997                 attr_group->group.attrs = attr_group->attrs;
998
999                 for (j = 0; j < i; j++)
1000                         attr_group->attrs[j] = &type->event_descs[j].attr.attr;
1001
1002                 type->events_group = &attr_group->group;
1003         }
1004
1005         type->pmu_group = &uncore_pmu_attr_group;
1006
1007         if (type->set_mapping)
1008                 type->set_mapping(type);
1009
1010         return 0;
1011
1012 err:
1013         for (i = 0; i < type->num_boxes; i++)
1014                 kfree(pmus[i].boxes);
1015         kfree(pmus);
1016
1017         return -ENOMEM;
1018 }
1019
1020 static int __init
1021 uncore_types_init(struct intel_uncore_type **types, bool setid)
1022 {
1023         int ret;
1024
1025         for (; *types; types++) {
1026                 ret = uncore_type_init(*types, setid);
1027                 if (ret)
1028                         return ret;
1029         }
1030         return 0;
1031 }
1032
1033 /*
1034  * Get the die information of a PCI device.
1035  * @pdev: The PCI device.
1036  * @die: The die id which the device maps to.
1037  */
1038 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
1039 {
1040         *die = uncore_pcibus_to_dieid(pdev->bus);
1041         if (*die < 0)
1042                 return -EINVAL;
1043
1044         return 0;
1045 }
1046
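/*
 * Find the PMU of a PCI device from the discovery information by matching
 * the device's domain, bus and devfn against the box control addresses of
 * all known PCI uncore types.
 */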
1047 static struct intel_uncore_pmu *
1048 uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
1049 {
1050         struct intel_uncore_type **types = uncore_pci_uncores;
1051         struct intel_uncore_type *type;
1052         u64 box_ctl;
1053         int i, die;
1054
1055         for (; *types; types++) {
1056                 type = *types;
1057                 for (die = 0; die < __uncore_max_dies; die++) {
1058                         for (i = 0; i < type->num_boxes; i++) {
1059                                 if (!type->box_ctls[die])
1060                                         continue;
1061                                 box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1062                                 if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
1063                                     pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
1064                                     pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
1065                                         return &type->pmus[i];
1066                         }
1067                 }
1068         }
1069
1070         return NULL;
1071 }
1072
1073 /*
1074  * Find the PMU of a PCI device.
1075  * @pdev: The PCI device.
1076  * @ids: The ID table of the available PCI devices with a PMU.
1077  *       If NULL, search the whole uncore_pci_uncores.
1078  */
1079 static struct intel_uncore_pmu *
1080 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1081 {
1082         struct intel_uncore_pmu *pmu = NULL;
1083         struct intel_uncore_type *type;
1084         kernel_ulong_t data;
1085         unsigned int devfn;
1086
1087         if (!ids)
1088                 return uncore_pci_find_dev_pmu_from_types(pdev);
1089
1090         while (ids && ids->vendor) {
1091                 if ((ids->vendor == pdev->vendor) &&
1092                     (ids->device == pdev->device)) {
1093                         data = ids->driver_data;
1094                         devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1095                                           UNCORE_PCI_DEV_FUNC(data));
1096                         if (devfn == pdev->devfn) {
1097                                 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1098                                 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
1099                                 break;
1100                         }
1101                 }
1102                 ids++;
1103         }
1104         return pmu;
1105 }
1106
1107 /*
1108  * Register the PMU for a PCI device
1109  * @pdev: The PCI device.
1110  * @type: The corresponding PMU type of the device.
1111  * @pmu: The corresponding PMU of the device.
1112  * @die: The die id which the device maps to.
1113  */
1114 static int uncore_pci_pmu_register(struct pci_dev *pdev,
1115                                    struct intel_uncore_type *type,
1116                                    struct intel_uncore_pmu *pmu,
1117                                    int die)
1118 {
1119         struct intel_uncore_box *box;
1120         int ret;
1121
1122         if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1123                 return -EINVAL;
1124
1125         box = uncore_alloc_box(type, NUMA_NO_NODE);
1126         if (!box)
1127                 return -ENOMEM;
1128
1129         if (pmu->func_id < 0)
1130                 pmu->func_id = pdev->devfn;
1131         else
1132                 WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1133
1134         atomic_inc(&box->refcnt);
1135         box->dieid = die;
1136         box->pci_dev = pdev;
1137         box->pmu = pmu;
1138         uncore_box_init(box);
1139
1140         pmu->boxes[die] = box;
1141         if (atomic_inc_return(&pmu->activeboxes) > 1)
1142                 return 0;
1143
1144         /* First active box registers the pmu */
1145         ret = uncore_pmu_register(pmu);
1146         if (ret) {
1147                 pmu->boxes[die] = NULL;
1148                 uncore_box_exit(box);
1149                 kfree(box);
1150         }
1151         return ret;
1152 }
1153
1154 /*
1155  * add a pci uncore device
1156  */
1157 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1158 {
1159         struct intel_uncore_type *type;
1160         struct intel_uncore_pmu *pmu = NULL;
1161         int die, ret;
1162
1163         ret = uncore_pci_get_dev_die_info(pdev, &die);
1164         if (ret)
1165                 return ret;
1166
1167         if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1168                 int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1169
1170                 uncore_extra_pci_dev[die].dev[idx] = pdev;
1171                 pci_set_drvdata(pdev, NULL);
1172                 return 0;
1173         }
1174
1175         type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1176
1177         /*
1178          * Some platforms, e.g. Knights Landing, use a common PCI device ID
1179          * for multiple instances of an uncore PMU device type. We should check
1180          * the PCI slot and function to identify the uncore box.
1181          */
1182         if (id->driver_data & ~0xffff) {
1183                 struct pci_driver *pci_drv = pdev->driver;
1184
1185                 pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1186                 if (pmu == NULL)
1187                         return -ENODEV;
1188         } else {
1189                 /*
1190                  * For a performance monitoring unit with multiple boxes,
1191                  * each box has a different function id.
1192                  */
1193                 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1194         }
1195
1196         ret = uncore_pci_pmu_register(pdev, type, pmu, die);
1197
1198         pci_set_drvdata(pdev, pmu->boxes[die]);
1199
1200         return ret;
1201 }
1202
1203 /*
1204  * Unregister the PMU of a PCI device
1205  * @pmu: The corresponding PMU to be unregistered.
1206  * @die: The die id which the device maps to.
1207  */
1208 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
1209 {
1210         struct intel_uncore_box *box = pmu->boxes[die];
1211
1212         pmu->boxes[die] = NULL;
1213         if (atomic_dec_return(&pmu->activeboxes) == 0)
1214                 uncore_pmu_unregister(pmu);
1215         uncore_box_exit(box);
1216         kfree(box);
1217 }
1218
1219 static void uncore_pci_remove(struct pci_dev *pdev)
1220 {
1221         struct intel_uncore_box *box;
1222         struct intel_uncore_pmu *pmu;
1223         int i, die;
1224
1225         if (uncore_pci_get_dev_die_info(pdev, &die))
1226                 return;
1227
1228         box = pci_get_drvdata(pdev);
1229         if (!box) {
1230                 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1231                         if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1232                                 uncore_extra_pci_dev[die].dev[i] = NULL;
1233                                 break;
1234                         }
1235                 }
1236                 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1237                 return;
1238         }
1239
1240         pmu = box->pmu;
1241
1242         pci_set_drvdata(pdev, NULL);
1243
1244         uncore_pci_pmu_unregister(pmu, die);
1245 }
1246
1247 static int uncore_bus_notify(struct notifier_block *nb,
1248                              unsigned long action, void *data,
1249                              const struct pci_device_id *ids)
1250 {
1251         struct device *dev = data;
1252         struct pci_dev *pdev = to_pci_dev(dev);
1253         struct intel_uncore_pmu *pmu;
1254         int die;
1255
1256         /* Unregister the PMU when the device is going to be deleted. */
1257         if (action != BUS_NOTIFY_DEL_DEVICE)
1258                 return NOTIFY_DONE;
1259
1260         pmu = uncore_pci_find_dev_pmu(pdev, ids);
1261         if (!pmu)
1262                 return NOTIFY_DONE;
1263
1264         if (uncore_pci_get_dev_die_info(pdev, &die))
1265                 return NOTIFY_DONE;
1266
1267         uncore_pci_pmu_unregister(pmu, die);
1268
1269         return NOTIFY_OK;
1270 }
1271
1272 static int uncore_pci_sub_bus_notify(struct notifier_block *nb,
1273                                      unsigned long action, void *data)
1274 {
1275         return uncore_bus_notify(nb, action, data,
1276                                  uncore_pci_sub_driver->id_table);
1277 }
1278
1279 static struct notifier_block uncore_pci_sub_notifier = {
1280         .notifier_call = uncore_pci_sub_bus_notify,
1281 };
1282
1283 static void uncore_pci_sub_driver_init(void)
1284 {
1285         const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
1286         struct intel_uncore_type *type;
1287         struct intel_uncore_pmu *pmu;
1288         struct pci_dev *pci_sub_dev;
1289         bool notify = false;
1290         unsigned int devfn;
1291         int die;
1292
1293         while (ids && ids->vendor) {
1294                 pci_sub_dev = NULL;
1295                 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
1296                 /*
1297                  * Search for the available devices, and register the
1298                  * corresponding PMUs.
1299                  */
1300                 while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
1301                                                      ids->device, pci_sub_dev))) {
1302                         devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
1303                                           UNCORE_PCI_DEV_FUNC(ids->driver_data));
1304                         if (devfn != pci_sub_dev->devfn)
1305                                 continue;
1306
1307                         pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
1308                         if (!pmu)
1309                                 continue;
1310
1311                         if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
1312                                 continue;
1313
1314                         if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
1315                                                      die))
1316                                 notify = true;
1317                 }
1318                 ids++;
1319         }
1320
1321         if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier))
1322                 notify = false;
1323
1324         if (!notify)
1325                 uncore_pci_sub_driver = NULL;
1326 }
1327
1328 static int uncore_pci_bus_notify(struct notifier_block *nb,
1329                                      unsigned long action, void *data)
1330 {
1331         return uncore_bus_notify(nb, action, data, NULL);
1332 }
1333
1334 static struct notifier_block uncore_pci_notifier = {
1335         .notifier_call = uncore_pci_bus_notify,
1336 };
1337
1338
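/*
 * Register PMUs for the PCI uncore units described by the discovery
 * table, i.e. on platforms without a dedicated uncore PCI driver.  A bus
 * notifier is installed so the PMUs are torn down on device removal.
 */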
1339 static void uncore_pci_pmus_register(void)
1340 {
1341         struct intel_uncore_type **types = uncore_pci_uncores;
1342         struct intel_uncore_type *type;
1343         struct intel_uncore_pmu *pmu;
1344         struct pci_dev *pdev;
1345         u64 box_ctl;
1346         int i, die;
1347
1348         for (; *types; types++) {
1349                 type = *types;
1350                 for (die = 0; die < __uncore_max_dies; die++) {
1351                         for (i = 0; i < type->num_boxes; i++) {
1352                                 if (!type->box_ctls[die])
1353                                         continue;
1354                                 box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1355                                 pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl),
1356                                                                    UNCORE_DISCOVERY_PCI_BUS(box_ctl),
1357                                                                    UNCORE_DISCOVERY_PCI_DEVFN(box_ctl));
1358                                 if (!pdev)
1359                                         continue;
1360                                 pmu = &type->pmus[i];
1361
1362                                 uncore_pci_pmu_register(pdev, type, pmu, die);
1363                         }
1364                 }
1365         }
1366
1367         bus_register_notifier(&pci_bus_type, &uncore_pci_notifier);
1368 }
1369
1370 static int __init uncore_pci_init(void)
1371 {
1372         size_t size;
1373         int ret;
1374
1375         size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1376         uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1377         if (!uncore_extra_pci_dev) {
1378                 ret = -ENOMEM;
1379                 goto err;
1380         }
1381
1382         ret = uncore_types_init(uncore_pci_uncores, false);
1383         if (ret)
1384                 goto errtype;
1385
1386         if (uncore_pci_driver) {
1387                 uncore_pci_driver->probe = uncore_pci_probe;
1388                 uncore_pci_driver->remove = uncore_pci_remove;
1389
1390                 ret = pci_register_driver(uncore_pci_driver);
1391                 if (ret)
1392                         goto errtype;
1393         } else
1394                 uncore_pci_pmus_register();
1395
1396         if (uncore_pci_sub_driver)
1397                 uncore_pci_sub_driver_init();
1398
1399         pcidrv_registered = true;
1400         return 0;
1401
1402 errtype:
1403         uncore_types_exit(uncore_pci_uncores);
1404         kfree(uncore_extra_pci_dev);
1405         uncore_extra_pci_dev = NULL;
1406         uncore_free_pcibus_map();
1407 err:
1408         uncore_pci_uncores = empty_uncore;
1409         return ret;
1410 }
1411
1412 static void uncore_pci_exit(void)
1413 {
1414         if (pcidrv_registered) {
1415                 pcidrv_registered = false;
1416                 if (uncore_pci_sub_driver)
1417                         bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier);
1418                 if (uncore_pci_driver)
1419                         pci_unregister_driver(uncore_pci_driver);
1420                 else
1421                         bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier);
1422                 uncore_types_exit(uncore_pci_uncores);
1423                 kfree(uncore_extra_pci_dev);
1424                 uncore_free_pcibus_map();
1425         }
1426 }
1427
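/*
 * Move uncore event collection for one type from old_cpu to new_cpu on
 * the die they share.  Perf contexts are only migrated when both CPUs are
 * valid; otherwise the box merely records its new owner (or -1).
 */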
1428 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1429                                    int new_cpu)
1430 {
1431         struct intel_uncore_pmu *pmu = type->pmus;
1432         struct intel_uncore_box *box;
1433         int i, die;
1434
1435         die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1436         for (i = 0; i < type->num_boxes; i++, pmu++) {
1437                 box = pmu->boxes[die];
1438                 if (!box)
1439                         continue;
1440
1441                 if (old_cpu < 0) {
1442                         WARN_ON_ONCE(box->cpu != -1);
1443                         box->cpu = new_cpu;
1444                         continue;
1445                 }
1446
1447                 WARN_ON_ONCE(box->cpu != old_cpu);
1448                 box->cpu = -1;
1449                 if (new_cpu < 0)
1450                         continue;
1451
1452                 uncore_pmu_cancel_hrtimer(box);
1453                 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1454                 box->cpu = new_cpu;
1455         }
1456 }
1457
1458 static void uncore_change_context(struct intel_uncore_type **uncores,
1459                                   int old_cpu, int new_cpu)
1460 {
1461         for (; *uncores; uncores++)
1462                 uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1463 }
1464
1465 static void uncore_box_unref(struct intel_uncore_type **types, int id)
1466 {
1467         struct intel_uncore_type *type;
1468         struct intel_uncore_pmu *pmu;
1469         struct intel_uncore_box *box;
1470         int i;
1471
1472         for (; *types; types++) {
1473                 type = *types;
1474                 pmu = type->pmus;
1475                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1476                         box = pmu->boxes[id];
1477                         if (box && atomic_dec_return(&box->refcnt) == 0)
1478                                 uncore_box_exit(box);
1479                 }
1480         }
1481 }
1482
1483 static int uncore_event_cpu_offline(unsigned int cpu)
1484 {
1485         int die, target;
1486
1487         /* Check if the exiting cpu is used for collecting uncore events */
1488         if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1489                 goto unref;
1490         /* Find a new cpu to collect uncore events */
1491         target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1492
1493         /* Migrate uncore events to the new target */
1494         if (target < nr_cpu_ids)
1495                 cpumask_set_cpu(target, &uncore_cpu_mask);
1496         else
1497                 target = -1;
1498
1499         uncore_change_context(uncore_msr_uncores, cpu, target);
1500         uncore_change_context(uncore_mmio_uncores, cpu, target);
1501         uncore_change_context(uncore_pci_uncores, cpu, target);
1502
1503 unref:
1504         /* Clear the references */
1505         die = topology_logical_die_id(cpu);
1506         uncore_box_unref(uncore_msr_uncores, die);
1507         uncore_box_unref(uncore_mmio_uncores, die);
1508         return 0;
1509 }
1510
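/*
 * Allocate the boxes which are still missing for all types on a die.
 * Allocation is all-or-nothing: new boxes are gathered on a temporary
 * list (reusing active_list) and only installed into the pmus once every
 * allocation has succeeded.
 */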
1511 static int allocate_boxes(struct intel_uncore_type **types,
1512                          unsigned int die, unsigned int cpu)
1513 {
1514         struct intel_uncore_box *box, *tmp;
1515         struct intel_uncore_type *type;
1516         struct intel_uncore_pmu *pmu;
1517         LIST_HEAD(allocated);
1518         int i;
1519
1520         /* Try to allocate all required boxes */
1521         for (; *types; types++) {
1522                 type = *types;
1523                 pmu = type->pmus;
1524                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1525                         if (pmu->boxes[die])
1526                                 continue;
1527                         box = uncore_alloc_box(type, cpu_to_node(cpu));
1528                         if (!box)
1529                                 goto cleanup;
1530                         box->pmu = pmu;
1531                         box->dieid = die;
1532                         list_add(&box->active_list, &allocated);
1533                 }
1534         }
1535         /* Install them in the pmus */
1536         list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1537                 list_del_init(&box->active_list);
1538                 box->pmu->boxes[die] = box;
1539         }
1540         return 0;
1541
1542 cleanup:
1543         list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1544                 list_del_init(&box->active_list);
1545                 kfree(box);
1546         }
1547         return -ENOMEM;
1548 }
1549
1550 static int uncore_box_ref(struct intel_uncore_type **types,
1551                           int id, unsigned int cpu)
1552 {
1553         struct intel_uncore_type *type;
1554         struct intel_uncore_pmu *pmu;
1555         struct intel_uncore_box *box;
1556         int i, ret;
1557
1558         ret = allocate_boxes(types, id, cpu);
1559         if (ret)
1560                 return ret;
1561
1562         for (; *types; types++) {
1563                 type = *types;
1564                 pmu = type->pmus;
1565                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1566                         box = pmu->boxes[id];
1567                         if (box && atomic_inc_return(&box->refcnt) == 1)
1568                                 uncore_box_init(box);
1569                 }
1570         }
1571         return 0;
1572 }
1573
1574 static int uncore_event_cpu_online(unsigned int cpu)
1575 {
1576         int die, target, msr_ret, mmio_ret;
1577
1578         die = topology_logical_die_id(cpu);
1579         msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1580         mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1581         if (msr_ret && mmio_ret)
1582                 return -ENOMEM;
1583
1584         /*
1585          * Check if there is an online cpu in the die
1586          * which collects uncore events already.
1587          */
1588         target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1589         if (target < nr_cpu_ids)
1590                 return 0;
1591
1592         cpumask_set_cpu(cpu, &uncore_cpu_mask);
1593
1594         if (!msr_ret)
1595                 uncore_change_context(uncore_msr_uncores, -1, cpu);
1596         if (!mmio_ret)
1597                 uncore_change_context(uncore_mmio_uncores, -1, cpu);
1598         uncore_change_context(uncore_pci_uncores, -1, cpu);
1599         return 0;
1600 }
1601
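     /* Register one perf PMU for each box of the given uncore type. */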
1602 static int __init type_pmu_register(struct intel_uncore_type *type)
1603 {
1604         int i, ret;
1605
1606         for (i = 0; i < type->num_boxes; i++) {
1607                 ret = uncore_pmu_register(&type->pmus[i]);
1608                 if (ret)
1609                         return ret;
1610         }
1611         return 0;
1612 }
1613
1614 static int __init uncore_msr_pmus_register(void)
1615 {
1616         struct intel_uncore_type **types = uncore_msr_uncores;
1617         int ret;
1618
1619         for (; *types; types++) {
1620                 ret = type_pmu_register(*types);
1621                 if (ret)
1622                         return ret;
1623         }
1624         return 0;
1625 }
1626
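     /*
      * Set up and register the MSR based uncore PMUs.  On failure the
      * MSR uncore list is reset to empty_uncore so the rest of the
      * driver sees no MSR uncores at all.
      */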
1627 static int __init uncore_cpu_init(void)
1628 {
1629         int ret;
1630
1631         ret = uncore_types_init(uncore_msr_uncores, true);
1632         if (ret)
1633                 goto err;
1634
1635         ret = uncore_msr_pmus_register();
1636         if (ret)
1637                 goto err;
1638         return 0;
1639 err:
1640         uncore_types_exit(uncore_msr_uncores);
1641         uncore_msr_uncores = empty_uncore;
1642         return ret;
1643 }
1644
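     /* Same as uncore_cpu_init(), but for the MMIO based uncore PMUs. */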
1645 static int __init uncore_mmio_init(void)
1646 {
1647         struct intel_uncore_type **types = uncore_mmio_uncores;
1648         int ret;
1649
1650         ret = uncore_types_init(types, true);
1651         if (ret)
1652                 goto err;
1653
1654         for (; *types; types++) {
1655                 ret = type_pmu_register(*types);
1656                 if (ret)
1657                         goto err;
1658         }
1659         return 0;
1660 err:
1661         uncore_types_exit(uncore_mmio_uncores);
1662         uncore_mmio_uncores = empty_uncore;
1663         return ret;
1664 }
1665
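     /*
      * Per CPU-model init hooks: cpu_init sets up the MSR based uncores,
      * pci_init the PCI based ones and mmio_init the MMIO based ones.
      * Any of them may be left NULL when a model has no uncore units of
      * that kind.
      */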
1666 struct intel_uncore_init_fun {
1667         void    (*cpu_init)(void);
1668         int     (*pci_init)(void);
1669         void    (*mmio_init)(void);
1670 };
1671
1672 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1673         .cpu_init = nhm_uncore_cpu_init,
1674 };
1675
1676 static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1677         .cpu_init = snb_uncore_cpu_init,
1678         .pci_init = snb_uncore_pci_init,
1679 };
1680
1681 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1682         .cpu_init = snb_uncore_cpu_init,
1683         .pci_init = ivb_uncore_pci_init,
1684 };
1685
1686 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1687         .cpu_init = snb_uncore_cpu_init,
1688         .pci_init = hsw_uncore_pci_init,
1689 };
1690
1691 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1692         .cpu_init = snb_uncore_cpu_init,
1693         .pci_init = bdw_uncore_pci_init,
1694 };
1695
1696 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1697         .cpu_init = snbep_uncore_cpu_init,
1698         .pci_init = snbep_uncore_pci_init,
1699 };
1700
1701 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1702         .cpu_init = nhmex_uncore_cpu_init,
1703 };
1704
1705 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1706         .cpu_init = ivbep_uncore_cpu_init,
1707         .pci_init = ivbep_uncore_pci_init,
1708 };
1709
1710 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1711         .cpu_init = hswep_uncore_cpu_init,
1712         .pci_init = hswep_uncore_pci_init,
1713 };
1714
1715 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1716         .cpu_init = bdx_uncore_cpu_init,
1717         .pci_init = bdx_uncore_pci_init,
1718 };
1719
1720 static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1721         .cpu_init = knl_uncore_cpu_init,
1722         .pci_init = knl_uncore_pci_init,
1723 };
1724
1725 static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1726         .cpu_init = skl_uncore_cpu_init,
1727         .pci_init = skl_uncore_pci_init,
1728 };
1729
1730 static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1731         .cpu_init = skx_uncore_cpu_init,
1732         .pci_init = skx_uncore_pci_init,
1733 };
1734
1735 static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1736         .cpu_init = icl_uncore_cpu_init,
1737         .pci_init = skl_uncore_pci_init,
1738 };
1739
1740 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1741         .cpu_init = tgl_uncore_cpu_init,
1742         .mmio_init = tgl_uncore_mmio_init,
1743 };
1744
1745 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1746         .cpu_init = tgl_uncore_cpu_init,
1747         .mmio_init = tgl_l_uncore_mmio_init,
1748 };
1749
1750 static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
1751         .cpu_init = tgl_uncore_cpu_init,
1752         .pci_init = skl_uncore_pci_init,
1753 };
1754
1755 static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
1756         .cpu_init = adl_uncore_cpu_init,
1757         .mmio_init = tgl_uncore_mmio_init,
1758 };
1759
1760 static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1761         .cpu_init = icx_uncore_cpu_init,
1762         .pci_init = icx_uncore_pci_init,
1763         .mmio_init = icx_uncore_mmio_init,
1764 };
1765
1766 static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1767         .cpu_init = snr_uncore_cpu_init,
1768         .pci_init = snr_uncore_pci_init,
1769         .mmio_init = snr_uncore_mmio_init,
1770 };
1771
1772 static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
1773         .cpu_init = intel_uncore_generic_uncore_cpu_init,
1774         .pci_init = intel_uncore_generic_uncore_pci_init,
1775         .mmio_init = intel_uncore_generic_uncore_mmio_init,
1776 };
1777
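     /*
      * Map Family 6 CPU models to their uncore init functions.  Models
      * that are not listed here may still be handled by the generic,
      * discovery-table based code selected in intel_uncore_init().  A
      * new model is normally added with a single line, e.g.
      * (illustrative only):
      *
      *	X86_MATCH_INTEL_FAM6_MODEL(SOME_MODEL, &some_uncore_init),
      */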
1778 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1779         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &nhm_uncore_init),
1780         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &nhm_uncore_init),
1781         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &nhm_uncore_init),
1782         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &nhm_uncore_init),
1783         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &snb_uncore_init),
1784         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &ivb_uncore_init),
1785         X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &hsw_uncore_init),
1786         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &hsw_uncore_init),
1787         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &hsw_uncore_init),
1788         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &bdw_uncore_init),
1789         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &bdw_uncore_init),
1790         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &snbep_uncore_init),
1791         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &nhmex_uncore_init),
1792         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &nhmex_uncore_init),
1793         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &ivbep_uncore_init),
1794         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &hswep_uncore_init),
1795         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &bdx_uncore_init),
1796         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &bdx_uncore_init),
1797         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &knl_uncore_init),
1798         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &knl_uncore_init),
1799         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &skl_uncore_init),
1800         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &skl_uncore_init),
1801         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &skx_uncore_init),
1802         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &skl_uncore_init),
1803         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &skl_uncore_init),
1804         X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,         &skl_uncore_init),
1805         X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,           &skl_uncore_init),
1806         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           &icl_uncore_init),
1807         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,        &icl_uncore_init),
1808         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,             &icl_uncore_init),
1809         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &icx_uncore_init),
1810         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &icx_uncore_init),
1811         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         &tgl_l_uncore_init),
1812         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           &tgl_uncore_init),
1813         X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &rkl_uncore_init),
1814         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &adl_uncore_init),
1815         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &adl_uncore_init),
1816         X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &snr_uncore_init),
1817         {},
1818 };
1819 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
1820
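     /*
      * Module init.  Rough sequence:
      *
      *  1. Refuse to load when running under a hypervisor.
      *  2. Size __uncore_max_dies from the package/die topology.
      *  3. Look up the per-model init hooks; unknown models fall back
      *     to the generic, discovery-table driven implementation when
      *     discovery tables are present.
      *  4. Run whichever of the PCI, MSR and MMIO init paths the model
      *     provides; only give up when all of them fail.
      *  5. Register the hotplug state, which (roughly sketched) makes
      *     the core invoke
      *	uncore_event_cpu_online(cpu)	for each CPU coming online
      *	uncore_event_cpu_offline(cpu)	for each CPU going offline
      */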
1821 static int __init intel_uncore_init(void)
1822 {
1823         const struct x86_cpu_id *id;
1824         struct intel_uncore_init_fun *uncore_init;
1825         int pret = 0, cret = 0, mret = 0, ret;
1826
1827         if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1828                 return -ENODEV;
1829
1830         __uncore_max_dies =
1831                 topology_max_packages() * topology_max_die_per_package();
1832
1833         id = x86_match_cpu(intel_uncore_match);
1834         if (!id) {
1835                 if (!uncore_no_discover && intel_uncore_has_discovery_tables())
1836                         uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
1837                 else
1838                         return -ENODEV;
1839         } else
1840                 uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1841
1842         if (uncore_init->pci_init) {
1843                 pret = uncore_init->pci_init();
1844                 if (!pret)
1845                         pret = uncore_pci_init();
1846         }
1847
1848         if (uncore_init->cpu_init) {
1849                 uncore_init->cpu_init();
1850                 cret = uncore_cpu_init();
1851         }
1852
1853         if (uncore_init->mmio_init) {
1854                 uncore_init->mmio_init();
1855                 mret = uncore_mmio_init();
1856         }
1857
1858         if (cret && pret && mret) {
1859                 ret = -ENODEV;
1860                 goto free_discovery;
1861         }
1862
1863         /* Install hotplug callbacks to set up the targets for each die */
1864         ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1865                                 "perf/x86/intel/uncore:online",
1866                                 uncore_event_cpu_online,
1867                                 uncore_event_cpu_offline);
1868         if (ret)
1869                 goto err;
1870         return 0;
1871
1872 err:
1873         uncore_types_exit(uncore_msr_uncores);
1874         uncore_types_exit(uncore_mmio_uncores);
1875         uncore_pci_exit();
1876 free_discovery:
1877         intel_uncore_clear_discovery_tables();
1878         return ret;
1879 }
1880 module_init(intel_uncore_init);
1881
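     /*
      * Module exit: tear down in roughly the reverse order of
      * intel_uncore_init(): remove the hotplug state, free the MSR and
      * MMIO uncore types, unregister the PCI side and drop the
      * discovery tables.
      */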
1882 static void __exit intel_uncore_exit(void)
1883 {
1884         cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1885         uncore_types_exit(uncore_msr_uncores);
1886         uncore_types_exit(uncore_mmio_uncores);
1887         uncore_pci_exit();
1888         intel_uncore_clear_discovery_tables();
1889 }
1890 module_exit(intel_uncore_exit);