arch/x86/events/intel/uncore.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
3
4 #include <asm/cpu_device_id.h>
5 #include <asm/intel-family.h>
6 #include "uncore.h"
7 #include "uncore_discovery.h"
8
9 static bool uncore_no_discover;
10 module_param(uncore_no_discover, bool, 0);
11 MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
12                                      "(default: enable the discovery mechanism).");
13 struct intel_uncore_type *empty_uncore[] = { NULL, };
14 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
15 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
16 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
17
18 static bool pcidrv_registered;
19 struct pci_driver *uncore_pci_driver;
20 /* The PCI driver for the device which the uncore doesn't own. */
21 struct pci_driver *uncore_pci_sub_driver;
22 /* pci bus to die mapping */
23 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
24 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
25 struct pci_extra_dev *uncore_extra_pci_dev;
26 int __uncore_max_dies;
27
28 /* mask of cpus that collect uncore events */
29 static cpumask_t uncore_cpu_mask;
30
31 /* constraint for the fixed counter */
32 static struct event_constraint uncore_constraint_fixed =
33         EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
34 struct event_constraint uncore_constraint_empty =
35         EVENT_CONSTRAINT(0, 0, 0);
36
37 MODULE_LICENSE("GPL");
38
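/*
 * Translate a PCI bus to the logical die id it belongs to, using the
 * pci2phy map (taken under pci2phy_map_lock).  Returns -1 if no mapping
 * is known for the bus.
 */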
39 int uncore_pcibus_to_dieid(struct pci_bus *bus)
40 {
41         struct pci2phy_map *map;
42         int die_id = -1;
43
44         raw_spin_lock(&pci2phy_map_lock);
45         list_for_each_entry(map, &pci2phy_map_head, list) {
46                 if (map->segment == pci_domain_nr(bus)) {
47                         die_id = map->pbus_to_dieid[bus->number];
48                         break;
49                 }
50         }
51         raw_spin_unlock(&pci2phy_map_lock);
52
53         return die_id;
54 }
55
56 int uncore_die_to_segment(int die)
57 {
58         struct pci_bus *bus = NULL;
59
60         /* Find the first PCI bus that belongs to the specified die. */
61         while ((bus = pci_find_next_bus(bus)) &&
62                (die != uncore_pcibus_to_dieid(bus)))
63                 ;
64
65         return bus ? pci_domain_nr(bus) : -EINVAL;
66 }
67
68 static void uncore_free_pcibus_map(void)
69 {
70         struct pci2phy_map *map, *tmp;
71
72         list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
73                 list_del(&map->list);
74                 kfree(map);
75         }
76 }
77
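/*
 * Look up the pci2phy map entry for a PCI segment, allocating a new one
 * if none exists yet.  Called with pci2phy_map_lock held; the lock is
 * dropped around the allocation, so the list is searched again afterwards
 * in case another CPU added the entry in the meantime.  A freshly
 * allocated entry starts with all bus numbers mapped to die -1.
 */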
78 struct pci2phy_map *__find_pci2phy_map(int segment)
79 {
80         struct pci2phy_map *map, *alloc = NULL;
81         int i;
82
83         lockdep_assert_held(&pci2phy_map_lock);
84
85 lookup:
86         list_for_each_entry(map, &pci2phy_map_head, list) {
87                 if (map->segment == segment)
88                         goto end;
89         }
90
91         if (!alloc) {
92                 raw_spin_unlock(&pci2phy_map_lock);
93                 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
94                 raw_spin_lock(&pci2phy_map_lock);
95
96                 if (!alloc)
97                         return NULL;
98
99                 goto lookup;
100         }
101
102         map = alloc;
103         alloc = NULL;
104         map->segment = segment;
105         for (i = 0; i < 256; i++)
106                 map->pbus_to_dieid[i] = -1;
107         list_add_tail(&map->list, &pci2phy_map_head);
108
109 end:
110         kfree(alloc);
111         return map;
112 }
113
114 ssize_t uncore_event_show(struct device *dev,
115                           struct device_attribute *attr, char *buf)
116 {
117         struct uncore_event_desc *event =
118                 container_of(attr, struct uncore_event_desc, attr);
119         return sprintf(buf, "%s", event->config);
120 }
121
122 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
123 {
124         unsigned int dieid = topology_logical_die_id(cpu);
125
126         /*
127          * The unsigned check also catches the '-1' return value for
128          * non-existent mappings in the topology map.
129          */
130         return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
131 }
132
133 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
134 {
135         u64 count;
136
137         rdmsrl(event->hw.event_base, count);
138
139         return count;
140 }
141
142 void uncore_mmio_exit_box(struct intel_uncore_box *box)
143 {
144         if (box->io_addr)
145                 iounmap(box->io_addr);
146 }
147
148 u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
149                              struct perf_event *event)
150 {
151         if (!box->io_addr)
152                 return 0;
153
154         if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
155                 return 0;
156
157         return readq(box->io_addr + event->hw.event_base);
158 }
159
160 /*
161  * generic get constraint function for shared match/mask registers.
162  */
163 struct event_constraint *
164 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
165 {
166         struct intel_uncore_extra_reg *er;
167         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
168         struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
169         unsigned long flags;
170         bool ok = false;
171
172         /*
173          * reg->alloc can be set due to existing state, so for fake box we
174          * need to ignore this, otherwise we might fail to allocate proper
175          * fake state for this extra reg constraint.
176          */
177         if (reg1->idx == EXTRA_REG_NONE ||
178             (!uncore_box_is_fake(box) && reg1->alloc))
179                 return NULL;
180
181         er = &box->shared_regs[reg1->idx];
182         raw_spin_lock_irqsave(&er->lock, flags);
183         if (!atomic_read(&er->ref) ||
184             (er->config1 == reg1->config && er->config2 == reg2->config)) {
185                 atomic_inc(&er->ref);
186                 er->config1 = reg1->config;
187                 er->config2 = reg2->config;
188                 ok = true;
189         }
190         raw_spin_unlock_irqrestore(&er->lock, flags);
191
192         if (ok) {
193                 if (!uncore_box_is_fake(box))
194                         reg1->alloc = 1;
195                 return NULL;
196         }
197
198         return &uncore_constraint_empty;
199 }
200
201 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
202 {
203         struct intel_uncore_extra_reg *er;
204         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
205
206         /*
207          * Only put the constraint if the extra reg was actually allocated. This
208          * also takes care of events which do not use an extra shared reg.
209          *
210          * Also, if this is a fake box we shouldn't touch any event state
211          * (reg->alloc) and we don't care about leaving inconsistent box
212          * state either since it will be thrown out.
213          */
214         if (uncore_box_is_fake(box) || !reg1->alloc)
215                 return;
216
217         er = &box->shared_regs[reg1->idx];
218         atomic_dec(&er->ref);
219         reg1->alloc = 0;
220 }
221
222 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
223 {
224         struct intel_uncore_extra_reg *er;
225         unsigned long flags;
226         u64 config;
227
228         er = &box->shared_regs[idx];
229
230         raw_spin_lock_irqsave(&er->lock, flags);
231         config = er->config;
232         raw_spin_unlock_irqrestore(&er->lock, flags);
233
234         return config;
235 }
236
237 static void uncore_assign_hw_event(struct intel_uncore_box *box,
238                                    struct perf_event *event, int idx)
239 {
240         struct hw_perf_event *hwc = &event->hw;
241
242         hwc->idx = idx;
243         hwc->last_tag = ++box->tags[idx];
244
245         if (uncore_pmc_fixed(hwc->idx)) {
246                 hwc->event_base = uncore_fixed_ctr(box);
247                 hwc->config_base = uncore_fixed_ctl(box);
248                 return;
249         }
250
251         hwc->config_base = uncore_event_ctl(box, hwc->idx);
252         hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
253 }
254
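/*
 * Read the current counter value and add the delta since the last read to
 * event->count.  Shifting both the previous and the new value left by
 * (64 - counter width) before subtracting discards the bits above the
 * counter width, so the delta is correct even after the counter wraps.
 */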
255 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
256 {
257         u64 prev_count, new_count, delta;
258         int shift;
259
260         if (uncore_pmc_freerunning(event->hw.idx))
261                 shift = 64 - uncore_freerunning_bits(box, event);
262         else if (uncore_pmc_fixed(event->hw.idx))
263                 shift = 64 - uncore_fixed_ctr_bits(box);
264         else
265                 shift = 64 - uncore_perf_ctr_bits(box);
266
267         /* the hrtimer might modify the previous event value */
268 again:
269         prev_count = local64_read(&event->hw.prev_count);
270         new_count = uncore_read_counter(box, event);
271         if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
272                 goto again;
273
274         delta = (new_count << shift) - (prev_count << shift);
275         delta >>= shift;
276
277         local64_add(delta, &event->count);
278 }
279
280 /*
281  * The overflow interrupt is unavailable for SandyBridge-EP and is broken
282  * on SandyBridge, so we use an hrtimer to periodically poll the counters
283  * to avoid overflow.
284  */
285 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
286 {
287         struct intel_uncore_box *box;
288         struct perf_event *event;
289         unsigned long flags;
290         int bit;
291
292         box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
293         if (!box->n_active || box->cpu != smp_processor_id())
294                 return HRTIMER_NORESTART;
295         /*
296          * Disable local interrupts to prevent uncore_pmu_event_start/stop
297          * from interrupting the update process.
298          */
299         local_irq_save(flags);
300
301         /*
302          * handle boxes with an active event list as opposed to active
303          * counters
304          */
305         list_for_each_entry(event, &box->active_list, active_entry) {
306                 uncore_perf_event_update(box, event);
307         }
308
309         for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
310                 uncore_perf_event_update(box, box->events[bit]);
311
312         local_irq_restore(flags);
313
314         hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
315         return HRTIMER_RESTART;
316 }
317
318 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
319 {
320         hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
321                       HRTIMER_MODE_REL_PINNED);
322 }
323
324 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
325 {
326         hrtimer_cancel(&box->hrtimer);
327 }
328
329 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
330 {
331         hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
332         box->hrtimer.function = uncore_pmu_hrtimer;
333 }
334
335 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
336                                                  int node)
337 {
338         int i, size, numshared = type->num_shared_regs;
339         struct intel_uncore_box *box;
340
341         size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
342
343         box = kzalloc_node(size, GFP_KERNEL, node);
344         if (!box)
345                 return NULL;
346
347         for (i = 0; i < numshared; i++)
348                 raw_spin_lock_init(&box->shared_regs[i].lock);
349
350         uncore_pmu_init_hrtimer(box);
351         box->cpu = -1;
352         box->dieid = -1;
353
354         /* set default hrtimer timeout */
355         box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
356
357         INIT_LIST_HEAD(&box->active_list);
358
359         return box;
360 }
361
362 /*
363  * Use the uncore_pmu_event_init() pmu event_init callback
364  * as a detection point for uncore events.
365  */
366 static int uncore_pmu_event_init(struct perf_event *event);
367
368 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
369 {
370         return &box->pmu->pmu == event->pmu;
371 }
372
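/*
 * Collect the leader (and, if dogrp is true, its schedulable siblings that
 * belong to this box) into the box's event_list.  Returns the new number
 * of collected events, or -EINVAL if the box runs out of counters.
 */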
373 static int
374 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
375                       bool dogrp)
376 {
377         struct perf_event *event;
378         int n, max_count;
379
380         max_count = box->pmu->type->num_counters;
381         if (box->pmu->type->fixed_ctl)
382                 max_count++;
383
384         if (box->n_events >= max_count)
385                 return -EINVAL;
386
387         n = box->n_events;
388
389         if (is_box_event(box, leader)) {
390                 box->event_list[n] = leader;
391                 n++;
392         }
393
394         if (!dogrp)
395                 return n;
396
397         for_each_sibling_event(event, leader) {
398                 if (!is_box_event(box, event) ||
399                     event->state <= PERF_EVENT_STATE_OFF)
400                         continue;
401
402                 if (n >= max_count)
403                         return -EINVAL;
404
405                 box->event_list[n] = event;
406                 n++;
407         }
408         return n;
409 }
410
411 static struct event_constraint *
412 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
413 {
414         struct intel_uncore_type *type = box->pmu->type;
415         struct event_constraint *c;
416
417         if (type->ops->get_constraint) {
418                 c = type->ops->get_constraint(box, event);
419                 if (c)
420                         return c;
421         }
422
423         if (event->attr.config == UNCORE_FIXED_EVENT)
424                 return &uncore_constraint_fixed;
425
426         if (type->constraints) {
427                 for_each_event_constraint(c, type->constraints) {
428                         if ((event->hw.config & c->cmask) == c->code)
429                                 return c;
430                 }
431         }
432
433         return &type->unconstrainted;
434 }
435
436 static void uncore_put_event_constraint(struct intel_uncore_box *box,
437                                         struct perf_event *event)
438 {
439         if (box->pmu->type->ops->put_constraint)
440                 box->pmu->type->ops->put_constraint(box, event);
441 }
442
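/*
 * Compute a counter assignment for the first n events in the event_list.
 * The fast path keeps events on the counters they already occupy as long
 * as the constraints still allow it; otherwise perf_assign_events() does a
 * full reschedule.  When called with assign == NULL (group validation) or
 * on failure, the constraints taken above are released again.
 */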
443 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
444 {
445         unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
446         struct event_constraint *c;
447         int i, wmin, wmax, ret = 0;
448         struct hw_perf_event *hwc;
449
450         bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
451
452         for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
453                 c = uncore_get_event_constraint(box, box->event_list[i]);
454                 box->event_constraint[i] = c;
455                 wmin = min(wmin, c->weight);
456                 wmax = max(wmax, c->weight);
457         }
458
459         /* fastpath, try to reuse previous register */
460         for (i = 0; i < n; i++) {
461                 hwc = &box->event_list[i]->hw;
462                 c = box->event_constraint[i];
463
464                 /* never assigned */
465                 if (hwc->idx == -1)
466                         break;
467
468                 /* constraint still honored */
469                 if (!test_bit(hwc->idx, c->idxmsk))
470                         break;
471
472                 /* not already used */
473                 if (test_bit(hwc->idx, used_mask))
474                         break;
475
476                 __set_bit(hwc->idx, used_mask);
477                 if (assign)
478                         assign[i] = hwc->idx;
479         }
480         /* slow path */
481         if (i != n)
482                 ret = perf_assign_events(box->event_constraint, n,
483                                          wmin, wmax, n, assign);
484
485         if (!assign || ret) {
486                 for (i = 0; i < n; i++)
487                         uncore_put_event_constraint(box, box->event_list[i]);
488         }
489         return ret ? -EINVAL : 0;
490 }
491
492 void uncore_pmu_event_start(struct perf_event *event, int flags)
493 {
494         struct intel_uncore_box *box = uncore_event_to_box(event);
495         int idx = event->hw.idx;
496
497         if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
498                 return;
499
500         /*
501          * A free running counter is read-only and always active.
502          * Use the current counter value as the start point.
503          * There is no overflow interrupt for a free running counter,
504          * so use the hrtimer to periodically poll the counter to avoid overflow.
505          */
506         if (uncore_pmc_freerunning(event->hw.idx)) {
507                 list_add_tail(&event->active_entry, &box->active_list);
508                 local64_set(&event->hw.prev_count,
509                             uncore_read_counter(box, event));
510                 if (box->n_active++ == 0)
511                         uncore_pmu_start_hrtimer(box);
512                 return;
513         }
514
515         if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
516                 return;
517
518         event->hw.state = 0;
519         box->events[idx] = event;
520         box->n_active++;
521         __set_bit(idx, box->active_mask);
522
523         local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
524         uncore_enable_event(box, event);
525
526         if (box->n_active == 1)
527                 uncore_pmu_start_hrtimer(box);
528 }
529
530 void uncore_pmu_event_stop(struct perf_event *event, int flags)
531 {
532         struct intel_uncore_box *box = uncore_event_to_box(event);
533         struct hw_perf_event *hwc = &event->hw;
534
535         /* Cannot disable free running counter which is read-only */
536         if (uncore_pmc_freerunning(hwc->idx)) {
537                 list_del(&event->active_entry);
538                 if (--box->n_active == 0)
539                         uncore_pmu_cancel_hrtimer(box);
540                 uncore_perf_event_update(box, event);
541                 return;
542         }
543
544         if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
545                 uncore_disable_event(box, event);
546                 box->n_active--;
547                 box->events[hwc->idx] = NULL;
548                 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
549                 hwc->state |= PERF_HES_STOPPED;
550
551                 if (box->n_active == 0)
552                         uncore_pmu_cancel_hrtimer(box);
553         }
554
555         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
556                 /*
557                  * Drain the remaining delta count out of an event
558                  * that we are disabling:
559                  */
560                 uncore_perf_event_update(box, event);
561                 hwc->state |= PERF_HES_UPTODATE;
562         }
563 }
564
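/*
 * pmu::add callback.  Free running counters are already assigned in
 * event_init() and are only started here.  Other events are collected
 * into the box, a new counter assignment is computed, events that move
 * to a different counter are stopped, reprogrammed and restarted, and
 * the new event is started unless it was added without PERF_EF_START.
 */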
565 int uncore_pmu_event_add(struct perf_event *event, int flags)
566 {
567         struct intel_uncore_box *box = uncore_event_to_box(event);
568         struct hw_perf_event *hwc = &event->hw;
569         int assign[UNCORE_PMC_IDX_MAX];
570         int i, n, ret;
571
572         if (!box)
573                 return -ENODEV;
574
575         /*
576          * The free running counter is assigned in event_init().
577          * The free running counter event and the free running counter
578          * are 1:1 mapped. It doesn't need to be tracked in the event_list.
579          */
580         if (uncore_pmc_freerunning(hwc->idx)) {
581                 if (flags & PERF_EF_START)
582                         uncore_pmu_event_start(event, 0);
583                 return 0;
584         }
585
586         ret = n = uncore_collect_events(box, event, false);
587         if (ret < 0)
588                 return ret;
589
590         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
591         if (!(flags & PERF_EF_START))
592                 hwc->state |= PERF_HES_ARCH;
593
594         ret = uncore_assign_events(box, assign, n);
595         if (ret)
596                 return ret;
597
598         /* save events moving to new counters */
599         for (i = 0; i < box->n_events; i++) {
600                 event = box->event_list[i];
601                 hwc = &event->hw;
602
603                 if (hwc->idx == assign[i] &&
604                         hwc->last_tag == box->tags[assign[i]])
605                         continue;
606                 /*
607                  * Ensure we don't accidentally enable a stopped
608                  * counter simply because we rescheduled.
609                  */
610                 if (hwc->state & PERF_HES_STOPPED)
611                         hwc->state |= PERF_HES_ARCH;
612
613                 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
614         }
615
616         /* reprogram moved events into new counters */
617         for (i = 0; i < n; i++) {
618                 event = box->event_list[i];
619                 hwc = &event->hw;
620
621                 if (hwc->idx != assign[i] ||
622                         hwc->last_tag != box->tags[assign[i]])
623                         uncore_assign_hw_event(box, event, assign[i]);
624                 else if (i < box->n_events)
625                         continue;
626
627                 if (hwc->state & PERF_HES_ARCH)
628                         continue;
629
630                 uncore_pmu_event_start(event, 0);
631         }
632         box->n_events = n;
633
634         return 0;
635 }
636
637 void uncore_pmu_event_del(struct perf_event *event, int flags)
638 {
639         struct intel_uncore_box *box = uncore_event_to_box(event);
640         int i;
641
642         uncore_pmu_event_stop(event, PERF_EF_UPDATE);
643
644         /*
645          * The event for a free running counter is not tracked by the event_list.
646          * There is no need to force event->hw.idx = -1 to reassign the counter,
647          * because the event and the free running counter are 1:1 mapped.
648          */
649         if (uncore_pmc_freerunning(event->hw.idx))
650                 return;
651
652         for (i = 0; i < box->n_events; i++) {
653                 if (event == box->event_list[i]) {
654                         uncore_put_event_constraint(box, event);
655
656                         for (++i; i < box->n_events; i++)
657                                 box->event_list[i - 1] = box->event_list[i];
658
659                         --box->n_events;
660                         break;
661                 }
662         }
663
664         event->hw.idx = -1;
665         event->hw.last_tag = ~0ULL;
666 }
667
668 void uncore_pmu_event_read(struct perf_event *event)
669 {
670         struct intel_uncore_box *box = uncore_event_to_box(event);
671         uncore_perf_event_update(box, event);
672 }
673
674 /*
675  * validation ensures the group can be loaded onto the
676  * PMU if it was the only group available.
677  */
678 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
679                                 struct perf_event *event)
680 {
681         struct perf_event *leader = event->group_leader;
682         struct intel_uncore_box *fake_box;
683         int ret = -EINVAL, n;
684
685         /* The free running counter is always active. */
686         if (uncore_pmc_freerunning(event->hw.idx))
687                 return 0;
688
689         fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
690         if (!fake_box)
691                 return -ENOMEM;
692
693         fake_box->pmu = pmu;
694         /*
695          * The event is not yet connected with its
696          * siblings, therefore we must first collect the
697          * existing siblings and then add the new event
698          * before we can simulate the scheduling.
699          */
700         n = uncore_collect_events(fake_box, leader, true);
701         if (n < 0)
702                 goto out;
703
704         fake_box->n_events = n;
705         n = uncore_collect_events(fake_box, event, false);
706         if (n < 0)
707                 goto out;
708
709         fake_box->n_events = n;
710
711         ret = uncore_assign_events(fake_box, NULL, n);
712 out:
713         kfree(fake_box);
714         return ret;
715 }
716
717 static int uncore_pmu_event_init(struct perf_event *event)
718 {
719         struct intel_uncore_pmu *pmu;
720         struct intel_uncore_box *box;
721         struct hw_perf_event *hwc = &event->hw;
722         int ret;
723
724         if (event->attr.type != event->pmu->type)
725                 return -ENOENT;
726
727         pmu = uncore_event_to_pmu(event);
728         /* no device found for this pmu */
729         if (pmu->func_id < 0)
730                 return -ENOENT;
731
732         /* Sampling not supported yet */
733         if (hwc->sample_period)
734                 return -EINVAL;
735
736         /*
737          * Place all uncore events for a particular physical package
738          * onto a single cpu
739          */
740         if (event->cpu < 0)
741                 return -EINVAL;
742         box = uncore_pmu_to_box(pmu, event->cpu);
743         if (!box || box->cpu < 0)
744                 return -EINVAL;
745         event->cpu = box->cpu;
746         event->pmu_private = box;
747
748         event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
749
750         event->hw.idx = -1;
751         event->hw.last_tag = ~0ULL;
752         event->hw.extra_reg.idx = EXTRA_REG_NONE;
753         event->hw.branch_reg.idx = EXTRA_REG_NONE;
754
755         if (event->attr.config == UNCORE_FIXED_EVENT) {
756                 /* no fixed counter */
757                 if (!pmu->type->fixed_ctl)
758                         return -EINVAL;
759                 /*
760                  * if there is only one fixed counter, only the first pmu
761                  * can access the fixed counter
762                  */
763                 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
764                         return -EINVAL;
765
766                 /* fixed counters have event field hardcoded to zero */
767                 hwc->config = 0ULL;
768         } else if (is_freerunning_event(event)) {
769                 hwc->config = event->attr.config;
770                 if (!check_valid_freerunning_event(box, event))
771                         return -EINVAL;
772                 event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
773                 /*
774                  * The free running counter event and free running counter
775                  * are always 1:1 mapped.
776                  * The free running counter is always active.
777                  * Assign the free running counter here.
778                  */
779                 event->hw.event_base = uncore_freerunning_counter(box, event);
780         } else {
781                 hwc->config = event->attr.config &
782                               (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
783                 if (pmu->type->ops->hw_config) {
784                         ret = pmu->type->ops->hw_config(box, event);
785                         if (ret)
786                                 return ret;
787                 }
788         }
789
790         if (event->group_leader != event)
791                 ret = uncore_validate_group(pmu, event);
792         else
793                 ret = 0;
794
795         return ret;
796 }
797
798 static void uncore_pmu_enable(struct pmu *pmu)
799 {
800         struct intel_uncore_pmu *uncore_pmu;
801         struct intel_uncore_box *box;
802
803         uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
804
805         box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
806         if (!box)
807                 return;
808
809         if (uncore_pmu->type->ops->enable_box)
810                 uncore_pmu->type->ops->enable_box(box);
811 }
812
813 static void uncore_pmu_disable(struct pmu *pmu)
814 {
815         struct intel_uncore_pmu *uncore_pmu;
816         struct intel_uncore_box *box;
817
818         uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
819
820         box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
821         if (!box)
822                 return;
823
824         if (uncore_pmu->type->ops->disable_box)
825                 uncore_pmu->type->ops->disable_box(box);
826 }
827
828 static ssize_t uncore_get_attr_cpumask(struct device *dev,
829                                 struct device_attribute *attr, char *buf)
830 {
831         return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
832 }
833
834 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
835
836 static struct attribute *uncore_pmu_attrs[] = {
837         &dev_attr_cpumask.attr,
838         NULL,
839 };
840
841 static const struct attribute_group uncore_pmu_attr_group = {
842         .attrs = uncore_pmu_attrs,
843 };
844
845 void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
846 {
847         struct intel_uncore_type *type = pmu->type;
848
849         if (type->num_boxes == 1)
850                 sprintf(pmu_name, "uncore_type_%u", type->type_id);
851         else {
852                 sprintf(pmu_name, "uncore_type_%u_%d",
853                         type->type_id, type->box_ids[pmu->pmu_idx]);
854         }
855 }
856
857 static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
858 {
859         struct intel_uncore_type *type = pmu->type;
860
861         /*
862          * There is no uncore block name in the discovery table.
863          * Use uncore_type_<typeid>_<boxid> as the name.
864          */
865         if (!type->name) {
866                 uncore_get_alias_name(pmu->name, pmu);
867                 return;
868         }
869
870         if (type->num_boxes == 1) {
871                 if (strlen(type->name) > 0)
872                         sprintf(pmu->name, "uncore_%s", type->name);
873                 else
874                         sprintf(pmu->name, "uncore");
875         } else {
876                 /*
877                  * Use the box ID from the discovery table if applicable.
878                  */
879                 sprintf(pmu->name, "uncore_%s_%d", type->name,
880                         type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
881         }
882 }
883
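/*
 * Register the uncore PMU with the perf core.  Unless the uncore type
 * provides its own pmu template, the generic uncore callbacks are used.
 */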
884 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
885 {
886         int ret;
887
888         if (!pmu->type->pmu) {
889                 pmu->pmu = (struct pmu) {
890                         .attr_groups    = pmu->type->attr_groups,
891                         .task_ctx_nr    = perf_invalid_context,
892                         .pmu_enable     = uncore_pmu_enable,
893                         .pmu_disable    = uncore_pmu_disable,
894                         .event_init     = uncore_pmu_event_init,
895                         .add            = uncore_pmu_event_add,
896                         .del            = uncore_pmu_event_del,
897                         .start          = uncore_pmu_event_start,
898                         .stop           = uncore_pmu_event_stop,
899                         .read           = uncore_pmu_event_read,
900                         .module         = THIS_MODULE,
901                         .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
902                         .attr_update    = pmu->type->attr_update,
903                 };
904         } else {
905                 pmu->pmu = *pmu->type->pmu;
906                 pmu->pmu.attr_groups = pmu->type->attr_groups;
907                 pmu->pmu.attr_update = pmu->type->attr_update;
908         }
909
910         uncore_get_pmu_name(pmu);
911
912         ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
913         if (!ret)
914                 pmu->registered = true;
915         return ret;
916 }
917
918 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
919 {
920         if (!pmu->registered)
921                 return;
922         perf_pmu_unregister(&pmu->pmu);
923         pmu->registered = false;
924 }
925
926 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
927 {
928         int die;
929
930         for (die = 0; die < uncore_max_dies(); die++)
931                 kfree(pmu->boxes[die]);
932         kfree(pmu->boxes);
933 }
934
935 static void uncore_type_exit(struct intel_uncore_type *type)
936 {
937         struct intel_uncore_pmu *pmu = type->pmus;
938         int i;
939
940         if (type->cleanup_mapping)
941                 type->cleanup_mapping(type);
942
943         if (pmu) {
944                 for (i = 0; i < type->num_boxes; i++, pmu++) {
945                         uncore_pmu_unregister(pmu);
946                         uncore_free_boxes(pmu);
947                 }
948                 kfree(type->pmus);
949                 type->pmus = NULL;
950         }
951         if (type->box_ids) {
952                 kfree(type->box_ids);
953                 type->box_ids = NULL;
954         }
955         kfree(type->events_group);
956         type->events_group = NULL;
957 }
958
959 static void uncore_types_exit(struct intel_uncore_type **types)
960 {
961         for (; *types; types++)
962                 uncore_type_exit(*types);
963 }
964
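/*
 * Set up one intel_uncore_pmu per box of this type, including the per-die
 * box pointer arrays, the default constraint covering all generic
 * counters, and the "events" sysfs attribute group built from the type's
 * event descriptors.
 */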
965 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
966 {
967         struct intel_uncore_pmu *pmus;
968         size_t size;
969         int i, j;
970
971         pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
972         if (!pmus)
973                 return -ENOMEM;
974
975         size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
976
977         for (i = 0; i < type->num_boxes; i++) {
978                 pmus[i].func_id = setid ? i : -1;
979                 pmus[i].pmu_idx = i;
980                 pmus[i].type    = type;
981                 pmus[i].boxes   = kzalloc(size, GFP_KERNEL);
982                 if (!pmus[i].boxes)
983                         goto err;
984         }
985
986         type->pmus = pmus;
987         type->unconstrainted = (struct event_constraint)
988                 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
989                                 0, type->num_counters, 0, 0);
990
991         if (type->event_descs) {
992                 struct {
993                         struct attribute_group group;
994                         struct attribute *attrs[];
995                 } *attr_group;
996                 for (i = 0; type->event_descs[i].attr.attr.name; i++);
997
998                 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
999                                                                 GFP_KERNEL);
1000                 if (!attr_group)
1001                         goto err;
1002
1003                 attr_group->group.name = "events";
1004                 attr_group->group.attrs = attr_group->attrs;
1005
1006                 for (j = 0; j < i; j++)
1007                         attr_group->attrs[j] = &type->event_descs[j].attr.attr;
1008
1009                 type->events_group = &attr_group->group;
1010         }
1011
1012         type->pmu_group = &uncore_pmu_attr_group;
1013
1014         if (type->set_mapping)
1015                 type->set_mapping(type);
1016
1017         return 0;
1018
1019 err:
1020         for (i = 0; i < type->num_boxes; i++)
1021                 kfree(pmus[i].boxes);
1022         kfree(pmus);
1023
1024         return -ENOMEM;
1025 }
1026
1027 static int __init
1028 uncore_types_init(struct intel_uncore_type **types, bool setid)
1029 {
1030         int ret;
1031
1032         for (; *types; types++) {
1033                 ret = uncore_type_init(*types, setid);
1034                 if (ret)
1035                         return ret;
1036         }
1037         return 0;
1038 }
1039
1040 /*
1041  * Get the die information of a PCI device.
1042  * @pdev: The PCI device.
1043  * @die: The die id which the device maps to.
1044  */
1045 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
1046 {
1047         *die = uncore_pcibus_to_dieid(pdev->bus);
1048         if (*die < 0)
1049                 return -EINVAL;
1050
1051         return 0;
1052 }
1053
1054 static struct intel_uncore_pmu *
1055 uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
1056 {
1057         struct intel_uncore_type **types = uncore_pci_uncores;
1058         struct intel_uncore_type *type;
1059         u64 box_ctl;
1060         int i, die;
1061
1062         for (; *types; types++) {
1063                 type = *types;
1064                 for (die = 0; die < __uncore_max_dies; die++) {
1065                         for (i = 0; i < type->num_boxes; i++) {
1066                                 if (!type->box_ctls[die])
1067                                         continue;
1068                                 box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1069                                 if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
1070                                     pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
1071                                     pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
1072                                         return &type->pmus[i];
1073                         }
1074                 }
1075         }
1076
1077         return NULL;
1078 }
1079
1080 /*
1081  * Find the PMU of a PCI device.
1082  * @pdev: The PCI device.
1083  * @ids: The ID table of the available PCI devices with a PMU.
1084  *       If NULL, search the whole uncore_pci_uncores.
1085  */
1086 static struct intel_uncore_pmu *
1087 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1088 {
1089         struct intel_uncore_pmu *pmu = NULL;
1090         struct intel_uncore_type *type;
1091         kernel_ulong_t data;
1092         unsigned int devfn;
1093
1094         if (!ids)
1095                 return uncore_pci_find_dev_pmu_from_types(pdev);
1096
1097         while (ids && ids->vendor) {
1098                 if ((ids->vendor == pdev->vendor) &&
1099                     (ids->device == pdev->device)) {
1100                         data = ids->driver_data;
1101                         devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1102                                           UNCORE_PCI_DEV_FUNC(data));
1103                         if (devfn == pdev->devfn) {
1104                                 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1105                                 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
1106                                 break;
1107                         }
1108                 }
1109                 ids++;
1110         }
1111         return pmu;
1112 }
1113
1114 /*
1115  * Register the PMU for a PCI device
1116  * @pdev: The PCI device.
1117  * @type: The corresponding PMU type of the device.
1118  * @pmu: The corresponding PMU of the device.
1119  * @die: The die id which the device maps to.
1120  */
1121 static int uncore_pci_pmu_register(struct pci_dev *pdev,
1122                                    struct intel_uncore_type *type,
1123                                    struct intel_uncore_pmu *pmu,
1124                                    int die)
1125 {
1126         struct intel_uncore_box *box;
1127         int ret;
1128
1129         if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1130                 return -EINVAL;
1131
1132         box = uncore_alloc_box(type, NUMA_NO_NODE);
1133         if (!box)
1134                 return -ENOMEM;
1135
1136         if (pmu->func_id < 0)
1137                 pmu->func_id = pdev->devfn;
1138         else
1139                 WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1140
1141         atomic_inc(&box->refcnt);
1142         box->dieid = die;
1143         box->pci_dev = pdev;
1144         box->pmu = pmu;
1145         uncore_box_init(box);
1146
1147         pmu->boxes[die] = box;
1148         if (atomic_inc_return(&pmu->activeboxes) > 1)
1149                 return 0;
1150
1151         /* First active box registers the pmu */
1152         ret = uncore_pmu_register(pmu);
1153         if (ret) {
1154                 pmu->boxes[die] = NULL;
1155                 uncore_box_exit(box);
1156                 kfree(box);
1157         }
1158         return ret;
1159 }
1160
1161 /*
1162  * add a pci uncore device
1163  */
1164 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1165 {
1166         struct intel_uncore_type *type;
1167         struct intel_uncore_pmu *pmu = NULL;
1168         int die, ret;
1169
1170         ret = uncore_pci_get_dev_die_info(pdev, &die);
1171         if (ret)
1172                 return ret;
1173
1174         if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1175                 int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1176
1177                 uncore_extra_pci_dev[die].dev[idx] = pdev;
1178                 pci_set_drvdata(pdev, NULL);
1179                 return 0;
1180         }
1181
1182         type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1183
1184         /*
1185          * Some platforms, e.g. Knights Landing, use a common PCI device ID
1186          * for multiple instances of an uncore PMU device type. Check the
1187          * PCI slot and function to identify the uncore box.
1188          */
1189         if (id->driver_data & ~0xffff) {
1190                 struct pci_driver *pci_drv = pdev->driver;
1191
1192                 pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1193                 if (pmu == NULL)
1194                         return -ENODEV;
1195         } else {
1196                 /*
1197                  * For a performance monitoring unit with multiple boxes,
1198                  * each box has a different function id.
1199                  */
1200                 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1201         }
1202
1203         ret = uncore_pci_pmu_register(pdev, type, pmu, die);
1204
1205         pci_set_drvdata(pdev, pmu->boxes[die]);
1206
1207         return ret;
1208 }
1209
1210 /*
1211  * Unregister the PMU of a PCI device
1212  * @pmu: The corresponding PMU is unregistered.
1213  * @die: The die id which the device maps to.
1214  */
1215 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
1216 {
1217         struct intel_uncore_box *box = pmu->boxes[die];
1218
1219         pmu->boxes[die] = NULL;
1220         if (atomic_dec_return(&pmu->activeboxes) == 0)
1221                 uncore_pmu_unregister(pmu);
1222         uncore_box_exit(box);
1223         kfree(box);
1224 }
1225
1226 static void uncore_pci_remove(struct pci_dev *pdev)
1227 {
1228         struct intel_uncore_box *box;
1229         struct intel_uncore_pmu *pmu;
1230         int i, die;
1231
1232         if (uncore_pci_get_dev_die_info(pdev, &die))
1233                 return;
1234
1235         box = pci_get_drvdata(pdev);
1236         if (!box) {
1237                 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1238                         if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1239                                 uncore_extra_pci_dev[die].dev[i] = NULL;
1240                                 break;
1241                         }
1242                 }
1243                 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1244                 return;
1245         }
1246
1247         pmu = box->pmu;
1248
1249         pci_set_drvdata(pdev, NULL);
1250
1251         uncore_pci_pmu_unregister(pmu, die);
1252 }
1253
1254 static int uncore_bus_notify(struct notifier_block *nb,
1255                              unsigned long action, void *data,
1256                              const struct pci_device_id *ids)
1257 {
1258         struct device *dev = data;
1259         struct pci_dev *pdev = to_pci_dev(dev);
1260         struct intel_uncore_pmu *pmu;
1261         int die;
1262
1263         /* Unregister the PMU when the device is going to be deleted. */
1264         if (action != BUS_NOTIFY_DEL_DEVICE)
1265                 return NOTIFY_DONE;
1266
1267         pmu = uncore_pci_find_dev_pmu(pdev, ids);
1268         if (!pmu)
1269                 return NOTIFY_DONE;
1270
1271         if (uncore_pci_get_dev_die_info(pdev, &die))
1272                 return NOTIFY_DONE;
1273
1274         uncore_pci_pmu_unregister(pmu, die);
1275
1276         return NOTIFY_OK;
1277 }
1278
1279 static int uncore_pci_sub_bus_notify(struct notifier_block *nb,
1280                                      unsigned long action, void *data)
1281 {
1282         return uncore_bus_notify(nb, action, data,
1283                                  uncore_pci_sub_driver->id_table);
1284 }
1285
1286 static struct notifier_block uncore_pci_sub_notifier = {
1287         .notifier_call = uncore_pci_sub_bus_notify,
1288 };
1289
1290 static void uncore_pci_sub_driver_init(void)
1291 {
1292         const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
1293         struct intel_uncore_type *type;
1294         struct intel_uncore_pmu *pmu;
1295         struct pci_dev *pci_sub_dev;
1296         bool notify = false;
1297         unsigned int devfn;
1298         int die;
1299
1300         while (ids && ids->vendor) {
1301                 pci_sub_dev = NULL;
1302                 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
1303                 /*
1304                  * Search the available device, and register the
1305                  * corresponding PMU.
1306                  */
1307                 while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
1308                                                      ids->device, pci_sub_dev))) {
1309                         devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
1310                                           UNCORE_PCI_DEV_FUNC(ids->driver_data));
1311                         if (devfn != pci_sub_dev->devfn)
1312                                 continue;
1313
1314                         pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
1315                         if (!pmu)
1316                                 continue;
1317
1318                         if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
1319                                 continue;
1320
1321                         if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
1322                                                      die))
1323                                 notify = true;
1324                 }
1325                 ids++;
1326         }
1327
1328         if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier))
1329                 notify = false;
1330
1331         if (!notify)
1332                 uncore_pci_sub_driver = NULL;
1333 }
1334
1335 static int uncore_pci_bus_notify(struct notifier_block *nb,
1336                                      unsigned long action, void *data)
1337 {
1338         return uncore_bus_notify(nb, action, data, NULL);
1339 }
1340
1341 static struct notifier_block uncore_pci_notifier = {
1342         .notifier_call = uncore_pci_bus_notify,
1343 };
1344
1345
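/*
 * Register the PMUs of PCI uncore units that were found via the discovery
 * table and are not bound to a PCI driver.  Walk the discovered box
 * control addresses, look up each device and register its PMU, then
 * install a bus notifier so the PMUs are torn down on device removal.
 */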
1346 static void uncore_pci_pmus_register(void)
1347 {
1348         struct intel_uncore_type **types = uncore_pci_uncores;
1349         struct intel_uncore_type *type;
1350         struct intel_uncore_pmu *pmu;
1351         struct pci_dev *pdev;
1352         u64 box_ctl;
1353         int i, die;
1354
1355         for (; *types; types++) {
1356                 type = *types;
1357                 for (die = 0; die < __uncore_max_dies; die++) {
1358                         for (i = 0; i < type->num_boxes; i++) {
1359                                 if (!type->box_ctls[die])
1360                                         continue;
1361                                 box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1362                                 pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl),
1363                                                                    UNCORE_DISCOVERY_PCI_BUS(box_ctl),
1364                                                                    UNCORE_DISCOVERY_PCI_DEVFN(box_ctl));
1365                                 if (!pdev)
1366                                         continue;
1367                                 pmu = &type->pmus[i];
1368
1369                                 uncore_pci_pmu_register(pdev, type, pmu, die);
1370                         }
1371                 }
1372         }
1373
1374         bus_register_notifier(&pci_bus_type, &uncore_pci_notifier);
1375 }
1376
1377 static int __init uncore_pci_init(void)
1378 {
1379         size_t size;
1380         int ret;
1381
1382         size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1383         uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1384         if (!uncore_extra_pci_dev) {
1385                 ret = -ENOMEM;
1386                 goto err;
1387         }
1388
1389         ret = uncore_types_init(uncore_pci_uncores, false);
1390         if (ret)
1391                 goto errtype;
1392
1393         if (uncore_pci_driver) {
1394                 uncore_pci_driver->probe = uncore_pci_probe;
1395                 uncore_pci_driver->remove = uncore_pci_remove;
1396
1397                 ret = pci_register_driver(uncore_pci_driver);
1398                 if (ret)
1399                         goto errtype;
1400         } else
1401                 uncore_pci_pmus_register();
1402
1403         if (uncore_pci_sub_driver)
1404                 uncore_pci_sub_driver_init();
1405
1406         pcidrv_registered = true;
1407         return 0;
1408
1409 errtype:
1410         uncore_types_exit(uncore_pci_uncores);
1411         kfree(uncore_extra_pci_dev);
1412         uncore_extra_pci_dev = NULL;
1413         uncore_free_pcibus_map();
1414 err:
1415         uncore_pci_uncores = empty_uncore;
1416         return ret;
1417 }
1418
1419 static void uncore_pci_exit(void)
1420 {
1421         if (pcidrv_registered) {
1422                 pcidrv_registered = false;
1423                 if (uncore_pci_sub_driver)
1424                         bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier);
1425                 if (uncore_pci_driver)
1426                         pci_unregister_driver(uncore_pci_driver);
1427                 else
1428                         bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier);
1429                 uncore_types_exit(uncore_pci_uncores);
1430                 kfree(uncore_extra_pci_dev);
1431                 uncore_free_pcibus_map();
1432         }
1433 }
1434
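/*
 * Move the uncore context of each box of this type on the affected die
 * from old_cpu to new_cpu.  A negative old_cpu means the die gets its
 * first collector CPU; a negative new_cpu means the last one goes away.
 * Otherwise the hrtimer is cancelled and the perf context is migrated.
 */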
1435 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1436                                    int new_cpu)
1437 {
1438         struct intel_uncore_pmu *pmu = type->pmus;
1439         struct intel_uncore_box *box;
1440         int i, die;
1441
1442         die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1443         for (i = 0; i < type->num_boxes; i++, pmu++) {
1444                 box = pmu->boxes[die];
1445                 if (!box)
1446                         continue;
1447
1448                 if (old_cpu < 0) {
1449                         WARN_ON_ONCE(box->cpu != -1);
1450                         box->cpu = new_cpu;
1451                         continue;
1452                 }
1453
1454                 WARN_ON_ONCE(box->cpu != old_cpu);
1455                 box->cpu = -1;
1456                 if (new_cpu < 0)
1457                         continue;
1458
1459                 uncore_pmu_cancel_hrtimer(box);
1460                 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1461                 box->cpu = new_cpu;
1462         }
1463 }
1464
1465 static void uncore_change_context(struct intel_uncore_type **uncores,
1466                                   int old_cpu, int new_cpu)
1467 {
1468         for (; *uncores; uncores++)
1469                 uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1470 }
1471
1472 static void uncore_box_unref(struct intel_uncore_type **types, int id)
1473 {
1474         struct intel_uncore_type *type;
1475         struct intel_uncore_pmu *pmu;
1476         struct intel_uncore_box *box;
1477         int i;
1478
1479         for (; *types; types++) {
1480                 type = *types;
1481                 pmu = type->pmus;
1482                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1483                         box = pmu->boxes[id];
1484                         if (box && atomic_dec_return(&box->refcnt) == 0)
1485                                 uncore_box_exit(box);
1486                 }
1487         }
1488 }
1489
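/*
 * CPU hotplug offline callback.  If the outgoing CPU was the die's uncore
 * event collector, pick another online CPU in the die and migrate the
 * uncore contexts there, then drop the references on the MSR and MMIO
 * boxes of the die.
 */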
1490 static int uncore_event_cpu_offline(unsigned int cpu)
1491 {
1492         int die, target;
1493
1494         /* Check if exiting cpu is used for collecting uncore events */
1495         if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1496                 goto unref;
1497         /* Find a new cpu to collect uncore events */
1498         target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1499
1500         /* Migrate uncore events to the new target */
1501         if (target < nr_cpu_ids)
1502                 cpumask_set_cpu(target, &uncore_cpu_mask);
1503         else
1504                 target = -1;
1505
1506         uncore_change_context(uncore_msr_uncores, cpu, target);
1507         uncore_change_context(uncore_mmio_uncores, cpu, target);
1508         uncore_change_context(uncore_pci_uncores, cpu, target);
1509
1510 unref:
1511         /* Clear the references */
1512         die = topology_logical_die_id(cpu);
1513         uncore_box_unref(uncore_msr_uncores, die);
1514         uncore_box_unref(uncore_mmio_uncores, die);
1515         return 0;
1516 }
1517
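/*
 * Allocate a box for every PMU of the given types that does not yet have
 * one for this die.  Either all allocations succeed and the boxes are
 * installed in their PMUs, or everything allocated here is freed again.
 */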
1518 static int allocate_boxes(struct intel_uncore_type **types,
1519                          unsigned int die, unsigned int cpu)
1520 {
1521         struct intel_uncore_box *box, *tmp;
1522         struct intel_uncore_type *type;
1523         struct intel_uncore_pmu *pmu;
1524         LIST_HEAD(allocated);
1525         int i;
1526
1527         /* Try to allocate all required boxes */
1528         for (; *types; types++) {
1529                 type = *types;
1530                 pmu = type->pmus;
1531                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1532                         if (pmu->boxes[die])
1533                                 continue;
1534                         box = uncore_alloc_box(type, cpu_to_node(cpu));
1535                         if (!box)
1536                                 goto cleanup;
1537                         box->pmu = pmu;
1538                         box->dieid = die;
1539                         list_add(&box->active_list, &allocated);
1540                 }
1541         }
1542         /* Install them in the pmus */
1543         list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1544                 list_del_init(&box->active_list);
1545                 box->pmu->boxes[die] = box;
1546         }
1547         return 0;
1548
1549 cleanup:
1550         list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1551                 list_del_init(&box->active_list);
1552                 kfree(box);
1553         }
1554         return -ENOMEM;
1555 }
1556
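/*
 * Allocate any missing boxes for this die and take a reference on each
 * box of the given types.  The first reference initializes the box.
 */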
1557 static int uncore_box_ref(struct intel_uncore_type **types,
1558                           int id, unsigned int cpu)
1559 {
1560         struct intel_uncore_type *type;
1561         struct intel_uncore_pmu *pmu;
1562         struct intel_uncore_box *box;
1563         int i, ret;
1564
1565         ret = allocate_boxes(types, id, cpu);
1566         if (ret)
1567                 return ret;
1568
1569         for (; *types; types++) {
1570                 type = *types;
1571                 pmu = type->pmus;
1572                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1573                         box = pmu->boxes[id];
1574                         if (box && atomic_inc_return(&box->refcnt) == 1)
1575                                 uncore_box_init(box);
1576                 }
1577         }
1578         return 0;
1579 }
1580
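/*
 * CPU hotplug setup callback. Reference (allocating and initializing if
 * necessary) the MSR and MMIO boxes of the die the incoming CPU belongs
 * to. The first CPU to come online in a die also becomes the designated
 * event collector for that die and has the MSR, MMIO and PCI uncore
 * contexts assigned to it.
 */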
1581 static int uncore_event_cpu_online(unsigned int cpu)
1582 {
1583         int die, target, msr_ret, mmio_ret;
1584
1585         die = topology_logical_die_id(cpu);
1586         msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1587         mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1588         if (msr_ret && mmio_ret)
1589                 return -ENOMEM;
1590
1591         /*
1592          * Check if there is already an online cpu in this die
1593          * which collects uncore events.
1594          */
1595         target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1596         if (target < nr_cpu_ids)
1597                 return 0;
1598
1599         cpumask_set_cpu(cpu, &uncore_cpu_mask);
1600
1601         if (!msr_ret)
1602                 uncore_change_context(uncore_msr_uncores, -1, cpu);
1603         if (!mmio_ret)
1604                 uncore_change_context(uncore_mmio_uncores, -1, cpu);
1605         uncore_change_context(uncore_pci_uncores, -1, cpu);
1606         return 0;
1607 }
1608
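/* Register one perf PMU per box of the given uncore type. */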
1609 static int __init type_pmu_register(struct intel_uncore_type *type)
1610 {
1611         int i, ret;
1612
1613         for (i = 0; i < type->num_boxes; i++) {
1614                 ret = uncore_pmu_register(&type->pmus[i]);
1615                 if (ret)
1616                         return ret;
1617         }
1618         return 0;
1619 }
1620
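/* Register the PMUs of all MSR-based uncore types. */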
1621 static int __init uncore_msr_pmus_register(void)
1622 {
1623         struct intel_uncore_type **types = uncore_msr_uncores;
1624         int ret;
1625
1626         for (; *types; types++) {
1627                 ret = type_pmu_register(*types);
1628                 if (ret)
1629                         return ret;
1630         }
1631         return 0;
1632 }
1633
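/*
 * Initialize and register the MSR-based uncore types. On failure, tear
 * them down again and fall back to the empty type list so the rest of
 * the driver can carry on without MSR uncores.
 */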
1634 static int __init uncore_cpu_init(void)
1635 {
1636         int ret;
1637
1638         ret = uncore_types_init(uncore_msr_uncores, true);
1639         if (ret)
1640                 goto err;
1641
1642         ret = uncore_msr_pmus_register();
1643         if (ret)
1644                 goto err;
1645         return 0;
1646 err:
1647         uncore_types_exit(uncore_msr_uncores);
1648         uncore_msr_uncores = empty_uncore;
1649         return ret;
1650 }
1651
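/*
 * Initialize and register the MMIO-based uncore types, mirroring
 * uncore_cpu_init() for the MSR-based ones.
 */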
1652 static int __init uncore_mmio_init(void)
1653 {
1654         struct intel_uncore_type **types = uncore_mmio_uncores;
1655         int ret;
1656
1657         ret = uncore_types_init(types, true);
1658         if (ret)
1659                 goto err;
1660
1661         for (; *types; types++) {
1662                 ret = type_pmu_register(*types);
1663                 if (ret)
1664                         goto err;
1665         }
1666         return 0;
1667 err:
1668         uncore_types_exit(uncore_mmio_uncores);
1669         uncore_mmio_uncores = empty_uncore;
1670         return ret;
1671 }
1672
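/*
 * Per-model init callbacks: a platform provides any combination of MSR
 * (cpu_init), PCI (pci_init) and MMIO (mmio_init) setup routines, and
 * sets use_discovery when its setup additionally relies on the PerfMon
 * discovery tables (e.g. Sapphire Rapids).
 */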
1673 struct intel_uncore_init_fun {
1674         void    (*cpu_init)(void);
1675         int     (*pci_init)(void);
1676         void    (*mmio_init)(void);
1677         bool    use_discovery;
1678 };
1679
1680 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1681         .cpu_init = nhm_uncore_cpu_init,
1682 };
1683
1684 static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1685         .cpu_init = snb_uncore_cpu_init,
1686         .pci_init = snb_uncore_pci_init,
1687 };
1688
1689 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1690         .cpu_init = snb_uncore_cpu_init,
1691         .pci_init = ivb_uncore_pci_init,
1692 };
1693
1694 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1695         .cpu_init = snb_uncore_cpu_init,
1696         .pci_init = hsw_uncore_pci_init,
1697 };
1698
1699 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1700         .cpu_init = snb_uncore_cpu_init,
1701         .pci_init = bdw_uncore_pci_init,
1702 };
1703
1704 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1705         .cpu_init = snbep_uncore_cpu_init,
1706         .pci_init = snbep_uncore_pci_init,
1707 };
1708
1709 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1710         .cpu_init = nhmex_uncore_cpu_init,
1711 };
1712
1713 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1714         .cpu_init = ivbep_uncore_cpu_init,
1715         .pci_init = ivbep_uncore_pci_init,
1716 };
1717
1718 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1719         .cpu_init = hswep_uncore_cpu_init,
1720         .pci_init = hswep_uncore_pci_init,
1721 };
1722
1723 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1724         .cpu_init = bdx_uncore_cpu_init,
1725         .pci_init = bdx_uncore_pci_init,
1726 };
1727
1728 static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1729         .cpu_init = knl_uncore_cpu_init,
1730         .pci_init = knl_uncore_pci_init,
1731 };
1732
1733 static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1734         .cpu_init = skl_uncore_cpu_init,
1735         .pci_init = skl_uncore_pci_init,
1736 };
1737
1738 static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1739         .cpu_init = skx_uncore_cpu_init,
1740         .pci_init = skx_uncore_pci_init,
1741 };
1742
1743 static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1744         .cpu_init = icl_uncore_cpu_init,
1745         .pci_init = skl_uncore_pci_init,
1746 };
1747
1748 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1749         .cpu_init = tgl_uncore_cpu_init,
1750         .mmio_init = tgl_uncore_mmio_init,
1751 };
1752
1753 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1754         .cpu_init = tgl_uncore_cpu_init,
1755         .mmio_init = tgl_l_uncore_mmio_init,
1756 };
1757
1758 static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
1759         .cpu_init = tgl_uncore_cpu_init,
1760         .pci_init = skl_uncore_pci_init,
1761 };
1762
1763 static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
1764         .cpu_init = adl_uncore_cpu_init,
1765         .mmio_init = tgl_uncore_mmio_init,
1766 };
1767
1768 static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1769         .cpu_init = icx_uncore_cpu_init,
1770         .pci_init = icx_uncore_pci_init,
1771         .mmio_init = icx_uncore_mmio_init,
1772 };
1773
1774 static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1775         .cpu_init = snr_uncore_cpu_init,
1776         .pci_init = snr_uncore_pci_init,
1777         .mmio_init = snr_uncore_mmio_init,
1778 };
1779
1780 static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
1781         .cpu_init = spr_uncore_cpu_init,
1782         .pci_init = spr_uncore_pci_init,
1783         .mmio_init = spr_uncore_mmio_init,
1784         .use_discovery = true,
1785 };
1786
1787 static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
1788         .cpu_init = intel_uncore_generic_uncore_cpu_init,
1789         .pci_init = intel_uncore_generic_uncore_pci_init,
1790         .mmio_init = intel_uncore_generic_uncore_mmio_init,
1791 };
1792
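/*
 * CPU model to init-callback mapping. Supporting a new model is normally
 * just a matter of adding an X86_MATCH_INTEL_FAM6_MODEL() line that
 * points at the appropriate intel_uncore_init_fun, for example (purely
 * hypothetical entry):
 *
 *	X86_MATCH_INTEL_FAM6_MODEL(NEW_MODEL,		&new_model_uncore_init),
 *
 * Models not listed here can still be handled by the generic,
 * discovery-table based init in intel_uncore_init().
 */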
1793 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1794         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &nhm_uncore_init),
1795         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &nhm_uncore_init),
1796         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &nhm_uncore_init),
1797         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &nhm_uncore_init),
1798         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &snb_uncore_init),
1799         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &ivb_uncore_init),
1800         X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &hsw_uncore_init),
1801         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &hsw_uncore_init),
1802         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &hsw_uncore_init),
1803         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &bdw_uncore_init),
1804         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &bdw_uncore_init),
1805         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &snbep_uncore_init),
1806         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &nhmex_uncore_init),
1807         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &nhmex_uncore_init),
1808         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &ivbep_uncore_init),
1809         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &hswep_uncore_init),
1810         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &bdx_uncore_init),
1811         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &bdx_uncore_init),
1812         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &knl_uncore_init),
1813         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &knl_uncore_init),
1814         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &skl_uncore_init),
1815         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &skl_uncore_init),
1816         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &skx_uncore_init),
1817         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &skl_uncore_init),
1818         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &skl_uncore_init),
1819         X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,         &skl_uncore_init),
1820         X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,           &skl_uncore_init),
1821         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           &icl_uncore_init),
1822         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,        &icl_uncore_init),
1823         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,             &icl_uncore_init),
1824         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &icx_uncore_init),
1825         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &icx_uncore_init),
1826         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         &tgl_l_uncore_init),
1827         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           &tgl_uncore_init),
1828         X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &rkl_uncore_init),
1829         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &adl_uncore_init),
1830         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &adl_uncore_init),
1831         X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &spr_uncore_init),
1832         X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &snr_uncore_init),
1833         {},
1834 };
1835 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
1836
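/*
 * Module init. Uncore PMUs are generally not exposed to guests, so bail
 * out when running under a hypervisor. Pick the init callbacks for the
 * running model (or the generic discovery-based ones when the model is
 * unknown but discovery tables exist), bring up the PCI, MSR and MMIO
 * uncores the platform provides, and install the CPU hotplug callbacks
 * that maintain one event-collecting CPU per die. The load is only
 * refused when the MSR, PCI and MMIO setups all failed.
 */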
1837 static int __init intel_uncore_init(void)
1838 {
1839         const struct x86_cpu_id *id;
1840         struct intel_uncore_init_fun *uncore_init;
1841         int pret = 0, cret = 0, mret = 0, ret;
1842
1843         if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1844                 return -ENODEV;
1845
1846         __uncore_max_dies =
1847                 topology_max_packages() * topology_max_die_per_package();
1848
1849         id = x86_match_cpu(intel_uncore_match);
1850         if (!id) {
1851                 if (!uncore_no_discover && intel_uncore_has_discovery_tables())
1852                         uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
1853                 else
1854                         return -ENODEV;
1855         } else {
1856                 uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1857                 if (uncore_no_discover && uncore_init->use_discovery)
1858                         return -ENODEV;
1859                 if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
1860                         return -ENODEV;
1861         }
1862
1863         if (uncore_init->pci_init) {
1864                 pret = uncore_init->pci_init();
1865                 if (!pret)
1866                         pret = uncore_pci_init();
1867         }
1868
1869         if (uncore_init->cpu_init) {
1870                 uncore_init->cpu_init();
1871                 cret = uncore_cpu_init();
1872         }
1873
1874         if (uncore_init->mmio_init) {
1875                 uncore_init->mmio_init();
1876                 mret = uncore_mmio_init();
1877         }
1878
1879         if (cret && pret && mret) {
1880                 ret = -ENODEV;
1881                 goto free_discovery;
1882         }
1883
1884         /* Install hotplug callbacks to set up the event-collecting target for each die */
1885         ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1886                                 "perf/x86/intel/uncore:online",
1887                                 uncore_event_cpu_online,
1888                                 uncore_event_cpu_offline);
1889         if (ret)
1890                 goto err;
1891         return 0;
1892
1893 err:
1894         uncore_types_exit(uncore_msr_uncores);
1895         uncore_types_exit(uncore_mmio_uncores);
1896         uncore_pci_exit();
1897 free_discovery:
1898         intel_uncore_clear_discovery_tables();
1899         return ret;
1900 }
1901 module_init(intel_uncore_init);
1902
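/*
 * Module exit: removing the hotplug state runs the offline callback on
 * the online CPUs, after which all uncore types, the PCI driver and the
 * discovery tables are torn down.
 */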
1903 static void __exit intel_uncore_exit(void)
1904 {
1905         cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1906         uncore_types_exit(uncore_msr_uncores);
1907         uncore_types_exit(uncore_mmio_uncores);
1908         uncore_pci_exit();
1909         intel_uncore_clear_discovery_tables();
1910 }
1911 module_exit(intel_uncore_exit);