arch/x86/events/intel/uncore.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
3
4 #include <asm/cpu_device_id.h>
5 #include <asm/intel-family.h>
6 #include "uncore.h"
7 #include "uncore_discovery.h"
8
9 static bool uncore_no_discover;
10 module_param(uncore_no_discover, bool, 0);
11 MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
12                                      "(default: enable the discovery mechanism).");
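
/*
 * With a permissions mask of 0 the parameter above is load-time only (it
 * gets no sysfs entry). When set, the driver is meant to skip the
 * self-describing PerfMon discovery tables and rely solely on the
 * hard-coded per-model uncore support selected further down.
 */
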
13 struct intel_uncore_type *empty_uncore[] = { NULL, };
14 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
15 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
16 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
17
18 static bool pcidrv_registered;
19 struct pci_driver *uncore_pci_driver;
20 /* The PCI driver for the device which the uncore doesn't own. */
21 struct pci_driver *uncore_pci_sub_driver;
/* PCI bus to die ID mapping */
23 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
24 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
25 struct pci_extra_dev *uncore_extra_pci_dev;
26 int __uncore_max_dies;
27
28 /* mask of cpus that collect uncore events */
29 static cpumask_t uncore_cpu_mask;
30
31 /* constraint for the fixed counter */
32 static struct event_constraint uncore_constraint_fixed =
33         EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
34 struct event_constraint uncore_constraint_empty =
35         EVENT_CONSTRAINT(0, 0, 0);
36
37 MODULE_LICENSE("GPL");
38
39 int uncore_pcibus_to_dieid(struct pci_bus *bus)
40 {
41         struct pci2phy_map *map;
42         int die_id = -1;
43
44         raw_spin_lock(&pci2phy_map_lock);
45         list_for_each_entry(map, &pci2phy_map_head, list) {
46                 if (map->segment == pci_domain_nr(bus)) {
47                         die_id = map->pbus_to_dieid[bus->number];
48                         break;
49                 }
50         }
51         raw_spin_unlock(&pci2phy_map_lock);
52
53         return die_id;
54 }
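
/*
 * The pci2phy map is keyed by PCI segment (domain); each entry carries a
 * 256-entry bus-number -> die-id table, so the lookup above is a list walk
 * over segments followed by a direct index on bus->number.
 */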
55
56 int uncore_die_to_segment(int die)
57 {
58         struct pci_bus *bus = NULL;
59
        /* Find the first PCI bus that belongs to the specified die. */
61         while ((bus = pci_find_next_bus(bus)) &&
62                (die != uncore_pcibus_to_dieid(bus)))
63                 ;
64
65         return bus ? pci_domain_nr(bus) : -EINVAL;
66 }
67
68 static void uncore_free_pcibus_map(void)
69 {
70         struct pci2phy_map *map, *tmp;
71
72         list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
73                 list_del(&map->list);
74                 kfree(map);
75         }
76 }
77
78 struct pci2phy_map *__find_pci2phy_map(int segment)
79 {
80         struct pci2phy_map *map, *alloc = NULL;
81         int i;
82
83         lockdep_assert_held(&pci2phy_map_lock);
84
85 lookup:
86         list_for_each_entry(map, &pci2phy_map_head, list) {
87                 if (map->segment == segment)
88                         goto end;
89         }
90
91         if (!alloc) {
92                 raw_spin_unlock(&pci2phy_map_lock);
93                 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
94                 raw_spin_lock(&pci2phy_map_lock);
95
96                 if (!alloc)
97                         return NULL;
98
99                 goto lookup;
100         }
101
102         map = alloc;
103         alloc = NULL;
104         map->segment = segment;
105         for (i = 0; i < 256; i++)
106                 map->pbus_to_dieid[i] = -1;
107         list_add_tail(&map->list, &pci2phy_map_head);
108
109 end:
110         kfree(alloc);
111         return map;
112 }
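
/*
 * Note the allocation dance above: pci2phy_map_lock is dropped around the
 * kmalloc(GFP_KERNEL), which may sleep, and the list is re-scanned
 * afterwards in case another CPU inserted the same segment in the
 * meantime; a losing allocation is simply freed at the 'end' label.
 */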
113
114 ssize_t uncore_event_show(struct device *dev,
115                           struct device_attribute *attr, char *buf)
116 {
117         struct uncore_event_desc *event =
118                 container_of(attr, struct uncore_event_desc, attr);
119         return sprintf(buf, "%s", event->config);
120 }
121
122 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
123 {
124         unsigned int dieid = topology_logical_die_id(cpu);
125
        /*
         * The unsigned check also catches the '-1' return value for
         * non-existent mappings in the topology map.
         */
130         return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
131 }
132
133 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
134 {
135         u64 count;
136
137         rdmsrl(event->hw.event_base, count);
138
139         return count;
140 }
141
142 void uncore_mmio_exit_box(struct intel_uncore_box *box)
143 {
144         if (box->io_addr)
145                 iounmap(box->io_addr);
146 }
147
148 u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
149                              struct perf_event *event)
150 {
151         if (!box->io_addr)
152                 return 0;
153
154         if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
155                 return 0;
156
157         return readq(box->io_addr + event->hw.event_base);
158 }
159
160 /*
161  * generic get constraint function for shared match/mask registers.
162  */
163 struct event_constraint *
164 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
165 {
166         struct intel_uncore_extra_reg *er;
167         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
168         struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
169         unsigned long flags;
170         bool ok = false;
171
172         /*
173          * reg->alloc can be set due to existing state, so for fake box we
174          * need to ignore this, otherwise we might fail to allocate proper
175          * fake state for this extra reg constraint.
176          */
177         if (reg1->idx == EXTRA_REG_NONE ||
178             (!uncore_box_is_fake(box) && reg1->alloc))
179                 return NULL;
180
181         er = &box->shared_regs[reg1->idx];
182         raw_spin_lock_irqsave(&er->lock, flags);
183         if (!atomic_read(&er->ref) ||
184             (er->config1 == reg1->config && er->config2 == reg2->config)) {
185                 atomic_inc(&er->ref);
186                 er->config1 = reg1->config;
187                 er->config2 = reg2->config;
188                 ok = true;
189         }
190         raw_spin_unlock_irqrestore(&er->lock, flags);
191
192         if (ok) {
193                 if (!uncore_box_is_fake(box))
194                         reg1->alloc = 1;
195                 return NULL;
196         }
197
198         return &uncore_constraint_empty;
199 }
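
/*
 * Return convention: NULL means the extra register is free or already
 * holds a compatible configuration, so scheduling may proceed;
 * &uncore_constraint_empty has an all-zero counter mask and can never be
 * satisfied, so the event fails to schedule while the shared register is
 * held with a conflicting configuration.
 */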
200
201 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
202 {
203         struct intel_uncore_extra_reg *er;
204         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
205
        /*
         * Only put the constraint if the extra reg was actually allocated.
         * This also takes care of events which do not use an extra shared reg.
         *
         * Also, if this is a fake box we shouldn't touch any event state
         * (reg->alloc) and we don't care about leaving inconsistent box
         * state either since it will be thrown out.
         */
214         if (uncore_box_is_fake(box) || !reg1->alloc)
215                 return;
216
217         er = &box->shared_regs[reg1->idx];
218         atomic_dec(&er->ref);
219         reg1->alloc = 0;
220 }
221
222 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
223 {
224         struct intel_uncore_extra_reg *er;
225         unsigned long flags;
226         u64 config;
227
228         er = &box->shared_regs[idx];
229
230         raw_spin_lock_irqsave(&er->lock, flags);
231         config = er->config;
232         raw_spin_unlock_irqrestore(&er->lock, flags);
233
234         return config;
235 }
236
237 static void uncore_assign_hw_event(struct intel_uncore_box *box,
238                                    struct perf_event *event, int idx)
239 {
240         struct hw_perf_event *hwc = &event->hw;
241
242         hwc->idx = idx;
243         hwc->last_tag = ++box->tags[idx];
244
245         if (uncore_pmc_fixed(hwc->idx)) {
246                 hwc->event_base = uncore_fixed_ctr(box);
247                 hwc->config_base = uncore_fixed_ctl(box);
248                 return;
249         }
250
251         hwc->config_base = uncore_event_ctl(box, hwc->idx);
252         hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
253 }
254
255 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
256 {
257         u64 prev_count, new_count, delta;
258         int shift;
259
260         if (uncore_pmc_freerunning(event->hw.idx))
261                 shift = 64 - uncore_freerunning_bits(box, event);
262         else if (uncore_pmc_fixed(event->hw.idx))
263                 shift = 64 - uncore_fixed_ctr_bits(box);
264         else
265                 shift = 64 - uncore_perf_ctr_bits(box);
266
267         /* the hrtimer might modify the previous event value */
268 again:
269         prev_count = local64_read(&event->hw.prev_count);
270         new_count = uncore_read_counter(box, event);
271         if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
272                 goto again;
273
274         delta = (new_count << shift) - (prev_count << shift);
275         delta >>= shift;
276
277         local64_add(delta, &event->count);
278 }
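
/*
 * Example: with a 48-bit counter, shift is 16. If the counter wrapped from
 * prev_count = 0xffffffffffff to new_count = 0x10, then
 * ((0x10 << 16) - (0xffffffffffff << 16)) >> 16 = 0x11, i.e. the 17
 * increments that really happened. Shifting both values left first makes
 * the 64-bit subtraction wrap at the true counter width, and the final
 * logical right shift (delta is a u64) restores the magnitude.
 */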
279
/*
 * The overflow interrupt is unavailable on SandyBridge-EP and broken on
 * SandyBridge, so use an hrtimer to periodically poll the counters and
 * avoid losing counts to overflow.
 */
285 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
286 {
287         struct intel_uncore_box *box;
288         struct perf_event *event;
289         unsigned long flags;
290         int bit;
291
292         box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
293         if (!box->n_active || box->cpu != smp_processor_id())
294                 return HRTIMER_NORESTART;
        /*
         * Disable local interrupts to prevent uncore_pmu_event_start/stop
         * from interrupting the update process.
         */
299         local_irq_save(flags);
300
301         /*
302          * handle boxes with an active event list as opposed to active
303          * counters
304          */
305         list_for_each_entry(event, &box->active_list, active_entry) {
306                 uncore_perf_event_update(box, event);
307         }
308
309         for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
310                 uncore_perf_event_update(box, box->events[bit]);
311
312         local_irq_restore(flags);
313
314         hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
315         return HRTIMER_RESTART;
316 }
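
/*
 * The timer handler walks two sets: events on box->active_list are free
 * running counters (queued there by uncore_pmu_event_start()), while
 * active_mask covers the generic and fixed counters programmed into
 * box->events[].
 */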
317
318 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
319 {
320         hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
321                       HRTIMER_MODE_REL_PINNED);
322 }
323
324 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
325 {
326         hrtimer_cancel(&box->hrtimer);
327 }
328
329 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
330 {
331         hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
332         box->hrtimer.function = uncore_pmu_hrtimer;
333 }
334
335 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
336                                                  int node)
337 {
        int i, size, numshared = type->num_shared_regs;
339         struct intel_uncore_box *box;
340
341         size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
342
343         box = kzalloc_node(size, GFP_KERNEL, node);
344         if (!box)
345                 return NULL;
346
347         for (i = 0; i < numshared; i++)
348                 raw_spin_lock_init(&box->shared_regs[i].lock);
349
350         uncore_pmu_init_hrtimer(box);
351         box->cpu = -1;
352         box->dieid = -1;
353
354         /* set default hrtimer timeout */
355         box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
356
357         INIT_LIST_HEAD(&box->active_list);
358
359         return box;
360 }
361
/*
 * The uncore_pmu_event_init() pmu::event_init callback is used as the
 * detection point for uncore events.
 */
366 static int uncore_pmu_event_init(struct perf_event *event);
367
368 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
369 {
370         return &box->pmu->pmu == event->pmu;
371 }
372
373 static int
374 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
375                       bool dogrp)
376 {
377         struct perf_event *event;
378         int n, max_count;
379
380         max_count = box->pmu->type->num_counters;
381         if (box->pmu->type->fixed_ctl)
382                 max_count++;
383
384         if (box->n_events >= max_count)
385                 return -EINVAL;
386
387         n = box->n_events;
388
389         if (is_box_event(box, leader)) {
390                 box->event_list[n] = leader;
391                 n++;
392         }
393
394         if (!dogrp)
395                 return n;
396
397         for_each_sibling_event(event, leader) {
398                 if (!is_box_event(box, event) ||
399                     event->state <= PERF_EVENT_STATE_OFF)
400                         continue;
401
402                 if (n >= max_count)
403                         return -EINVAL;
404
405                 box->event_list[n] = event;
406                 n++;
407         }
408         return n;
409 }
410
411 static struct event_constraint *
412 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
413 {
414         struct intel_uncore_type *type = box->pmu->type;
415         struct event_constraint *c;
416
417         if (type->ops->get_constraint) {
418                 c = type->ops->get_constraint(box, event);
419                 if (c)
420                         return c;
421         }
422
423         if (event->attr.config == UNCORE_FIXED_EVENT)
424                 return &uncore_constraint_fixed;
425
426         if (type->constraints) {
427                 for_each_event_constraint(c, type->constraints) {
428                         if ((event->hw.config & c->cmask) == c->code)
429                                 return c;
430                 }
431         }
432
433         return &type->unconstrainted;
434 }
435
436 static void uncore_put_event_constraint(struct intel_uncore_box *box,
437                                         struct perf_event *event)
438 {
439         if (box->pmu->type->ops->put_constraint)
440                 box->pmu->type->ops->put_constraint(box, event);
441 }
442
443 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
444 {
445         unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
446         struct event_constraint *c;
447         int i, wmin, wmax, ret = 0;
448         struct hw_perf_event *hwc;
449
450         bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
451
452         for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
453                 c = uncore_get_event_constraint(box, box->event_list[i]);
454                 box->event_constraint[i] = c;
455                 wmin = min(wmin, c->weight);
456                 wmax = max(wmax, c->weight);
457         }
458
459         /* fastpath, try to reuse previous register */
460         for (i = 0; i < n; i++) {
461                 hwc = &box->event_list[i]->hw;
462                 c = box->event_constraint[i];
463
464                 /* never assigned */
465                 if (hwc->idx == -1)
466                         break;
467
468                 /* constraint still honored */
469                 if (!test_bit(hwc->idx, c->idxmsk))
470                         break;
471
472                 /* not already used */
473                 if (test_bit(hwc->idx, used_mask))
474                         break;
475
476                 __set_bit(hwc->idx, used_mask);
477                 if (assign)
478                         assign[i] = hwc->idx;
479         }
480         /* slow path */
481         if (i != n)
482                 ret = perf_assign_events(box->event_constraint, n,
483                                          wmin, wmax, n, assign);
484
485         if (!assign || ret) {
486                 for (i = 0; i < n; i++)
487                         uncore_put_event_constraint(box, box->event_list[i]);
488         }
489         return ret ? -EINVAL : 0;
490 }
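
/*
 * The fast path above succeeds when every event can keep the counter it
 * already occupied; only then is no constraint solving needed. Otherwise
 * perf_assign_events() performs the full assignment, trying the least
 * flexible constraints (weight wmin..wmax) first.
 */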
491
492 void uncore_pmu_event_start(struct perf_event *event, int flags)
493 {
494         struct intel_uncore_box *box = uncore_event_to_box(event);
495         int idx = event->hw.idx;
496
497         if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
498                 return;
499
        /*
         * A free running counter is read-only and always active.
         * Use the current counter value as the start point.
         * There is no overflow interrupt for free running counters,
         * so use the hrtimer to poll them periodically instead.
         */
506         if (uncore_pmc_freerunning(event->hw.idx)) {
507                 list_add_tail(&event->active_entry, &box->active_list);
508                 local64_set(&event->hw.prev_count,
509                             uncore_read_counter(box, event));
510                 if (box->n_active++ == 0)
511                         uncore_pmu_start_hrtimer(box);
512                 return;
513         }
514
515         if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
516                 return;
517
518         event->hw.state = 0;
519         box->events[idx] = event;
520         box->n_active++;
521         __set_bit(idx, box->active_mask);
522
523         local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
524         uncore_enable_event(box, event);
525
526         if (box->n_active == 1)
527                 uncore_pmu_start_hrtimer(box);
528 }
529
530 void uncore_pmu_event_stop(struct perf_event *event, int flags)
531 {
532         struct intel_uncore_box *box = uncore_event_to_box(event);
533         struct hw_perf_event *hwc = &event->hw;
534
535         /* Cannot disable free running counter which is read-only */
536         if (uncore_pmc_freerunning(hwc->idx)) {
537                 list_del(&event->active_entry);
538                 if (--box->n_active == 0)
539                         uncore_pmu_cancel_hrtimer(box);
540                 uncore_perf_event_update(box, event);
541                 return;
542         }
543
544         if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
545                 uncore_disable_event(box, event);
546                 box->n_active--;
547                 box->events[hwc->idx] = NULL;
548                 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
549                 hwc->state |= PERF_HES_STOPPED;
550
551                 if (box->n_active == 0)
552                         uncore_pmu_cancel_hrtimer(box);
553         }
554
555         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                /*
                 * Drain the remaining delta count out of an event
                 * that we are disabling:
                 */
560                 uncore_perf_event_update(box, event);
561                 hwc->state |= PERF_HES_UPTODATE;
562         }
563 }
564
565 int uncore_pmu_event_add(struct perf_event *event, int flags)
566 {
567         struct intel_uncore_box *box = uncore_event_to_box(event);
568         struct hw_perf_event *hwc = &event->hw;
569         int assign[UNCORE_PMC_IDX_MAX];
570         int i, n, ret;
571
572         if (!box)
573                 return -ENODEV;
574
        /*
         * The free running counter is assigned in event_init().
         * The free running counter event and the free running counter
         * are 1:1 mapped. It doesn't need to be tracked in the event_list.
         */
580         if (uncore_pmc_freerunning(hwc->idx)) {
581                 if (flags & PERF_EF_START)
582                         uncore_pmu_event_start(event, 0);
583                 return 0;
584         }
585
586         ret = n = uncore_collect_events(box, event, false);
587         if (ret < 0)
588                 return ret;
589
590         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
591         if (!(flags & PERF_EF_START))
592                 hwc->state |= PERF_HES_ARCH;
593
594         ret = uncore_assign_events(box, assign, n);
595         if (ret)
596                 return ret;
597
598         /* save events moving to new counters */
599         for (i = 0; i < box->n_events; i++) {
600                 event = box->event_list[i];
601                 hwc = &event->hw;
602
603                 if (hwc->idx == assign[i] &&
604                         hwc->last_tag == box->tags[assign[i]])
605                         continue;
606                 /*
607                  * Ensure we don't accidentally enable a stopped
608                  * counter simply because we rescheduled.
609                  */
610                 if (hwc->state & PERF_HES_STOPPED)
611                         hwc->state |= PERF_HES_ARCH;
612
613                 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
614         }
615
616         /* reprogram moved events into new counters */
617         for (i = 0; i < n; i++) {
618                 event = box->event_list[i];
619                 hwc = &event->hw;
620
621                 if (hwc->idx != assign[i] ||
622                         hwc->last_tag != box->tags[assign[i]])
623                         uncore_assign_hw_event(box, event, assign[i]);
624                 else if (i < box->n_events)
625                         continue;
626
627                 if (hwc->state & PERF_HES_ARCH)
628                         continue;
629
630                 uncore_pmu_event_start(event, 0);
631         }
632         box->n_events = n;
633
634         return 0;
635 }
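
/*
 * Note that PERF_HES_ARCH doubles as a "do not start" marker here: it is
 * set for events added without PERF_EF_START and for events that were
 * already stopped before being moved, so the reprogramming loop above
 * leaves them stopped.
 */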
636
637 void uncore_pmu_event_del(struct perf_event *event, int flags)
638 {
639         struct intel_uncore_box *box = uncore_event_to_box(event);
640         int i;
641
642         uncore_pmu_event_stop(event, PERF_EF_UPDATE);
643
        /*
         * The event for a free running counter is not tracked in the
         * event_list, and there is no need to force event->hw.idx = -1 to
         * reassign the counter, because the event and the free running
         * counter are 1:1 mapped.
         */
649         if (uncore_pmc_freerunning(event->hw.idx))
650                 return;
651
652         for (i = 0; i < box->n_events; i++) {
653                 if (event == box->event_list[i]) {
654                         uncore_put_event_constraint(box, event);
655
656                         for (++i; i < box->n_events; i++)
657                                 box->event_list[i - 1] = box->event_list[i];
658
659                         --box->n_events;
660                         break;
661                 }
662         }
663
664         event->hw.idx = -1;
665         event->hw.last_tag = ~0ULL;
666 }
667
668 void uncore_pmu_event_read(struct perf_event *event)
669 {
670         struct intel_uncore_box *box = uncore_event_to_box(event);
671         uncore_perf_event_update(box, event);
672 }
673
674 /*
675  * validation ensures the group can be loaded onto the
676  * PMU if it was the only group available.
677  */
678 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
679                                 struct perf_event *event)
680 {
681         struct perf_event *leader = event->group_leader;
682         struct intel_uncore_box *fake_box;
683         int ret = -EINVAL, n;
684
685         /* The free running counter is always active. */
686         if (uncore_pmc_freerunning(event->hw.idx))
687                 return 0;
688
689         fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
690         if (!fake_box)
691                 return -ENOMEM;
692
693         fake_box->pmu = pmu;
        /*
         * The event is not yet connected with its siblings, therefore we
         * must first collect the existing siblings and then add the new
         * event before we can simulate the scheduling.
         */
700         n = uncore_collect_events(fake_box, leader, true);
701         if (n < 0)
702                 goto out;
703
704         fake_box->n_events = n;
705         n = uncore_collect_events(fake_box, event, false);
706         if (n < 0)
707                 goto out;
708
709         fake_box->n_events = n;
710
711         ret = uncore_assign_events(fake_box, NULL, n);
712 out:
713         kfree(fake_box);
714         return ret;
715 }
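
/*
 * The fake box never touches hardware: only uncore_collect_events() and
 * uncore_assign_events() run against it, so group validation is a pure
 * scheduling dry run on the would-be event list.
 */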
716
717 static int uncore_pmu_event_init(struct perf_event *event)
718 {
719         struct intel_uncore_pmu *pmu;
720         struct intel_uncore_box *box;
721         struct hw_perf_event *hwc = &event->hw;
722         int ret;
723
724         if (event->attr.type != event->pmu->type)
725                 return -ENOENT;
726
727         pmu = uncore_event_to_pmu(event);
728         /* no device found for this pmu */
729         if (pmu->func_id < 0)
730                 return -ENOENT;
731
732         /* Sampling not supported yet */
733         if (hwc->sample_period)
734                 return -EINVAL;
735
736         /*
737          * Place all uncore events for a particular physical package
738          * onto a single cpu
739          */
740         if (event->cpu < 0)
741                 return -EINVAL;
742         box = uncore_pmu_to_box(pmu, event->cpu);
743         if (!box || box->cpu < 0)
744                 return -EINVAL;
745         event->cpu = box->cpu;
746         event->pmu_private = box;
747
748         event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
749
750         event->hw.idx = -1;
751         event->hw.last_tag = ~0ULL;
752         event->hw.extra_reg.idx = EXTRA_REG_NONE;
753         event->hw.branch_reg.idx = EXTRA_REG_NONE;
754
755         if (event->attr.config == UNCORE_FIXED_EVENT) {
756                 /* no fixed counter */
757                 if (!pmu->type->fixed_ctl)
758                         return -EINVAL;
759                 /*
760                  * if there is only one fixed counter, only the first pmu
761                  * can access the fixed counter
762                  */
763                 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
764                         return -EINVAL;
765
766                 /* fixed counters have event field hardcoded to zero */
767                 hwc->config = 0ULL;
768         } else if (is_freerunning_event(event)) {
769                 hwc->config = event->attr.config;
770                 if (!check_valid_freerunning_event(box, event))
771                         return -EINVAL;
772                 event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
773                 /*
774                  * The free running counter event and free running counter
775                  * are always 1:1 mapped.
776                  * The free running counter is always active.
777                  * Assign the free running counter here.
778                  */
779                 event->hw.event_base = uncore_freerunning_counter(box, event);
780         } else {
781                 hwc->config = event->attr.config &
782                               (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
783                 if (pmu->type->ops->hw_config) {
784                         ret = pmu->type->ops->hw_config(box, event);
785                         if (ret)
786                                 return ret;
787                 }
788         }
789
790         if (event->group_leader != event)
791                 ret = uncore_validate_group(pmu, event);
792         else
793                 ret = 0;
794
795         return ret;
796 }
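
/*
 * Uncore events are therefore always opened per-CPU, never per-task, and
 * event->cpu is silently rewritten to the die's designated reader CPU.
 * From user space this looks like, e.g.,
 * "perf stat -a -e uncore_imc/data_reads/" on client parts (PMU and event
 * names vary by platform).
 */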
797
798 static void uncore_pmu_enable(struct pmu *pmu)
799 {
800         struct intel_uncore_pmu *uncore_pmu;
801         struct intel_uncore_box *box;
802
803         uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
804
805         box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
806         if (!box)
807                 return;
808
809         if (uncore_pmu->type->ops->enable_box)
810                 uncore_pmu->type->ops->enable_box(box);
811 }
812
813 static void uncore_pmu_disable(struct pmu *pmu)
814 {
815         struct intel_uncore_pmu *uncore_pmu;
816         struct intel_uncore_box *box;
817
818         uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
819
820         box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
821         if (!box)
822                 return;
823
824         if (uncore_pmu->type->ops->disable_box)
825                 uncore_pmu->type->ops->disable_box(box);
826 }
827
828 static ssize_t uncore_get_attr_cpumask(struct device *dev,
829                                 struct device_attribute *attr, char *buf)
830 {
831         return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
832 }
833
834 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
835
836 static struct attribute *uncore_pmu_attrs[] = {
837         &dev_attr_cpumask.attr,
838         NULL,
839 };
840
841 static const struct attribute_group uncore_pmu_attr_group = {
842         .attrs = uncore_pmu_attrs,
843 };
844
845 static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
846 {
847         struct intel_uncore_type *type = pmu->type;
848
        /*
         * No uncore block name in the discovery table.
         * Use "uncore_type_<typeid>_<boxid>" as the name.
         */
853         if (!type->name) {
854                 if (type->num_boxes == 1)
855                         sprintf(pmu->name, "uncore_type_%u", type->type_id);
856                 else {
857                         sprintf(pmu->name, "uncore_type_%u_%d",
858                                 type->type_id, type->box_ids[pmu->pmu_idx]);
859                 }
860                 return;
861         }
862
863         if (type->num_boxes == 1) {
864                 if (strlen(type->name) > 0)
865                         sprintf(pmu->name, "uncore_%s", type->name);
866                 else
867                         sprintf(pmu->name, "uncore");
868         } else
869                 sprintf(pmu->name, "uncore_%s_%d", type->name, pmu->pmu_idx);
870
871 }
872
873 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
874 {
875         int ret;
876
877         if (!pmu->type->pmu) {
878                 pmu->pmu = (struct pmu) {
879                         .attr_groups    = pmu->type->attr_groups,
880                         .task_ctx_nr    = perf_invalid_context,
881                         .pmu_enable     = uncore_pmu_enable,
882                         .pmu_disable    = uncore_pmu_disable,
883                         .event_init     = uncore_pmu_event_init,
884                         .add            = uncore_pmu_event_add,
885                         .del            = uncore_pmu_event_del,
886                         .start          = uncore_pmu_event_start,
887                         .stop           = uncore_pmu_event_stop,
888                         .read           = uncore_pmu_event_read,
889                         .module         = THIS_MODULE,
890                         .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
891                         .attr_update    = pmu->type->attr_update,
892                 };
893         } else {
894                 pmu->pmu = *pmu->type->pmu;
895                 pmu->pmu.attr_groups = pmu->type->attr_groups;
896                 pmu->pmu.attr_update = pmu->type->attr_update;
897         }
898
899         uncore_get_pmu_name(pmu);
900
901         ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
902         if (!ret)
903                 pmu->registered = true;
904         return ret;
905 }
906
907 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
908 {
909         if (!pmu->registered)
910                 return;
911         perf_pmu_unregister(&pmu->pmu);
912         pmu->registered = false;
913 }
914
915 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
916 {
917         int die;
918
919         for (die = 0; die < uncore_max_dies(); die++)
920                 kfree(pmu->boxes[die]);
921         kfree(pmu->boxes);
922 }
923
924 static void uncore_type_exit(struct intel_uncore_type *type)
925 {
926         struct intel_uncore_pmu *pmu = type->pmus;
927         int i;
928
929         if (type->cleanup_mapping)
930                 type->cleanup_mapping(type);
931
932         if (pmu) {
933                 for (i = 0; i < type->num_boxes; i++, pmu++) {
934                         uncore_pmu_unregister(pmu);
935                         uncore_free_boxes(pmu);
936                 }
937                 kfree(type->pmus);
938                 type->pmus = NULL;
939         }
940         if (type->box_ids) {
941                 kfree(type->box_ids);
942                 type->box_ids = NULL;
943         }
944         kfree(type->events_group);
945         type->events_group = NULL;
946 }
947
948 static void uncore_types_exit(struct intel_uncore_type **types)
949 {
950         for (; *types; types++)
951                 uncore_type_exit(*types);
952 }
953
954 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
955 {
956         struct intel_uncore_pmu *pmus;
957         size_t size;
958         int i, j;
959
960         pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
961         if (!pmus)
962                 return -ENOMEM;
963
964         size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
965
966         for (i = 0; i < type->num_boxes; i++) {
967                 pmus[i].func_id = setid ? i : -1;
968                 pmus[i].pmu_idx = i;
969                 pmus[i].type    = type;
970                 pmus[i].boxes   = kzalloc(size, GFP_KERNEL);
971                 if (!pmus[i].boxes)
972                         goto err;
973         }
974
975         type->pmus = pmus;
976         type->unconstrainted = (struct event_constraint)
977                 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
978                                 0, type->num_counters, 0, 0);
979
980         if (type->event_descs) {
981                 struct {
982                         struct attribute_group group;
983                         struct attribute *attrs[];
984                 } *attr_group;
985                 for (i = 0; type->event_descs[i].attr.attr.name; i++);
986
987                 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
988                                                                 GFP_KERNEL);
989                 if (!attr_group)
990                         goto err;
991
992                 attr_group->group.name = "events";
993                 attr_group->group.attrs = attr_group->attrs;
994
995                 for (j = 0; j < i; j++)
996                         attr_group->attrs[j] = &type->event_descs[j].attr.attr;
997
998                 type->events_group = &attr_group->group;
999         }
1000
1001         type->pmu_group = &uncore_pmu_attr_group;
1002
1003         if (type->set_mapping)
1004                 type->set_mapping(type);
1005
1006         return 0;
1007
1008 err:
1009         for (i = 0; i < type->num_boxes; i++)
1010                 kfree(pmus[i].boxes);
1011         kfree(pmus);
1012
1013         return -ENOMEM;
1014 }
1015
1016 static int __init
1017 uncore_types_init(struct intel_uncore_type **types, bool setid)
1018 {
1019         int ret;
1020
1021         for (; *types; types++) {
1022                 ret = uncore_type_init(*types, setid);
1023                 if (ret)
1024                         return ret;
1025         }
1026         return 0;
1027 }
1028
1029 /*
1030  * Get the die information of a PCI device.
1031  * @pdev: The PCI device.
1032  * @die: The die id which the device maps to.
1033  */
1034 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
1035 {
1036         *die = uncore_pcibus_to_dieid(pdev->bus);
1037         if (*die < 0)
1038                 return -EINVAL;
1039
1040         return 0;
1041 }
1042
1043 static struct intel_uncore_pmu *
1044 uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
1045 {
1046         struct intel_uncore_type **types = uncore_pci_uncores;
1047         struct intel_uncore_type *type;
1048         u64 box_ctl;
1049         int i, die;
1050
1051         for (; *types; types++) {
1052                 type = *types;
1053                 for (die = 0; die < __uncore_max_dies; die++) {
1054                         for (i = 0; i < type->num_boxes; i++) {
1055                                 if (!type->box_ctls[die])
1056                                         continue;
1057                                 box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1058                                 if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
1059                                     pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
1060                                     pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
1061                                         return &type->pmus[i];
1062                         }
1063                 }
1064         }
1065
1066         return NULL;
1067 }
1068
1069 /*
1070  * Find the PMU of a PCI device.
1071  * @pdev: The PCI device.
1072  * @ids: The ID table of the available PCI devices with a PMU.
1073  *       If NULL, search the whole uncore_pci_uncores.
1074  */
1075 static struct intel_uncore_pmu *
1076 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1077 {
1078         struct intel_uncore_pmu *pmu = NULL;
1079         struct intel_uncore_type *type;
1080         kernel_ulong_t data;
1081         unsigned int devfn;
1082
1083         if (!ids)
1084                 return uncore_pci_find_dev_pmu_from_types(pdev);
1085
1086         while (ids && ids->vendor) {
1087                 if ((ids->vendor == pdev->vendor) &&
1088                     (ids->device == pdev->device)) {
1089                         data = ids->driver_data;
1090                         devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1091                                           UNCORE_PCI_DEV_FUNC(data));
1092                         if (devfn == pdev->devfn) {
1093                                 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1094                                 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
1095                                 break;
1096                         }
1097                 }
1098                 ids++;
1099         }
1100         return pmu;
1101 }
1102
1103 /*
1104  * Register the PMU for a PCI device
1105  * @pdev: The PCI device.
1106  * @type: The corresponding PMU type of the device.
1107  * @pmu: The corresponding PMU of the device.
1108  * @die: The die id which the device maps to.
1109  */
1110 static int uncore_pci_pmu_register(struct pci_dev *pdev,
1111                                    struct intel_uncore_type *type,
1112                                    struct intel_uncore_pmu *pmu,
1113                                    int die)
1114 {
1115         struct intel_uncore_box *box;
1116         int ret;
1117
1118         if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1119                 return -EINVAL;
1120
1121         box = uncore_alloc_box(type, NUMA_NO_NODE);
1122         if (!box)
1123                 return -ENOMEM;
1124
1125         if (pmu->func_id < 0)
1126                 pmu->func_id = pdev->devfn;
1127         else
1128                 WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1129
1130         atomic_inc(&box->refcnt);
1131         box->dieid = die;
1132         box->pci_dev = pdev;
1133         box->pmu = pmu;
1134         uncore_box_init(box);
1135
1136         pmu->boxes[die] = box;
1137         if (atomic_inc_return(&pmu->activeboxes) > 1)
1138                 return 0;
1139
1140         /* First active box registers the pmu */
1141         ret = uncore_pmu_register(pmu);
1142         if (ret) {
1143                 pmu->boxes[die] = NULL;
1144                 uncore_box_exit(box);
1145                 kfree(box);
1146         }
1147         return ret;
1148 }
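
/*
 * Only the first active box of a PMU triggers perf_pmu_register(); boxes
 * found on other dies merely bump pmu->activeboxes and attach to the
 * already registered PMU.
 */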
1149
1150 /*
1151  * add a pci uncore device
1152  */
1153 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1154 {
1155         struct intel_uncore_type *type;
1156         struct intel_uncore_pmu *pmu = NULL;
1157         int die, ret;
1158
1159         ret = uncore_pci_get_dev_die_info(pdev, &die);
1160         if (ret)
1161                 return ret;
1162
1163         if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1164                 int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1165
1166                 uncore_extra_pci_dev[die].dev[idx] = pdev;
1167                 pci_set_drvdata(pdev, NULL);
1168                 return 0;
1169         }
1170
1171         type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1172
        /*
         * Some platforms, e.g. Knights Landing, use a common PCI device ID
         * for multiple instances of an uncore PMU device type. Check the
         * PCI slot and function to identify the uncore box.
         */
1178         if (id->driver_data & ~0xffff) {
1179                 struct pci_driver *pci_drv = pdev->driver;
1180
1181                 pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1182                 if (pmu == NULL)
1183                         return -ENODEV;
1184         } else {
                /*
                 * For a performance monitoring unit with multiple boxes,
                 * each box has a different function ID.
                 */
1189                 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1190         }
1191
1192         ret = uncore_pci_pmu_register(pdev, type, pmu, die);
1193
1194         pci_set_drvdata(pdev, pmu->boxes[die]);
1195
1196         return ret;
1197 }
1198
1199 /*
1200  * Unregister the PMU of a PCI device
1201  * @pmu: The corresponding PMU is unregistered.
1202  * @die: The die id which the device maps to.
1203  */
1204 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
1205 {
1206         struct intel_uncore_box *box = pmu->boxes[die];
1207
1208         pmu->boxes[die] = NULL;
1209         if (atomic_dec_return(&pmu->activeboxes) == 0)
1210                 uncore_pmu_unregister(pmu);
1211         uncore_box_exit(box);
1212         kfree(box);
1213 }
1214
1215 static void uncore_pci_remove(struct pci_dev *pdev)
1216 {
1217         struct intel_uncore_box *box;
1218         struct intel_uncore_pmu *pmu;
1219         int i, die;
1220
1221         if (uncore_pci_get_dev_die_info(pdev, &die))
1222                 return;
1223
1224         box = pci_get_drvdata(pdev);
1225         if (!box) {
1226                 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1227                         if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1228                                 uncore_extra_pci_dev[die].dev[i] = NULL;
1229                                 break;
1230                         }
1231                 }
1232                 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1233                 return;
1234         }
1235
1236         pmu = box->pmu;
1237
1238         pci_set_drvdata(pdev, NULL);
1239
1240         uncore_pci_pmu_unregister(pmu, die);
1241 }
1242
1243 static int uncore_bus_notify(struct notifier_block *nb,
1244                              unsigned long action, void *data,
1245                              const struct pci_device_id *ids)
1246 {
1247         struct device *dev = data;
1248         struct pci_dev *pdev = to_pci_dev(dev);
1249         struct intel_uncore_pmu *pmu;
1250         int die;
1251
1252         /* Unregister the PMU when the device is going to be deleted. */
1253         if (action != BUS_NOTIFY_DEL_DEVICE)
1254                 return NOTIFY_DONE;
1255
1256         pmu = uncore_pci_find_dev_pmu(pdev, ids);
1257         if (!pmu)
1258                 return NOTIFY_DONE;
1259
1260         if (uncore_pci_get_dev_die_info(pdev, &die))
1261                 return NOTIFY_DONE;
1262
1263         uncore_pci_pmu_unregister(pmu, die);
1264
1265         return NOTIFY_OK;
1266 }
1267
1268 static int uncore_pci_sub_bus_notify(struct notifier_block *nb,
1269                                      unsigned long action, void *data)
1270 {
1271         return uncore_bus_notify(nb, action, data,
1272                                  uncore_pci_sub_driver->id_table);
1273 }
1274
1275 static struct notifier_block uncore_pci_sub_notifier = {
1276         .notifier_call = uncore_pci_sub_bus_notify,
1277 };
1278
1279 static void uncore_pci_sub_driver_init(void)
1280 {
1281         const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
1282         struct intel_uncore_type *type;
1283         struct intel_uncore_pmu *pmu;
1284         struct pci_dev *pci_sub_dev;
1285         bool notify = false;
1286         unsigned int devfn;
1287         int die;
1288
1289         while (ids && ids->vendor) {
1290                 pci_sub_dev = NULL;
1291                 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
1292                 /*
1293                  * Search the available device, and register the
1294                  * corresponding PMU.
1295                  */
1296                 while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
1297                                                      ids->device, pci_sub_dev))) {
1298                         devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
1299                                           UNCORE_PCI_DEV_FUNC(ids->driver_data));
1300                         if (devfn != pci_sub_dev->devfn)
1301                                 continue;
1302
1303                         pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
1304                         if (!pmu)
1305                                 continue;
1306
1307                         if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
1308                                 continue;
1309
1310                         if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
1311                                                      die))
1312                                 notify = true;
1313                 }
1314                 ids++;
1315         }
1316
1317         if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier))
1318                 notify = false;
1319
1320         if (!notify)
1321                 uncore_pci_sub_driver = NULL;
1322 }
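
/*
 * The sub driver handles PCI devices that are owned by another driver, so
 * there are no probe()/remove() callbacks; instead a bus notifier catches
 * BUS_NOTIFY_DEL_DEVICE and unregisters the PMU before the device goes
 * away.
 */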
1323
1324 static int uncore_pci_bus_notify(struct notifier_block *nb,
1325                                      unsigned long action, void *data)
1326 {
1327         return uncore_bus_notify(nb, action, data, NULL);
1328 }
1329
1330 static struct notifier_block uncore_pci_notifier = {
1331         .notifier_call = uncore_pci_bus_notify,
1332 };
1333
1334
1335 static void uncore_pci_pmus_register(void)
1336 {
1337         struct intel_uncore_type **types = uncore_pci_uncores;
1338         struct intel_uncore_type *type;
1339         struct intel_uncore_pmu *pmu;
1340         struct pci_dev *pdev;
1341         u64 box_ctl;
1342         int i, die;
1343
1344         for (; *types; types++) {
1345                 type = *types;
1346                 for (die = 0; die < __uncore_max_dies; die++) {
1347                         for (i = 0; i < type->num_boxes; i++) {
1348                                 if (!type->box_ctls[die])
1349                                         continue;
1350                                 box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1351                                 pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl),
1352                                                                    UNCORE_DISCOVERY_PCI_BUS(box_ctl),
1353                                                                    UNCORE_DISCOVERY_PCI_DEVFN(box_ctl));
1354                                 if (!pdev)
1355                                         continue;
1356                                 pmu = &type->pmus[i];
1357
1358                                 uncore_pci_pmu_register(pdev, type, pmu, die);
1359                         }
1360                 }
1361         }
1362
1363         bus_register_notifier(&pci_bus_type, &uncore_pci_notifier);
1364 }
1365
1366 static int __init uncore_pci_init(void)
1367 {
1368         size_t size;
1369         int ret;
1370
1371         size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1372         uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1373         if (!uncore_extra_pci_dev) {
1374                 ret = -ENOMEM;
1375                 goto err;
1376         }
1377
1378         ret = uncore_types_init(uncore_pci_uncores, false);
1379         if (ret)
1380                 goto errtype;
1381
1382         if (uncore_pci_driver) {
1383                 uncore_pci_driver->probe = uncore_pci_probe;
1384                 uncore_pci_driver->remove = uncore_pci_remove;
1385
1386                 ret = pci_register_driver(uncore_pci_driver);
1387                 if (ret)
1388                         goto errtype;
1389         } else
1390                 uncore_pci_pmus_register();
1391
1392         if (uncore_pci_sub_driver)
1393                 uncore_pci_sub_driver_init();
1394
1395         pcidrv_registered = true;
1396         return 0;
1397
1398 errtype:
1399         uncore_types_exit(uncore_pci_uncores);
1400         kfree(uncore_extra_pci_dev);
1401         uncore_extra_pci_dev = NULL;
1402         uncore_free_pcibus_map();
1403 err:
1404         uncore_pci_uncores = empty_uncore;
1405         return ret;
1406 }
1407
1408 static void uncore_pci_exit(void)
1409 {
1410         if (pcidrv_registered) {
1411                 pcidrv_registered = false;
1412                 if (uncore_pci_sub_driver)
1413                         bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier);
1414                 if (uncore_pci_driver)
1415                         pci_unregister_driver(uncore_pci_driver);
1416                 else
1417                         bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier);
1418                 uncore_types_exit(uncore_pci_uncores);
1419                 kfree(uncore_extra_pci_dev);
1420                 uncore_free_pcibus_map();
1421         }
1422 }
1423
1424 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1425                                    int new_cpu)
1426 {
1427         struct intel_uncore_pmu *pmu = type->pmus;
1428         struct intel_uncore_box *box;
1429         int i, die;
1430
1431         die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1432         for (i = 0; i < type->num_boxes; i++, pmu++) {
1433                 box = pmu->boxes[die];
1434                 if (!box)
1435                         continue;
1436
1437                 if (old_cpu < 0) {
1438                         WARN_ON_ONCE(box->cpu != -1);
1439                         box->cpu = new_cpu;
1440                         continue;
1441                 }
1442
1443                 WARN_ON_ONCE(box->cpu != old_cpu);
1444                 box->cpu = -1;
1445                 if (new_cpu < 0)
1446                         continue;
1447
1448                 uncore_pmu_cancel_hrtimer(box);
1449                 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1450                 box->cpu = new_cpu;
1451         }
1452 }
1453
1454 static void uncore_change_context(struct intel_uncore_type **uncores,
1455                                   int old_cpu, int new_cpu)
1456 {
1457         for (; *uncores; uncores++)
1458                 uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1459 }
1460
1461 static void uncore_box_unref(struct intel_uncore_type **types, int id)
1462 {
1463         struct intel_uncore_type *type;
1464         struct intel_uncore_pmu *pmu;
1465         struct intel_uncore_box *box;
1466         int i;
1467
1468         for (; *types; types++) {
1469                 type = *types;
1470                 pmu = type->pmus;
1471                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1472                         box = pmu->boxes[id];
1473                         if (box && atomic_dec_return(&box->refcnt) == 0)
1474                                 uncore_box_exit(box);
1475                 }
1476         }
1477 }
1478
1479 static int uncore_event_cpu_offline(unsigned int cpu)
1480 {
1481         int die, target;
1482
1483         /* Check if exiting cpu is used for collecting uncore events */
1484         if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1485                 goto unref;
1486         /* Find a new cpu to collect uncore events */
1487         target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1488
1489         /* Migrate uncore events to the new target */
1490         if (target < nr_cpu_ids)
1491                 cpumask_set_cpu(target, &uncore_cpu_mask);
1492         else
1493                 target = -1;
1494
1495         uncore_change_context(uncore_msr_uncores, cpu, target);
1496         uncore_change_context(uncore_mmio_uncores, cpu, target);
1497         uncore_change_context(uncore_pci_uncores, cpu, target);
1498
1499 unref:
1500         /* Clear the references */
1501         die = topology_logical_die_id(cpu);
1502         uncore_box_unref(uncore_msr_uncores, die);
1503         uncore_box_unref(uncore_mmio_uncores, die);
1504         return 0;
1505 }
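
/*
 * Hotplug policy: exactly one CPU per die collects uncore events. When
 * that CPU goes offline, another online CPU in the same die (if any)
 * inherits the role and the perf contexts are migrated to it.
 */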
1506
1507 static int allocate_boxes(struct intel_uncore_type **types,
1508                          unsigned int die, unsigned int cpu)
1509 {
1510         struct intel_uncore_box *box, *tmp;
1511         struct intel_uncore_type *type;
1512         struct intel_uncore_pmu *pmu;
1513         LIST_HEAD(allocated);
1514         int i;
1515
1516         /* Try to allocate all required boxes */
1517         for (; *types; types++) {
1518                 type = *types;
1519                 pmu = type->pmus;
1520                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1521                         if (pmu->boxes[die])
1522                                 continue;
1523                         box = uncore_alloc_box(type, cpu_to_node(cpu));
1524                         if (!box)
1525                                 goto cleanup;
1526                         box->pmu = pmu;
1527                         box->dieid = die;
1528                         list_add(&box->active_list, &allocated);
1529                 }
1530         }
1531         /* Install them in the pmus */
1532         list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1533                 list_del_init(&box->active_list);
1534                 box->pmu->boxes[die] = box;
1535         }
1536         return 0;
1537
1538 cleanup:
1539         list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1540                 list_del_init(&box->active_list);
1541                 kfree(box);
1542         }
1543         return -ENOMEM;
1544 }
1545
1546 static int uncore_box_ref(struct intel_uncore_type **types,
1547                           int id, unsigned int cpu)
1548 {
1549         struct intel_uncore_type *type;
1550         struct intel_uncore_pmu *pmu;
1551         struct intel_uncore_box *box;
1552         int i, ret;
1553
1554         ret = allocate_boxes(types, id, cpu);
1555         if (ret)
1556                 return ret;
1557
1558         for (; *types; types++) {
1559                 type = *types;
1560                 pmu = type->pmus;
1561                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1562                         box = pmu->boxes[id];
1563                         if (box && atomic_inc_return(&box->refcnt) == 1)
1564                                 uncore_box_init(box);
1565                 }
1566         }
1567         return 0;
1568 }
1569
1570 static int uncore_event_cpu_online(unsigned int cpu)
1571 {
1572         int die, target, msr_ret, mmio_ret;
1573
1574         die = topology_logical_die_id(cpu);
1575         msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1576         mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1577         if (msr_ret && mmio_ret)
1578                 return -ENOMEM;
1579
1580         /*
1581          * Check if there is already an online CPU in this die
1582          * that collects uncore events.
1583          */
1584         target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1585         if (target < nr_cpu_ids)
1586                 return 0;
1587
1588         cpumask_set_cpu(cpu, &uncore_cpu_mask);
1589
1590         if (!msr_ret)
1591                 uncore_change_context(uncore_msr_uncores, -1, cpu);
1592         if (!mmio_ret)
1593                 uncore_change_context(uncore_mmio_uncores, -1, cpu);
1594         uncore_change_context(uncore_pci_uncores, -1, cpu);
1595         return 0;
1596 }
1597
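/* Register one perf PMU per box of the given uncore type. */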
1598 static int __init type_pmu_register(struct intel_uncore_type *type)
1599 {
1600         int i, ret;
1601
1602         for (i = 0; i < type->num_boxes; i++) {
1603                 ret = uncore_pmu_register(&type->pmus[i]);
1604                 if (ret)
1605                         return ret;
1606         }
1607         return 0;
1608 }
1609
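/* Register the PMUs of all MSR-based uncore types. */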
1610 static int __init uncore_msr_pmus_register(void)
1611 {
1612         struct intel_uncore_type **types = uncore_msr_uncores;
1613         int ret;
1614
1615         for (; *types; types++) {
1616                 ret = type_pmu_register(*types);
1617                 if (ret)
1618                         return ret;
1619         }
1620         return 0;
1621 }
1622
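/*
 * Initialize and register the MSR-based uncore types.  On failure the
 * type list is torn down and reset to the empty list.
 */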
1623 static int __init uncore_cpu_init(void)
1624 {
1625         int ret;
1626
1627         ret = uncore_types_init(uncore_msr_uncores, true);
1628         if (ret)
1629                 goto err;
1630
1631         ret = uncore_msr_pmus_register();
1632         if (ret)
1633                 goto err;
1634         return 0;
1635 err:
1636         uncore_types_exit(uncore_msr_uncores);
1637         uncore_msr_uncores = empty_uncore;
1638         return ret;
1639 }
1640
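/*
 * Initialize and register the MMIO-based uncore types.  On failure the
 * type list is torn down and reset to the empty list.
 */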
1641 static int __init uncore_mmio_init(void)
1642 {
1643         struct intel_uncore_type **types = uncore_mmio_uncores;
1644         int ret;
1645
1646         ret = uncore_types_init(types, true);
1647         if (ret)
1648                 goto err;
1649
1650         for (; *types; types++) {
1651                 ret = type_pmu_register(*types);
1652                 if (ret)
1653                         goto err;
1654         }
1655         return 0;
1656 err:
1657         uncore_types_exit(uncore_mmio_uncores);
1658         uncore_mmio_uncores = empty_uncore;
1659         return ret;
1660 }
1661
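/*
 * Per-model init callbacks.  cpu_init/pci_init/mmio_init populate the
 * MSR, PCI and MMIO uncore type lists for the detected CPU model; any
 * of them may be NULL when the model has no such uncore units.
 */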
1662 struct intel_uncore_init_fun {
1663         void    (*cpu_init)(void);
1664         int     (*pci_init)(void);
1665         void    (*mmio_init)(void);
1666 };
1667
1668 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1669         .cpu_init = nhm_uncore_cpu_init,
1670 };
1671
1672 static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1673         .cpu_init = snb_uncore_cpu_init,
1674         .pci_init = snb_uncore_pci_init,
1675 };
1676
1677 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1678         .cpu_init = snb_uncore_cpu_init,
1679         .pci_init = ivb_uncore_pci_init,
1680 };
1681
1682 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1683         .cpu_init = snb_uncore_cpu_init,
1684         .pci_init = hsw_uncore_pci_init,
1685 };
1686
1687 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1688         .cpu_init = snb_uncore_cpu_init,
1689         .pci_init = bdw_uncore_pci_init,
1690 };
1691
1692 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1693         .cpu_init = snbep_uncore_cpu_init,
1694         .pci_init = snbep_uncore_pci_init,
1695 };
1696
1697 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1698         .cpu_init = nhmex_uncore_cpu_init,
1699 };
1700
1701 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1702         .cpu_init = ivbep_uncore_cpu_init,
1703         .pci_init = ivbep_uncore_pci_init,
1704 };
1705
1706 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1707         .cpu_init = hswep_uncore_cpu_init,
1708         .pci_init = hswep_uncore_pci_init,
1709 };
1710
1711 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1712         .cpu_init = bdx_uncore_cpu_init,
1713         .pci_init = bdx_uncore_pci_init,
1714 };
1715
1716 static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1717         .cpu_init = knl_uncore_cpu_init,
1718         .pci_init = knl_uncore_pci_init,
1719 };
1720
1721 static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1722         .cpu_init = skl_uncore_cpu_init,
1723         .pci_init = skl_uncore_pci_init,
1724 };
1725
1726 static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1727         .cpu_init = skx_uncore_cpu_init,
1728         .pci_init = skx_uncore_pci_init,
1729 };
1730
1731 static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1732         .cpu_init = icl_uncore_cpu_init,
1733         .pci_init = skl_uncore_pci_init,
1734 };
1735
1736 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1737         .cpu_init = tgl_uncore_cpu_init,
1738         .mmio_init = tgl_uncore_mmio_init,
1739 };
1740
1741 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1742         .cpu_init = tgl_uncore_cpu_init,
1743         .mmio_init = tgl_l_uncore_mmio_init,
1744 };
1745
1746 static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
1747         .cpu_init = tgl_uncore_cpu_init,
1748         .pci_init = skl_uncore_pci_init,
1749 };
1750
1751 static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
1752         .cpu_init = adl_uncore_cpu_init,
1753         .mmio_init = tgl_uncore_mmio_init,
1754 };
1755
1756 static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1757         .cpu_init = icx_uncore_cpu_init,
1758         .pci_init = icx_uncore_pci_init,
1759         .mmio_init = icx_uncore_mmio_init,
1760 };
1761
1762 static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1763         .cpu_init = snr_uncore_cpu_init,
1764         .pci_init = snr_uncore_pci_init,
1765         .mmio_init = snr_uncore_mmio_init,
1766 };
1767
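/*
 * Fallback used for models without an entry in intel_uncore_match, when
 * the PerfMon discovery tables are available.
 */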
1768 static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
1769         .cpu_init = intel_uncore_generic_uncore_cpu_init,
1770         .pci_init = intel_uncore_generic_uncore_pci_init,
1771         .mmio_init = intel_uncore_generic_uncore_mmio_init,
1772 };
1773
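/* CPU model table mapping each supported model to its init callbacks. */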
1774 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1775         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &nhm_uncore_init),
1776         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &nhm_uncore_init),
1777         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &nhm_uncore_init),
1778         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &nhm_uncore_init),
1779         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &snb_uncore_init),
1780         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &ivb_uncore_init),
1781         X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &hsw_uncore_init),
1782         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &hsw_uncore_init),
1783         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &hsw_uncore_init),
1784         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &bdw_uncore_init),
1785         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &bdw_uncore_init),
1786         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &snbep_uncore_init),
1787         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &nhmex_uncore_init),
1788         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &nhmex_uncore_init),
1789         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &ivbep_uncore_init),
1790         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &hswep_uncore_init),
1791         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &bdx_uncore_init),
1792         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &bdx_uncore_init),
1793         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &knl_uncore_init),
1794         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &knl_uncore_init),
1795         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &skl_uncore_init),
1796         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &skl_uncore_init),
1797         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &skx_uncore_init),
1798         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &skl_uncore_init),
1799         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &skl_uncore_init),
1800         X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,         &skl_uncore_init),
1801         X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,           &skl_uncore_init),
1802         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           &icl_uncore_init),
1803         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,        &icl_uncore_init),
1804         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,             &icl_uncore_init),
1805         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &icx_uncore_init),
1806         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &icx_uncore_init),
1807         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         &tgl_l_uncore_init),
1808         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           &tgl_uncore_init),
1809         X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &rkl_uncore_init),
1810         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &adl_uncore_init),
1811         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &adl_uncore_init),
1812         X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &snr_uncore_init),
1813         {},
1814 };
1815 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
1816
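/*
 * Module init: select the init callbacks for the boot CPU model (or fall
 * back to the discovery-based generic ones), bring up the PCI, MSR and
 * MMIO uncore PMUs, and install the CPU hotplug callbacks that manage
 * the per-die collector CPUs.
 */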
1817 static int __init intel_uncore_init(void)
1818 {
1819         const struct x86_cpu_id *id;
1820         struct intel_uncore_init_fun *uncore_init;
1821         int pret = 0, cret = 0, mret = 0, ret;
1822
1823         if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1824                 return -ENODEV;
1825
1826         __uncore_max_dies =
1827                 topology_max_packages() * topology_max_die_per_package();
1828
1829         id = x86_match_cpu(intel_uncore_match);
1830         if (!id) {
1831                 if (!uncore_no_discover && intel_uncore_has_discovery_tables())
1832                         uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
1833                 else
1834                         return -ENODEV;
1835         } else
1836                 uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1837
1838         if (uncore_init->pci_init) {
1839                 pret = uncore_init->pci_init();
1840                 if (!pret)
1841                         pret = uncore_pci_init();
1842         }
1843
1844         if (uncore_init->cpu_init) {
1845                 uncore_init->cpu_init();
1846                 cret = uncore_cpu_init();
1847         }
1848
1849         if (uncore_init->mmio_init) {
1850                 uncore_init->mmio_init();
1851                 mret = uncore_mmio_init();
1852         }
1853
1854         if (cret && pret && mret) {
1855                 ret = -ENODEV;
1856                 goto free_discovery;
1857         }
1858
1859         /* Install hotplug callbacks to set up the collector targets for each die */
1860         ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1861                                 "perf/x86/intel/uncore:online",
1862                                 uncore_event_cpu_online,
1863                                 uncore_event_cpu_offline);
1864         if (ret)
1865                 goto err;
1866         return 0;
1867
1868 err:
1869         uncore_types_exit(uncore_msr_uncores);
1870         uncore_types_exit(uncore_mmio_uncores);
1871         uncore_pci_exit();
1872 free_discovery:
1873         intel_uncore_clear_discovery_tables();
1874         return ret;
1875 }
1876 module_init(intel_uncore_init);
1877
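/* Module exit: remove the hotplug state and tear down all uncore types. */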
1878 static void __exit intel_uncore_exit(void)
1879 {
1880         cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1881         uncore_types_exit(uncore_msr_uncores);
1882         uncore_types_exit(uncore_mmio_uncores);
1883         uncore_pci_exit();
1884         intel_uncore_clear_discovery_tables();
1885 }
1886 module_exit(intel_uncore_exit);