perf/x86/intel/uncore: Factor out uncore_pci_pmu_register()
arch/x86/events/intel/uncore.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
3
4 #include <asm/cpu_device_id.h>
5 #include <asm/intel-family.h>
6 #include "uncore.h"
7
8 static struct intel_uncore_type *empty_uncore[] = { NULL, };
9 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
10 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
11 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
12
13 static bool pcidrv_registered;
14 struct pci_driver *uncore_pci_driver;
15 /* pci bus to socket mapping */
16 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
17 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
18 struct pci_extra_dev *uncore_extra_pci_dev;
19 int __uncore_max_dies;
20
21 /* mask of cpus that collect uncore events */
22 static cpumask_t uncore_cpu_mask;
23
24 /* constraint for the fixed counter */
25 static struct event_constraint uncore_constraint_fixed =
26         EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
27 struct event_constraint uncore_constraint_empty =
28         EVENT_CONSTRAINT(0, 0, 0);
29
30 MODULE_LICENSE("GPL");
31
32 int uncore_pcibus_to_physid(struct pci_bus *bus)
33 {
34         struct pci2phy_map *map;
35         int phys_id = -1;
36
37         raw_spin_lock(&pci2phy_map_lock);
38         list_for_each_entry(map, &pci2phy_map_head, list) {
39                 if (map->segment == pci_domain_nr(bus)) {
40                         phys_id = map->pbus_to_physid[bus->number];
41                         break;
42                 }
43         }
44         raw_spin_unlock(&pci2phy_map_lock);
45
46         return phys_id;
47 }
48
49 static void uncore_free_pcibus_map(void)
50 {
51         struct pci2phy_map *map, *tmp;
52
53         list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
54                 list_del(&map->list);
55                 kfree(map);
56         }
57 }
58
59 struct pci2phy_map *__find_pci2phy_map(int segment)
60 {
61         struct pci2phy_map *map, *alloc = NULL;
62         int i;
63
64         lockdep_assert_held(&pci2phy_map_lock);
65
66 lookup:
67         list_for_each_entry(map, &pci2phy_map_head, list) {
68                 if (map->segment == segment)
69                         goto end;
70         }
71
72         if (!alloc) {
73                 raw_spin_unlock(&pci2phy_map_lock);
74                 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
75                 raw_spin_lock(&pci2phy_map_lock);
76
77                 if (!alloc)
78                         return NULL;
79
80                 goto lookup;
81         }
82
83         map = alloc;
84         alloc = NULL;
85         map->segment = segment;
86         for (i = 0; i < 256; i++)
87                 map->pbus_to_physid[i] = -1;
88         list_add_tail(&map->list, &pci2phy_map_head);
89
90 end:
91         kfree(alloc);
92         return map;
93 }
94
95 ssize_t uncore_event_show(struct kobject *kobj,
96                           struct kobj_attribute *attr, char *buf)
97 {
98         struct uncore_event_desc *event =
99                 container_of(attr, struct uncore_event_desc, attr);
100         return sprintf(buf, "%s", event->config);
101 }
102
103 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
104 {
105         unsigned int dieid = topology_logical_die_id(cpu);
106
107         /*
108          * The unsigned check also catches the '-1' return value for
109          * non-existent mappings in the topology map.
110          */
111         return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
112 }
113
114 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
115 {
116         u64 count;
117
118         rdmsrl(event->hw.event_base, count);
119
120         return count;
121 }
122
123 void uncore_mmio_exit_box(struct intel_uncore_box *box)
124 {
125         if (box->io_addr)
126                 iounmap(box->io_addr);
127 }
128
129 u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
130                              struct perf_event *event)
131 {
132         if (!box->io_addr)
133                 return 0;
134
135         if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
136                 return 0;
137
138         return readq(box->io_addr + event->hw.event_base);
139 }
140
141 /*
142  * generic get constraint function for shared match/mask registers.
143  */
144 struct event_constraint *
145 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
146 {
147         struct intel_uncore_extra_reg *er;
148         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
149         struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
150         unsigned long flags;
151         bool ok = false;
152
153         /*
154          * reg->alloc can be set due to existing state, so for fake box we
155          * need to ignore this, otherwise we might fail to allocate proper
156          * fake state for this extra reg constraint.
157          */
158         if (reg1->idx == EXTRA_REG_NONE ||
159             (!uncore_box_is_fake(box) && reg1->alloc))
160                 return NULL;
161
162         er = &box->shared_regs[reg1->idx];
163         raw_spin_lock_irqsave(&er->lock, flags);
164         if (!atomic_read(&er->ref) ||
165             (er->config1 == reg1->config && er->config2 == reg2->config)) {
166                 atomic_inc(&er->ref);
167                 er->config1 = reg1->config;
168                 er->config2 = reg2->config;
169                 ok = true;
170         }
171         raw_spin_unlock_irqrestore(&er->lock, flags);
172
173         if (ok) {
174                 if (!uncore_box_is_fake(box))
175                         reg1->alloc = 1;
176                 return NULL;
177         }
178
179         return &uncore_constraint_empty;
180 }
181
182 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
183 {
184         struct intel_uncore_extra_reg *er;
185         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
186
187         /*
188          * Only put constraint if extra reg was actually allocated. Also
189          * takes care of events which do not use an extra shared reg.
190          *
191          * Also, if this is a fake box we shouldn't touch any event state
192          * (reg->alloc) and we don't care about leaving inconsistent box
193          * state either since it will be thrown out.
194          */
195         if (uncore_box_is_fake(box) || !reg1->alloc)
196                 return;
197
198         er = &box->shared_regs[reg1->idx];
199         atomic_dec(&er->ref);
200         reg1->alloc = 0;
201 }
202
203 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
204 {
205         struct intel_uncore_extra_reg *er;
206         unsigned long flags;
207         u64 config;
208
209         er = &box->shared_regs[idx];
210
211         raw_spin_lock_irqsave(&er->lock, flags);
212         config = er->config;
213         raw_spin_unlock_irqrestore(&er->lock, flags);
214
215         return config;
216 }
217
218 static void uncore_assign_hw_event(struct intel_uncore_box *box,
219                                    struct perf_event *event, int idx)
220 {
221         struct hw_perf_event *hwc = &event->hw;
222
223         hwc->idx = idx;
224         hwc->last_tag = ++box->tags[idx];
225
226         if (uncore_pmc_fixed(hwc->idx)) {
227                 hwc->event_base = uncore_fixed_ctr(box);
228                 hwc->config_base = uncore_fixed_ctl(box);
229                 return;
230         }
231
232         hwc->config_base = uncore_event_ctl(box, hwc->idx);
233         hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
234 }
235
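/*
 * Read the current counter value and fold the difference into event->count.
 * Counters have different widths (free running, fixed, generic), so the
 * delta is computed in the counter's own width by shifting both values up
 * to bit 63 and back down, which also handles wrap-around. The xchg loop
 * retries if prev_count was updated concurrently (e.g. by the hrtimer poll).
 */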
236 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
237 {
238         u64 prev_count, new_count, delta;
239         int shift;
240
241         if (uncore_pmc_freerunning(event->hw.idx))
242                 shift = 64 - uncore_freerunning_bits(box, event);
243         else if (uncore_pmc_fixed(event->hw.idx))
244                 shift = 64 - uncore_fixed_ctr_bits(box);
245         else
246                 shift = 64 - uncore_perf_ctr_bits(box);
247
248         /* the hrtimer might modify the previous event value */
249 again:
250         prev_count = local64_read(&event->hw.prev_count);
251         new_count = uncore_read_counter(box, event);
252         if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
253                 goto again;
254
255         delta = (new_count << shift) - (prev_count << shift);
256         delta >>= shift;
257
258         local64_add(delta, &event->count);
259 }
260
261 /*
262  * The overflow interrupt is unavailable for SandyBridge-EP and is broken
263  * for SandyBridge. So we use an hrtimer to periodically poll the counter
264  * to avoid overflow.
265  */
266 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
267 {
268         struct intel_uncore_box *box;
269         struct perf_event *event;
270         unsigned long flags;
271         int bit;
272
273         box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
274         if (!box->n_active || box->cpu != smp_processor_id())
275                 return HRTIMER_NORESTART;
276         /*
277          * disable local interrupts to prevent uncore_pmu_event_start/stop
278          * from interrupting the update process
279          */
280         local_irq_save(flags);
281
282         /*
283          * handle boxes with an active event list as opposed to active
284          * counters
285          */
286         list_for_each_entry(event, &box->active_list, active_entry) {
287                 uncore_perf_event_update(box, event);
288         }
289
290         for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
291                 uncore_perf_event_update(box, box->events[bit]);
292
293         local_irq_restore(flags);
294
295         hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
296         return HRTIMER_RESTART;
297 }
298
299 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
300 {
301         hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
302                       HRTIMER_MODE_REL_PINNED);
303 }
304
305 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
306 {
307         hrtimer_cancel(&box->hrtimer);
308 }
309
310 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
311 {
312         hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
313         box->hrtimer.function = uncore_pmu_hrtimer;
314 }
315
316 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
317                                                  int node)
318 {
319         int i, size, numshared = type->num_shared_regs;
320         struct intel_uncore_box *box;
321
322         size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
323
324         box = kzalloc_node(size, GFP_KERNEL, node);
325         if (!box)
326                 return NULL;
327
328         for (i = 0; i < numshared; i++)
329                 raw_spin_lock_init(&box->shared_regs[i].lock);
330
331         uncore_pmu_init_hrtimer(box);
332         box->cpu = -1;
333         box->pci_phys_id = -1;
334         box->dieid = -1;
335
336         /* set default hrtimer timeout */
337         box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
338
339         INIT_LIST_HEAD(&box->active_list);
340
341         return box;
342 }
343
344 /*
345  * Use the uncore_pmu_event_init() pmu event_init callback
346  * as a detection point for uncore events.
347  */
348 static int uncore_pmu_event_init(struct perf_event *event);
349
350 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
351 {
352         return &box->pmu->pmu == event->pmu;
353 }
354
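/*
 * Collect the leader and, if @dogrp is set, its sibling events that belong
 * to this box into box->event_list, starting at the current n_events.
 * Returns the new number of collected events, or -EINVAL if the box runs
 * out of counters (num_counters plus an optional fixed counter).
 */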
355 static int
356 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
357                       bool dogrp)
358 {
359         struct perf_event *event;
360         int n, max_count;
361
362         max_count = box->pmu->type->num_counters;
363         if (box->pmu->type->fixed_ctl)
364                 max_count++;
365
366         if (box->n_events >= max_count)
367                 return -EINVAL;
368
369         n = box->n_events;
370
371         if (is_box_event(box, leader)) {
372                 box->event_list[n] = leader;
373                 n++;
374         }
375
376         if (!dogrp)
377                 return n;
378
379         for_each_sibling_event(event, leader) {
380                 if (!is_box_event(box, event) ||
381                     event->state <= PERF_EVENT_STATE_OFF)
382                         continue;
383
384                 if (n >= max_count)
385                         return -EINVAL;
386
387                 box->event_list[n] = event;
388                 n++;
389         }
390         return n;
391 }
392
393 static struct event_constraint *
394 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
395 {
396         struct intel_uncore_type *type = box->pmu->type;
397         struct event_constraint *c;
398
399         if (type->ops->get_constraint) {
400                 c = type->ops->get_constraint(box, event);
401                 if (c)
402                         return c;
403         }
404
405         if (event->attr.config == UNCORE_FIXED_EVENT)
406                 return &uncore_constraint_fixed;
407
408         if (type->constraints) {
409                 for_each_event_constraint(c, type->constraints) {
410                         if ((event->hw.config & c->cmask) == c->code)
411                                 return c;
412                 }
413         }
414
415         return &type->unconstrainted;
416 }
417
418 static void uncore_put_event_constraint(struct intel_uncore_box *box,
419                                         struct perf_event *event)
420 {
421         if (box->pmu->type->ops->put_constraint)
422                 box->pmu->type->ops->put_constraint(box, event);
423 }
424
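/*
 * Assign hardware counters to the first @n events in box->event_list.
 * Fast path: reuse each event's previous counter if its constraint still
 * allows it and the counter is free. Otherwise fall back to the generic
 * perf_assign_events() weight-ordered matching. On failure (or when only
 * validating with assign == NULL) the constraints are released again.
 */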
425 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
426 {
427         unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
428         struct event_constraint *c;
429         int i, wmin, wmax, ret = 0;
430         struct hw_perf_event *hwc;
431
432         bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
433
434         for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
435                 c = uncore_get_event_constraint(box, box->event_list[i]);
436                 box->event_constraint[i] = c;
437                 wmin = min(wmin, c->weight);
438                 wmax = max(wmax, c->weight);
439         }
440
441         /* fastpath, try to reuse previous register */
442         for (i = 0; i < n; i++) {
443                 hwc = &box->event_list[i]->hw;
444                 c = box->event_constraint[i];
445
446                 /* never assigned */
447                 if (hwc->idx == -1)
448                         break;
449
450                 /* constraint still honored */
451                 if (!test_bit(hwc->idx, c->idxmsk))
452                         break;
453
454                 /* not already used */
455                 if (test_bit(hwc->idx, used_mask))
456                         break;
457
458                 __set_bit(hwc->idx, used_mask);
459                 if (assign)
460                         assign[i] = hwc->idx;
461         }
462         /* slow path */
463         if (i != n)
464                 ret = perf_assign_events(box->event_constraint, n,
465                                          wmin, wmax, n, assign);
466
467         if (!assign || ret) {
468                 for (i = 0; i < n; i++)
469                         uncore_put_event_constraint(box, box->event_list[i]);
470         }
471         return ret ? -EINVAL : 0;
472 }
473
474 void uncore_pmu_event_start(struct perf_event *event, int flags)
475 {
476         struct intel_uncore_box *box = uncore_event_to_box(event);
477         int idx = event->hw.idx;
478
479         if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
480                 return;
481
482         /*
483          * A free running counter is read-only and always active.
484          * Use the current counter value as the start point.
485          * There is no overflow interrupt for a free running counter.
486          * Use hrtimer to periodically poll the counter to avoid overflow.
487          */
488         if (uncore_pmc_freerunning(event->hw.idx)) {
489                 list_add_tail(&event->active_entry, &box->active_list);
490                 local64_set(&event->hw.prev_count,
491                             uncore_read_counter(box, event));
492                 if (box->n_active++ == 0)
493                         uncore_pmu_start_hrtimer(box);
494                 return;
495         }
496
497         if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
498                 return;
499
500         event->hw.state = 0;
501         box->events[idx] = event;
502         box->n_active++;
503         __set_bit(idx, box->active_mask);
504
505         local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
506         uncore_enable_event(box, event);
507
508         if (box->n_active == 1)
509                 uncore_pmu_start_hrtimer(box);
510 }
511
512 void uncore_pmu_event_stop(struct perf_event *event, int flags)
513 {
514         struct intel_uncore_box *box = uncore_event_to_box(event);
515         struct hw_perf_event *hwc = &event->hw;
516
517         /* Cannot disable free running counter which is read-only */
518         if (uncore_pmc_freerunning(hwc->idx)) {
519                 list_del(&event->active_entry);
520                 if (--box->n_active == 0)
521                         uncore_pmu_cancel_hrtimer(box);
522                 uncore_perf_event_update(box, event);
523                 return;
524         }
525
526         if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
527                 uncore_disable_event(box, event);
528                 box->n_active--;
529                 box->events[hwc->idx] = NULL;
530                 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
531                 hwc->state |= PERF_HES_STOPPED;
532
533                 if (box->n_active == 0)
534                         uncore_pmu_cancel_hrtimer(box);
535         }
536
537         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
538                 /*
539                  * Drain the remaining delta count out of an event
540                  * that we are disabling:
541                  */
542                 uncore_perf_event_update(box, event);
543                 hwc->state |= PERF_HES_UPTODATE;
544         }
545 }
546
547 int uncore_pmu_event_add(struct perf_event *event, int flags)
548 {
549         struct intel_uncore_box *box = uncore_event_to_box(event);
550         struct hw_perf_event *hwc = &event->hw;
551         int assign[UNCORE_PMC_IDX_MAX];
552         int i, n, ret;
553
554         if (!box)
555                 return -ENODEV;
556
557         /*
558          * The free running counter is assigned in event_init().
559          * The free running counter event and free running counter
560          * are 1:1 mapped. It doesn't need to be tracked in event_list.
561          */
562         if (uncore_pmc_freerunning(hwc->idx)) {
563                 if (flags & PERF_EF_START)
564                         uncore_pmu_event_start(event, 0);
565                 return 0;
566         }
567
568         ret = n = uncore_collect_events(box, event, false);
569         if (ret < 0)
570                 return ret;
571
572         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
573         if (!(flags & PERF_EF_START))
574                 hwc->state |= PERF_HES_ARCH;
575
576         ret = uncore_assign_events(box, assign, n);
577         if (ret)
578                 return ret;
579
580         /* save events moving to new counters */
581         for (i = 0; i < box->n_events; i++) {
582                 event = box->event_list[i];
583                 hwc = &event->hw;
584
585                 if (hwc->idx == assign[i] &&
586                         hwc->last_tag == box->tags[assign[i]])
587                         continue;
588                 /*
589                  * Ensure we don't accidentally enable a stopped
590                  * counter simply because we rescheduled.
591                  */
592                 if (hwc->state & PERF_HES_STOPPED)
593                         hwc->state |= PERF_HES_ARCH;
594
595                 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
596         }
597
598         /* reprogram moved events into new counters */
599         for (i = 0; i < n; i++) {
600                 event = box->event_list[i];
601                 hwc = &event->hw;
602
603                 if (hwc->idx != assign[i] ||
604                         hwc->last_tag != box->tags[assign[i]])
605                         uncore_assign_hw_event(box, event, assign[i]);
606                 else if (i < box->n_events)
607                         continue;
608
609                 if (hwc->state & PERF_HES_ARCH)
610                         continue;
611
612                 uncore_pmu_event_start(event, 0);
613         }
614         box->n_events = n;
615
616         return 0;
617 }
618
619 void uncore_pmu_event_del(struct perf_event *event, int flags)
620 {
621         struct intel_uncore_box *box = uncore_event_to_box(event);
622         int i;
623
624         uncore_pmu_event_stop(event, PERF_EF_UPDATE);
625
626         /*
627          * The event for free running counter is not tracked by event_list.
628          * There is no need to force event->hw.idx = -1 to reassign the counter,
629          * because the event and the free running counter are 1:1 mapped.
630          */
631         if (uncore_pmc_freerunning(event->hw.idx))
632                 return;
633
634         for (i = 0; i < box->n_events; i++) {
635                 if (event == box->event_list[i]) {
636                         uncore_put_event_constraint(box, event);
637
638                         for (++i; i < box->n_events; i++)
639                                 box->event_list[i - 1] = box->event_list[i];
640
641                         --box->n_events;
642                         break;
643                 }
644         }
645
646         event->hw.idx = -1;
647         event->hw.last_tag = ~0ULL;
648 }
649
650 void uncore_pmu_event_read(struct perf_event *event)
651 {
652         struct intel_uncore_box *box = uncore_event_to_box(event);
653         uncore_perf_event_update(box, event);
654 }
655
656 /*
657  * validation ensures the group can be loaded onto the
658  * PMU if it was the only group available.
659  */
660 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
661                                 struct perf_event *event)
662 {
663         struct perf_event *leader = event->group_leader;
664         struct intel_uncore_box *fake_box;
665         int ret = -EINVAL, n;
666
667         /* The free running counter is always active. */
668         if (uncore_pmc_freerunning(event->hw.idx))
669                 return 0;
670
671         fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
672         if (!fake_box)
673                 return -ENOMEM;
674
675         fake_box->pmu = pmu;
676         /*
677          * the event is not yet connected with its
678          * siblings, therefore we must first collect
679          * existing siblings, then add the new event
680          * before we can simulate the scheduling
681          */
682         n = uncore_collect_events(fake_box, leader, true);
683         if (n < 0)
684                 goto out;
685
686         fake_box->n_events = n;
687         n = uncore_collect_events(fake_box, event, false);
688         if (n < 0)
689                 goto out;
690
691         fake_box->n_events = n;
692
693         ret = uncore_assign_events(fake_box, NULL, n);
694 out:
695         kfree(fake_box);
696         return ret;
697 }
698
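/*
 * Event init: reject sampling and per-task events, bind the event to the
 * CPU that collects uncore events for the die, and set up the config for
 * fixed, free running or generic counters. Group members are validated
 * against a fake box to make sure they can be scheduled together.
 */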
699 static int uncore_pmu_event_init(struct perf_event *event)
700 {
701         struct intel_uncore_pmu *pmu;
702         struct intel_uncore_box *box;
703         struct hw_perf_event *hwc = &event->hw;
704         int ret;
705
706         if (event->attr.type != event->pmu->type)
707                 return -ENOENT;
708
709         pmu = uncore_event_to_pmu(event);
710         /* no device found for this pmu */
711         if (pmu->func_id < 0)
712                 return -ENOENT;
713
714         /* Sampling not supported yet */
715         if (hwc->sample_period)
716                 return -EINVAL;
717
718         /*
719          * Place all uncore events for a particular physical package
720          * onto a single cpu
721          */
722         if (event->cpu < 0)
723                 return -EINVAL;
724         box = uncore_pmu_to_box(pmu, event->cpu);
725         if (!box || box->cpu < 0)
726                 return -EINVAL;
727         event->cpu = box->cpu;
728         event->pmu_private = box;
729
730         event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
731
732         event->hw.idx = -1;
733         event->hw.last_tag = ~0ULL;
734         event->hw.extra_reg.idx = EXTRA_REG_NONE;
735         event->hw.branch_reg.idx = EXTRA_REG_NONE;
736
737         if (event->attr.config == UNCORE_FIXED_EVENT) {
738                 /* no fixed counter */
739                 if (!pmu->type->fixed_ctl)
740                         return -EINVAL;
741                 /*
742                  * if there is only one fixed counter, only the first pmu
743                  * can access the fixed counter
744                  */
745                 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
746                         return -EINVAL;
747
748                 /* fixed counters have event field hardcoded to zero */
749                 hwc->config = 0ULL;
750         } else if (is_freerunning_event(event)) {
751                 hwc->config = event->attr.config;
752                 if (!check_valid_freerunning_event(box, event))
753                         return -EINVAL;
754                 event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
755                 /*
756                  * The free running counter event and free running counter
757                  * are always 1:1 mapped.
758                  * The free running counter is always active.
759                  * Assign the free running counter here.
760                  */
761                 event->hw.event_base = uncore_freerunning_counter(box, event);
762         } else {
763                 hwc->config = event->attr.config &
764                               (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
765                 if (pmu->type->ops->hw_config) {
766                         ret = pmu->type->ops->hw_config(box, event);
767                         if (ret)
768                                 return ret;
769                 }
770         }
771
772         if (event->group_leader != event)
773                 ret = uncore_validate_group(pmu, event);
774         else
775                 ret = 0;
776
777         return ret;
778 }
779
780 static void uncore_pmu_enable(struct pmu *pmu)
781 {
782         struct intel_uncore_pmu *uncore_pmu;
783         struct intel_uncore_box *box;
784
785         uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
786         if (!uncore_pmu)
787                 return;
788
789         box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
790         if (!box)
791                 return;
792
793         if (uncore_pmu->type->ops->enable_box)
794                 uncore_pmu->type->ops->enable_box(box);
795 }
796
797 static void uncore_pmu_disable(struct pmu *pmu)
798 {
799         struct intel_uncore_pmu *uncore_pmu;
800         struct intel_uncore_box *box;
801
802         uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
803         if (!uncore_pmu)
804                 return;
805
806         box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
807         if (!box)
808                 return;
809
810         if (uncore_pmu->type->ops->disable_box)
811                 uncore_pmu->type->ops->disable_box(box);
812 }
813
814 static ssize_t uncore_get_attr_cpumask(struct device *dev,
815                                 struct device_attribute *attr, char *buf)
816 {
817         return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
818 }
819
820 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
821
822 static struct attribute *uncore_pmu_attrs[] = {
823         &dev_attr_cpumask.attr,
824         NULL,
825 };
826
827 static const struct attribute_group uncore_pmu_attr_group = {
828         .attrs = uncore_pmu_attrs,
829 };
830
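/*
 * Register the PMU with the perf core. The resulting device shows up as
 * uncore_<type>[_<idx>] under /sys/bus/event_source/devices/ and can be
 * used from user space, e.g. (box name below is only an example):
 *
 *   perf stat -a -e 'uncore_cbox_0/event=0x0/' sleep 1
 *
 * Types that provide their own struct pmu template keep it; only the
 * attribute groups are (re)attached here.
 */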
831 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
832 {
833         int ret;
834
835         if (!pmu->type->pmu) {
836                 pmu->pmu = (struct pmu) {
837                         .attr_groups    = pmu->type->attr_groups,
838                         .task_ctx_nr    = perf_invalid_context,
839                         .pmu_enable     = uncore_pmu_enable,
840                         .pmu_disable    = uncore_pmu_disable,
841                         .event_init     = uncore_pmu_event_init,
842                         .add            = uncore_pmu_event_add,
843                         .del            = uncore_pmu_event_del,
844                         .start          = uncore_pmu_event_start,
845                         .stop           = uncore_pmu_event_stop,
846                         .read           = uncore_pmu_event_read,
847                         .module         = THIS_MODULE,
848                         .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
849                         .attr_update    = pmu->type->attr_update,
850                 };
851         } else {
852                 pmu->pmu = *pmu->type->pmu;
853                 pmu->pmu.attr_groups = pmu->type->attr_groups;
854                 pmu->pmu.attr_update = pmu->type->attr_update;
855         }
856
857         if (pmu->type->num_boxes == 1) {
858                 if (strlen(pmu->type->name) > 0)
859                         sprintf(pmu->name, "uncore_%s", pmu->type->name);
860                 else
861                         sprintf(pmu->name, "uncore");
862         } else {
863                 sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
864                         pmu->pmu_idx);
865         }
866
867         ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
868         if (!ret)
869                 pmu->registered = true;
870         return ret;
871 }
872
873 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
874 {
875         if (!pmu->registered)
876                 return;
877         perf_pmu_unregister(&pmu->pmu);
878         pmu->registered = false;
879 }
880
881 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
882 {
883         int die;
884
885         for (die = 0; die < uncore_max_dies(); die++)
886                 kfree(pmu->boxes[die]);
887         kfree(pmu->boxes);
888 }
889
890 static void uncore_type_exit(struct intel_uncore_type *type)
891 {
892         struct intel_uncore_pmu *pmu = type->pmus;
893         int i;
894
895         if (type->cleanup_mapping)
896                 type->cleanup_mapping(type);
897
898         if (pmu) {
899                 for (i = 0; i < type->num_boxes; i++, pmu++) {
900                         uncore_pmu_unregister(pmu);
901                         uncore_free_boxes(pmu);
902                 }
903                 kfree(type->pmus);
904                 type->pmus = NULL;
905         }
906         kfree(type->events_group);
907         type->events_group = NULL;
908 }
909
910 static void uncore_types_exit(struct intel_uncore_type **types)
911 {
912         for (; *types; types++)
913                 uncore_type_exit(*types);
914 }
915
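/*
 * One-time setup for an uncore type: allocate one intel_uncore_pmu per box
 * plus a per-die box pointer array for each of them, build the "events"
 * sysfs attribute group from the type's event descriptions, and hook up
 * the cpumask attribute group.
 */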
916 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
917 {
918         struct intel_uncore_pmu *pmus;
919         size_t size;
920         int i, j;
921
922         pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
923         if (!pmus)
924                 return -ENOMEM;
925
926         size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
927
928         for (i = 0; i < type->num_boxes; i++) {
929                 pmus[i].func_id = setid ? i : -1;
930                 pmus[i].pmu_idx = i;
931                 pmus[i].type    = type;
932                 pmus[i].boxes   = kzalloc(size, GFP_KERNEL);
933                 if (!pmus[i].boxes)
934                         goto err;
935         }
936
937         type->pmus = pmus;
938         type->unconstrainted = (struct event_constraint)
939                 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
940                                 0, type->num_counters, 0, 0);
941
942         if (type->event_descs) {
943                 struct {
944                         struct attribute_group group;
945                         struct attribute *attrs[];
946                 } *attr_group;
947                 for (i = 0; type->event_descs[i].attr.attr.name; i++);
948
949                 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
950                                                                 GFP_KERNEL);
951                 if (!attr_group)
952                         goto err;
953
954                 attr_group->group.name = "events";
955                 attr_group->group.attrs = attr_group->attrs;
956
957                 for (j = 0; j < i; j++)
958                         attr_group->attrs[j] = &type->event_descs[j].attr.attr;
959
960                 type->events_group = &attr_group->group;
961         }
962
963         type->pmu_group = &uncore_pmu_attr_group;
964
965         if (type->set_mapping)
966                 type->set_mapping(type);
967
968         return 0;
969
970 err:
971         for (i = 0; i < type->num_boxes; i++)
972                 kfree(pmus[i].boxes);
973         kfree(pmus);
974
975         return -ENOMEM;
976 }
977
978 static int __init
979 uncore_types_init(struct intel_uncore_type **types, bool setid)
980 {
981         int ret;
982
983         for (; *types; types++) {
984                 ret = uncore_type_init(*types, setid);
985                 if (ret)
986                         return ret;
987         }
988         return 0;
989 }
990
991 /*
992  * Get the die information of a PCI device.
993  * @pdev: The PCI device.
994  * @phys_id: The physical socket id which the device maps to.
995  * @die: The die id which the device maps to.
996  */
997 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev,
998                                        int *phys_id, int *die)
999 {
1000         *phys_id = uncore_pcibus_to_physid(pdev->bus);
1001         if (*phys_id < 0)
1002                 return -ENODEV;
1003
1004         *die = (topology_max_die_per_package() > 1) ? *phys_id :
1005                                 topology_phys_to_logical_pkg(*phys_id);
1006         if (*die < 0)
1007                 return -EINVAL;
1008
1009         return 0;
1010 }
1011
1012 /*
1013  * Find the PMU of a PCI device.
1014  * @pdev: The PCI device.
1015  * @ids: The ID table of the available PCI devices with a PMU.
1016  */
1017 static struct intel_uncore_pmu *
1018 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1019 {
1020         struct intel_uncore_pmu *pmu = NULL;
1021         struct intel_uncore_type *type;
1022         kernel_ulong_t data;
1023         unsigned int devfn;
1024
1025         while (ids && ids->vendor) {
1026                 if ((ids->vendor == pdev->vendor) &&
1027                     (ids->device == pdev->device)) {
1028                         data = ids->driver_data;
1029                         devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1030                                           UNCORE_PCI_DEV_FUNC(data));
1031                         if (devfn == pdev->devfn) {
1032                                 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1033                                 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
1034                                 break;
1035                         }
1036                 }
1037                 ids++;
1038         }
1039         return pmu;
1040 }
1041
1042 /*
1043  * Register the PMU for a PCI device
1044  * @pdev: The PCI device.
1045  * @type: The corresponding PMU type of the device.
1046  * @pmu: The corresponding PMU of the device.
1047  * @phys_id: The physical socket id which the device maps to.
1048  * @die: The die id which the device maps to.
1049  */
1050 static int uncore_pci_pmu_register(struct pci_dev *pdev,
1051                                    struct intel_uncore_type *type,
1052                                    struct intel_uncore_pmu *pmu,
1053                                    int phys_id, int die)
1054 {
1055         struct intel_uncore_box *box;
1056         int ret;
1057
1058         if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1059                 return -EINVAL;
1060
1061         box = uncore_alloc_box(type, NUMA_NO_NODE);
1062         if (!box)
1063                 return -ENOMEM;
1064
1065         if (pmu->func_id < 0)
1066                 pmu->func_id = pdev->devfn;
1067         else
1068                 WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1069
1070         atomic_inc(&box->refcnt);
1071         box->pci_phys_id = phys_id;
1072         box->dieid = die;
1073         box->pci_dev = pdev;
1074         box->pmu = pmu;
1075         uncore_box_init(box);
1076
1077         pmu->boxes[die] = box;
1078         if (atomic_inc_return(&pmu->activeboxes) > 1)
1079                 return 0;
1080
1081         /* First active box registers the pmu */
1082         ret = uncore_pmu_register(pmu);
1083         if (ret) {
1084                 pmu->boxes[die] = NULL;
1085                 uncore_box_exit(box);
1086                 kfree(box);
1087         }
1088         return ret;
1089 }
1090
1091 /*
1092  * add a pci uncore device
1093  */
1094 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1095 {
1096         struct intel_uncore_type *type;
1097         struct intel_uncore_pmu *pmu = NULL;
1098         int phys_id, die, ret;
1099
1100         ret = uncore_pci_get_dev_die_info(pdev, &phys_id, &die);
1101         if (ret)
1102                 return ret;
1103
1104         if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1105                 int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1106
1107                 uncore_extra_pci_dev[die].dev[idx] = pdev;
1108                 pci_set_drvdata(pdev, NULL);
1109                 return 0;
1110         }
1111
1112         type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1113
1114         /*
1115          * Some platforms, e.g. Knights Landing, use a common PCI device ID
1116          * for multiple instances of an uncore PMU device type. Check the
1117          * PCI slot and func to identify the uncore box.
1118          */
1119         if (id->driver_data & ~0xffff) {
1120                 struct pci_driver *pci_drv = pdev->driver;
1121
1122                 pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1123                 if (pmu == NULL)
1124                         return -ENODEV;
1125         } else {
1126                 /*
1127                  * For a performance monitoring unit with multiple boxes,
1128                  * each box has a different function id.
1129                  */
1130                 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1131         }
1132
1133         ret = uncore_pci_pmu_register(pdev, type, pmu, phys_id, die);
1134
1135         pci_set_drvdata(pdev, pmu->boxes[die]);
1136
1137         return ret;
1138 }
1139
1140 static void uncore_pci_remove(struct pci_dev *pdev)
1141 {
1142         struct intel_uncore_box *box;
1143         struct intel_uncore_pmu *pmu;
1144         int i, phys_id, die;
1145
1146         phys_id = uncore_pcibus_to_physid(pdev->bus);
1147
1148         box = pci_get_drvdata(pdev);
1149         if (!box) {
1150                 die = (topology_max_die_per_package() > 1) ? phys_id :
1151                                         topology_phys_to_logical_pkg(phys_id);
1152                 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1153                         if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1154                                 uncore_extra_pci_dev[die].dev[i] = NULL;
1155                                 break;
1156                         }
1157                 }
1158                 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1159                 return;
1160         }
1161
1162         pmu = box->pmu;
1163         if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
1164                 return;
1165
1166         pci_set_drvdata(pdev, NULL);
1167         pmu->boxes[box->dieid] = NULL;
1168         if (atomic_dec_return(&pmu->activeboxes) == 0)
1169                 uncore_pmu_unregister(pmu);
1170         uncore_box_exit(box);
1171         kfree(box);
1172 }
1173
1174 static int __init uncore_pci_init(void)
1175 {
1176         size_t size;
1177         int ret;
1178
1179         size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1180         uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1181         if (!uncore_extra_pci_dev) {
1182                 ret = -ENOMEM;
1183                 goto err;
1184         }
1185
1186         ret = uncore_types_init(uncore_pci_uncores, false);
1187         if (ret)
1188                 goto errtype;
1189
1190         uncore_pci_driver->probe = uncore_pci_probe;
1191         uncore_pci_driver->remove = uncore_pci_remove;
1192
1193         ret = pci_register_driver(uncore_pci_driver);
1194         if (ret)
1195                 goto errtype;
1196
1197         pcidrv_registered = true;
1198         return 0;
1199
1200 errtype:
1201         uncore_types_exit(uncore_pci_uncores);
1202         kfree(uncore_extra_pci_dev);
1203         uncore_extra_pci_dev = NULL;
1204         uncore_free_pcibus_map();
1205 err:
1206         uncore_pci_uncores = empty_uncore;
1207         return ret;
1208 }
1209
1210 static void uncore_pci_exit(void)
1211 {
1212         if (pcidrv_registered) {
1213                 pcidrv_registered = false;
1214                 pci_unregister_driver(uncore_pci_driver);
1215                 uncore_types_exit(uncore_pci_uncores);
1216                 kfree(uncore_extra_pci_dev);
1217                 uncore_free_pcibus_map();
1218         }
1219 }
1220
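/*
 * Move all boxes of @type on the die of old_cpu/new_cpu to the new event
 * collecting CPU. Active perf contexts are migrated along, so running
 * events keep counting across the CPU change.
 */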
1221 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1222                                    int new_cpu)
1223 {
1224         struct intel_uncore_pmu *pmu = type->pmus;
1225         struct intel_uncore_box *box;
1226         int i, die;
1227
1228         die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1229         for (i = 0; i < type->num_boxes; i++, pmu++) {
1230                 box = pmu->boxes[die];
1231                 if (!box)
1232                         continue;
1233
1234                 if (old_cpu < 0) {
1235                         WARN_ON_ONCE(box->cpu != -1);
1236                         box->cpu = new_cpu;
1237                         continue;
1238                 }
1239
1240                 WARN_ON_ONCE(box->cpu != old_cpu);
1241                 box->cpu = -1;
1242                 if (new_cpu < 0)
1243                         continue;
1244
1245                 uncore_pmu_cancel_hrtimer(box);
1246                 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1247                 box->cpu = new_cpu;
1248         }
1249 }
1250
1251 static void uncore_change_context(struct intel_uncore_type **uncores,
1252                                   int old_cpu, int new_cpu)
1253 {
1254         for (; *uncores; uncores++)
1255                 uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1256 }
1257
1258 static void uncore_box_unref(struct intel_uncore_type **types, int id)
1259 {
1260         struct intel_uncore_type *type;
1261         struct intel_uncore_pmu *pmu;
1262         struct intel_uncore_box *box;
1263         int i;
1264
1265         for (; *types; types++) {
1266                 type = *types;
1267                 pmu = type->pmus;
1268                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1269                         box = pmu->boxes[id];
1270                         if (box && atomic_dec_return(&box->refcnt) == 0)
1271                                 uncore_box_exit(box);
1272                 }
1273         }
1274 }
1275
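/*
 * CPU hotplug callback: if the outgoing CPU was the event collecting CPU
 * for its die, pick another online CPU in the same die and migrate the
 * MSR, MMIO and PCI uncore contexts to it. Finally drop the die's
 * reference on the MSR and MMIO boxes.
 */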
1276 static int uncore_event_cpu_offline(unsigned int cpu)
1277 {
1278         int die, target;
1279
1280         /* Check if exiting cpu is used for collecting uncore events */
1281         if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1282                 goto unref;
1283         /* Find a new cpu to collect uncore events */
1284         target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1285
1286         /* Migrate uncore events to the new target */
1287         if (target < nr_cpu_ids)
1288                 cpumask_set_cpu(target, &uncore_cpu_mask);
1289         else
1290                 target = -1;
1291
1292         uncore_change_context(uncore_msr_uncores, cpu, target);
1293         uncore_change_context(uncore_mmio_uncores, cpu, target);
1294         uncore_change_context(uncore_pci_uncores, cpu, target);
1295
1296 unref:
1297         /* Clear the references */
1298         die = topology_logical_die_id(cpu);
1299         uncore_box_unref(uncore_msr_uncores, die);
1300         uncore_box_unref(uncore_mmio_uncores, die);
1301         return 0;
1302 }
1303
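/*
 * Allocate any missing boxes for this die in two phases: first allocate
 * everything on a local list, then install the boxes into their pmus.
 * On allocation failure nothing has been installed yet, so the partial
 * allocation can simply be freed.
 */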
1304 static int allocate_boxes(struct intel_uncore_type **types,
1305                          unsigned int die, unsigned int cpu)
1306 {
1307         struct intel_uncore_box *box, *tmp;
1308         struct intel_uncore_type *type;
1309         struct intel_uncore_pmu *pmu;
1310         LIST_HEAD(allocated);
1311         int i;
1312
1313         /* Try to allocate all required boxes */
1314         for (; *types; types++) {
1315                 type = *types;
1316                 pmu = type->pmus;
1317                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1318                         if (pmu->boxes[die])
1319                                 continue;
1320                         box = uncore_alloc_box(type, cpu_to_node(cpu));
1321                         if (!box)
1322                                 goto cleanup;
1323                         box->pmu = pmu;
1324                         box->dieid = die;
1325                         list_add(&box->active_list, &allocated);
1326                 }
1327         }
1328         /* Install them in the pmus */
1329         list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1330                 list_del_init(&box->active_list);
1331                 box->pmu->boxes[die] = box;
1332         }
1333         return 0;
1334
1335 cleanup:
1336         list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1337                 list_del_init(&box->active_list);
1338                 kfree(box);
1339         }
1340         return -ENOMEM;
1341 }
1342
1343 static int uncore_box_ref(struct intel_uncore_type **types,
1344                           int id, unsigned int cpu)
1345 {
1346         struct intel_uncore_type *type;
1347         struct intel_uncore_pmu *pmu;
1348         struct intel_uncore_box *box;
1349         int i, ret;
1350
1351         ret = allocate_boxes(types, id, cpu);
1352         if (ret)
1353                 return ret;
1354
1355         for (; *types; types++) {
1356                 type = *types;
1357                 pmu = type->pmus;
1358                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1359                         box = pmu->boxes[id];
1360                         if (box && atomic_inc_return(&box->refcnt) == 1)
1361                                 uncore_box_init(box);
1362                 }
1363         }
1364         return 0;
1365 }
1366
1367 static int uncore_event_cpu_online(unsigned int cpu)
1368 {
1369         int die, target, msr_ret, mmio_ret;
1370
1371         die = topology_logical_die_id(cpu);
1372         msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1373         mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1374         if (msr_ret && mmio_ret)
1375                 return -ENOMEM;
1376
1377         /*
1378          * Check if there is an online cpu in the package
1379          * which collects uncore events already.
1380          */
1381         target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1382         if (target < nr_cpu_ids)
1383                 return 0;
1384
1385         cpumask_set_cpu(cpu, &uncore_cpu_mask);
1386
1387         if (!msr_ret)
1388                 uncore_change_context(uncore_msr_uncores, -1, cpu);
1389         if (!mmio_ret)
1390                 uncore_change_context(uncore_mmio_uncores, -1, cpu);
1391         uncore_change_context(uncore_pci_uncores, -1, cpu);
1392         return 0;
1393 }
1394
1395 static int __init type_pmu_register(struct intel_uncore_type *type)
1396 {
1397         int i, ret;
1398
1399         for (i = 0; i < type->num_boxes; i++) {
1400                 ret = uncore_pmu_register(&type->pmus[i]);
1401                 if (ret)
1402                         return ret;
1403         }
1404         return 0;
1405 }
1406
1407 static int __init uncore_msr_pmus_register(void)
1408 {
1409         struct intel_uncore_type **types = uncore_msr_uncores;
1410         int ret;
1411
1412         for (; *types; types++) {
1413                 ret = type_pmu_register(*types);
1414                 if (ret)
1415                         return ret;
1416         }
1417         return 0;
1418 }
1419
1420 static int __init uncore_cpu_init(void)
1421 {
1422         int ret;
1423
1424         ret = uncore_types_init(uncore_msr_uncores, true);
1425         if (ret)
1426                 goto err;
1427
1428         ret = uncore_msr_pmus_register();
1429         if (ret)
1430                 goto err;
1431         return 0;
1432 err:
1433         uncore_types_exit(uncore_msr_uncores);
1434         uncore_msr_uncores = empty_uncore;
1435         return ret;
1436 }
1437
1438 static int __init uncore_mmio_init(void)
1439 {
1440         struct intel_uncore_type **types = uncore_mmio_uncores;
1441         int ret;
1442
1443         ret = uncore_types_init(types, true);
1444         if (ret)
1445                 goto err;
1446
1447         for (; *types; types++) {
1448                 ret = type_pmu_register(*types);
1449                 if (ret)
1450                         goto err;
1451         }
1452         return 0;
1453 err:
1454         uncore_types_exit(uncore_mmio_uncores);
1455         uncore_mmio_uncores = empty_uncore;
1456         return ret;
1457 }
1458
1459 struct intel_uncore_init_fun {
1460         void    (*cpu_init)(void);
1461         int     (*pci_init)(void);
1462         void    (*mmio_init)(void);
1463 };
1464
1465 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1466         .cpu_init = nhm_uncore_cpu_init,
1467 };
1468
1469 static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1470         .cpu_init = snb_uncore_cpu_init,
1471         .pci_init = snb_uncore_pci_init,
1472 };
1473
1474 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1475         .cpu_init = snb_uncore_cpu_init,
1476         .pci_init = ivb_uncore_pci_init,
1477 };
1478
1479 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1480         .cpu_init = snb_uncore_cpu_init,
1481         .pci_init = hsw_uncore_pci_init,
1482 };
1483
1484 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1485         .cpu_init = snb_uncore_cpu_init,
1486         .pci_init = bdw_uncore_pci_init,
1487 };
1488
1489 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1490         .cpu_init = snbep_uncore_cpu_init,
1491         .pci_init = snbep_uncore_pci_init,
1492 };
1493
1494 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1495         .cpu_init = nhmex_uncore_cpu_init,
1496 };
1497
1498 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1499         .cpu_init = ivbep_uncore_cpu_init,
1500         .pci_init = ivbep_uncore_pci_init,
1501 };
1502
1503 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1504         .cpu_init = hswep_uncore_cpu_init,
1505         .pci_init = hswep_uncore_pci_init,
1506 };
1507
1508 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1509         .cpu_init = bdx_uncore_cpu_init,
1510         .pci_init = bdx_uncore_pci_init,
1511 };
1512
1513 static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1514         .cpu_init = knl_uncore_cpu_init,
1515         .pci_init = knl_uncore_pci_init,
1516 };
1517
1518 static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1519         .cpu_init = skl_uncore_cpu_init,
1520         .pci_init = skl_uncore_pci_init,
1521 };
1522
1523 static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1524         .cpu_init = skx_uncore_cpu_init,
1525         .pci_init = skx_uncore_pci_init,
1526 };
1527
1528 static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1529         .cpu_init = icl_uncore_cpu_init,
1530         .pci_init = skl_uncore_pci_init,
1531 };
1532
1533 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1534         .cpu_init = icl_uncore_cpu_init,
1535         .mmio_init = tgl_uncore_mmio_init,
1536 };
1537
1538 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1539         .cpu_init = icl_uncore_cpu_init,
1540         .mmio_init = tgl_l_uncore_mmio_init,
1541 };
1542
1543 static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1544         .cpu_init = icx_uncore_cpu_init,
1545         .pci_init = icx_uncore_pci_init,
1546         .mmio_init = icx_uncore_mmio_init,
1547 };
1548
1549 static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1550         .cpu_init = snr_uncore_cpu_init,
1551         .pci_init = snr_uncore_pci_init,
1552         .mmio_init = snr_uncore_mmio_init,
1553 };
1554
1555 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1556         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &nhm_uncore_init),
1557         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &nhm_uncore_init),
1558         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &nhm_uncore_init),
1559         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &nhm_uncore_init),
1560         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &snb_uncore_init),
1561         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &ivb_uncore_init),
1562         X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &hsw_uncore_init),
1563         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &hsw_uncore_init),
1564         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &hsw_uncore_init),
1565         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &bdw_uncore_init),
1566         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &bdw_uncore_init),
1567         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &snbep_uncore_init),
1568         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &nhmex_uncore_init),
1569         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &nhmex_uncore_init),
1570         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &ivbep_uncore_init),
1571         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &hswep_uncore_init),
1572         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &bdx_uncore_init),
1573         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &bdx_uncore_init),
1574         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &knl_uncore_init),
1575         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &knl_uncore_init),
1576         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &skl_uncore_init),
1577         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &skl_uncore_init),
1578         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &skx_uncore_init),
1579         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &skl_uncore_init),
1580         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &skl_uncore_init),
1581         X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,         &skl_uncore_init),
1582         X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,           &skl_uncore_init),
1583         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           &icl_uncore_init),
1584         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,        &icl_uncore_init),
1585         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,             &icl_uncore_init),
1586         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &icx_uncore_init),
1587         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &icx_uncore_init),
1588         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         &tgl_l_uncore_init),
1589         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           &tgl_uncore_init),
1590         X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &snr_uncore_init),
1591         {},
1592 };
1593 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
1594
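/*
 * Module init: match the CPU model, bail out when running under a
 * hypervisor, then bring up the PCI, MSR and MMIO uncore PMUs that the
 * platform provides. The CPU hotplug callbacks registered at the end
 * select the event collecting CPU for each die.
 */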
1595 static int __init intel_uncore_init(void)
1596 {
1597         const struct x86_cpu_id *id;
1598         struct intel_uncore_init_fun *uncore_init;
1599         int pret = 0, cret = 0, mret = 0, ret;
1600
1601         id = x86_match_cpu(intel_uncore_match);
1602         if (!id)
1603                 return -ENODEV;
1604
1605         if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1606                 return -ENODEV;
1607
1608         __uncore_max_dies =
1609                 topology_max_packages() * topology_max_die_per_package();
1610
1611         uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1612         if (uncore_init->pci_init) {
1613                 pret = uncore_init->pci_init();
1614                 if (!pret)
1615                         pret = uncore_pci_init();
1616         }
1617
1618         if (uncore_init->cpu_init) {
1619                 uncore_init->cpu_init();
1620                 cret = uncore_cpu_init();
1621         }
1622
1623         if (uncore_init->mmio_init) {
1624                 uncore_init->mmio_init();
1625                 mret = uncore_mmio_init();
1626         }
1627
1628         if (cret && pret && mret)
1629                 return -ENODEV;
1630
1631         /* Install hotplug callbacks to set up the targets for each package */
1632         ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1633                                 "perf/x86/intel/uncore:online",
1634                                 uncore_event_cpu_online,
1635                                 uncore_event_cpu_offline);
1636         if (ret)
1637                 goto err;
1638         return 0;
1639
1640 err:
1641         uncore_types_exit(uncore_msr_uncores);
1642         uncore_types_exit(uncore_mmio_uncores);
1643         uncore_pci_exit();
1644         return ret;
1645 }
1646 module_init(intel_uncore_init);
1647
1648 static void __exit intel_uncore_exit(void)
1649 {
1650         cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1651         uncore_types_exit(uncore_msr_uncores);
1652         uncore_types_exit(uncore_mmio_uncores);
1653         uncore_pci_exit();
1654 }
1655 module_exit(intel_uncore_exit);