1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
4 #include <asm/cpu_device_id.h>
5 #include <asm/intel-family.h>
/*
 * NULL-terminated placeholder list. The MSR, PCI and MMIO uncore type
 * lists below all start out pointing at it.
 */
8 static struct intel_uncore_type *empty_uncore[] = { NULL, };
9 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
10 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
11 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
/*
 * Set to true in uncore_pci_init() after the pci_register_driver() call;
 * uncore_pci_exit() only tears the PCI side down when it is set.
 */
13 static bool pcidrv_registered;
/* PCI driver whose probe/remove hooks are filled in by uncore_pci_init(). */
14 struct pci_driver *uncore_pci_driver;
15 /* pci bus to socket mapping */
16 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
17 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
/* Array indexed by die, allocated in uncore_pci_init(). */
18 struct pci_extra_dev *uncore_extra_pci_dev;
19 int __uncore_max_dies;
21 /* mask of cpus that collect uncore events */
22 static cpumask_t uncore_cpu_mask;
24 /* constraint for the fixed counter */
25 static struct event_constraint uncore_constraint_fixed =
26 EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
/* All-zero constraint; uncore_get_constraint() returns a pointer to it. */
27 struct event_constraint uncore_constraint_empty =
28 EVENT_CONSTRAINT(0, 0, 0);
30 MODULE_LICENSE("GPL");
/*
 * Translate a PCI bus into the physical id recorded for it in the
 * pci2phy map list.
 *
 * With pci2phy_map_lock held, the list is searched for the entry whose
 * segment equals pci_domain_nr(bus); that entry's pbus_to_physid[] table
 * is then indexed by bus->number.
 *
 * NOTE(review): the declaration of phys_id and the return statement are
 * not part of this excerpt; callers in this file store the result in an
 * int named phys_id.
 */
32 int uncore_pcibus_to_physid(struct pci_bus *bus)
34 struct pci2phy_map *map;
37 raw_spin_lock(&pci2phy_map_lock);
38 list_for_each_entry(map, &pci2phy_map_head, list) {
39 if (map->segment == pci_domain_nr(bus)) {
40 phys_id = map->pbus_to_physid[bus->number];
44 raw_spin_unlock(&pci2phy_map_lock);
/*
 * Walk every entry of pci2phy_map_head using the removal-safe list
 * iterator.
 *
 * NOTE(review): the loop body is not part of this excerpt; presumably each
 * entry is unlinked and freed - confirm against the full source.
 */
49 static void uncore_free_pcibus_map(void)
51 struct pci2phy_map *map, *tmp;
53 list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
/*
 * Look up the pci2phy_map entry for a PCI segment, adding a new entry to
 * pci2phy_map_head when needed.
 *
 * The caller must hold pci2phy_map_lock (checked with
 * lockdep_assert_held()). The lock is released around the allocation and
 * taken again afterwards. A new entry gets its segment set and all 256
 * pbus_to_physid[] slots initialised to -1 before being appended to the
 * list.
 *
 * NOTE(review): the return statements, the allocation-failure handling and
 * the control flow after the lock is re-taken are not part of this excerpt.
 */
59 struct pci2phy_map *__find_pci2phy_map(int segment)
61 struct pci2phy_map *map, *alloc = NULL;
64 lockdep_assert_held(&pci2phy_map_lock);
67 list_for_each_entry(map, &pci2phy_map_head, list) {
68 if (map->segment == segment)
73 raw_spin_unlock(&pci2phy_map_lock);
/* GFP_KERNEL allocations may sleep, hence no raw spinlock held here. */
74 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
75 raw_spin_lock(&pci2phy_map_lock);
85 map->segment = segment;
86 for (i = 0; i < 256; i++)
87 map->pbus_to_physid[i] = -1;
88 list_add_tail(&map->list, &pci2phy_map_head);
95 ssize_t uncore_event_show(struct kobject *kobj,
96 struct kobj_attribute *attr, char *buf)
98 struct uncore_event_desc *event =
99 container_of(attr, struct uncore_event_desc, attr);
100 return sprintf(buf, "%s", event->config);
103 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
105 unsigned int dieid = topology_logical_die_id(cpu);
108 * The unsigned check also catches the '-1' return value for non
109 * existent mappings in the topology map.
111 return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
/*
 * Read the counter of an MSR based event: the MSR whose address is stored
 * in event->hw.event_base is read with rdmsrl() into count.
 *
 * NOTE(review): the declaration of count and the return statement are not
 * part of this excerpt.
 */
114 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
118 rdmsrl(event->hw.event_base, count);
/*
 * Drop the MMIO mapping of a box by calling iounmap() on box->io_addr.
 *
 * NOTE(review): whether the call is guarded by a condition cannot be told
 * from this excerpt.
 */
123 void uncore_mmio_exit_box(struct intel_uncore_box *box)
126 iounmap(box->io_addr);
/*
 * Read a 64-bit counter through the box's MMIO mapping.
 *
 * event->hw.event_base is used as an offset from box->io_addr and read
 * with readq(). The offset is first checked with
 * uncore_mmio_is_valid_offset().
 *
 * NOTE(review): what is returned when the offset check fails is not part
 * of this excerpt.
 */
129 u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
130 struct perf_event *event)
135 if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
138 return readq(box->io_addr + event->hw.event_base);
/*
 * Claim the shared extra register selected by event->hw.extra_reg.idx on
 * behalf of @event.
 *
 * With er->lock held and interrupts disabled, the register can be taken
 * when nobody references it (er->ref is zero) or when it already holds
 * exactly the requested pair of values (extra_reg.config and
 * branch_reg.config). In that case er->ref is incremented and both values
 * are stored in er->config1/er->config2.
 *
 * The last visible statement returns &uncore_constraint_empty.
 * NOTE(review): the statements executed when no extra register is needed,
 * the bookkeeping for non-fake boxes and the other return statements are
 * not part of this excerpt.
 */
142 * generic get constraint function for shared match/mask registers.
144 struct event_constraint *
145 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
147 struct intel_uncore_extra_reg *er;
148 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
149 struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
154 * reg->alloc can be set due to existing state, so for fake box we
155 * need to ignore this, otherwise we might fail to allocate proper
156 * fake state for this extra reg constraint.
158 if (reg1->idx == EXTRA_REG_NONE ||
159 (!uncore_box_is_fake(box) && reg1->alloc))
162 er = &box->shared_regs[reg1->idx];
163 raw_spin_lock_irqsave(&er->lock, flags);
164 if (!atomic_read(&er->ref) ||
165 (er->config1 == reg1->config && er->config2 == reg2->config)) {
166 atomic_inc(&er->ref);
167 er->config1 = reg1->config;
168 er->config2 = reg2->config;
171 raw_spin_unlock_irqrestore(&er->lock, flags);
174 if (!uncore_box_is_fake(box))
179 return &uncore_constraint_empty;
/*
 * Release the reference @event holds on its shared extra register: the
 * reference count of box->shared_regs[extra_reg.idx] is decremented.
 *
 * The guard at the top covers fake boxes and events whose extra_reg.alloc
 * is not set. NOTE(review): the statement under that guard and anything
 * following the atomic_dec() are not part of this excerpt.
 */
182 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
184 struct intel_uncore_extra_reg *er;
185 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
188 * Only put constraint if extra reg was actually allocated. Also
189 * takes care of event which do not use an extra shared reg.
191 * Also, if this is a fake box we shouldn't touch any event state
192 * (reg->alloc) and we don't care about leaving inconsistent box
193 * state either since it will be thrown out.
195 if (uncore_box_is_fake(box) || !reg1->alloc)
198 er = &box->shared_regs[reg1->idx];
199 atomic_dec(&er->ref);
/*
 * Access shared register @idx of @box with its lock held and interrupts
 * disabled.
 *
 * NOTE(review): the statement executed inside the locked section and the
 * return statement are not part of this excerpt; judging by the name and
 * the u64 return type the stored configuration is presumably read out -
 * confirm against the full source.
 */
203 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
205 struct intel_uncore_extra_reg *er;
209 er = &box->shared_regs[idx];
211 raw_spin_lock_irqsave(&er->lock, flags);
213 raw_spin_unlock_irqrestore(&er->lock, flags);
/*
 * Bind @event to counter slot @idx of @box.
 *
 * The slot's tag in box->tags[] is incremented and remembered in
 * hwc->last_tag. The register addresses are then chosen: for the fixed
 * counter uncore_fixed_ctr()/uncore_fixed_ctl(), otherwise
 * uncore_perf_ctr()/uncore_event_ctl() for hwc->idx.
 *
 * NOTE(review): the assignment of hwc->idx and the statement ending the
 * fixed-counter branch are not part of this excerpt.
 */
218 static void uncore_assign_hw_event(struct intel_uncore_box *box,
219 struct perf_event *event, int idx)
221 struct hw_perf_event *hwc = &event->hw;
224 hwc->last_tag = ++box->tags[idx];
226 if (uncore_pmc_fixed(hwc->idx)) {
227 hwc->event_base = uncore_fixed_ctr(box);
228 hwc->config_base = uncore_fixed_ctl(box);
232 hwc->config_base = uncore_event_ctl(box, hwc->idx);
233 hwc->event_base = uncore_perf_ctr(box, hwc->idx);
/*
 * Fold the progress of the hardware counter into event->count.
 *
 * shift is 64 minus the counter width, where the width depends on the
 * counter kind (free running, fixed or general purpose). The previously
 * saved value is read, the counter is read, and the saved value is
 * replaced with local64_xchg(). If the exchange does not return the value
 * read before, somebody else updated it in between.
 *
 * Both values are shifted left by shift before subtracting, so bits above
 * the counter width do not contribute to the difference.
 *
 * NOTE(review): the action taken when the exchange detects a concurrent
 * update, and any further adjustment of delta before it is added, are not
 * part of this excerpt.
 */
236 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
238 u64 prev_count, new_count, delta;
241 if (uncore_pmc_freerunning(event->hw.idx))
242 shift = 64 - uncore_freerunning_bits(box, event);
243 else if (uncore_pmc_fixed(event->hw.idx))
244 shift = 64 - uncore_fixed_ctr_bits(box);
246 shift = 64 - uncore_perf_ctr_bits(box);
248 /* the hrtimer might modify the previous event value */
250 prev_count = local64_read(&event->hw.prev_count);
251 new_count = uncore_read_counter(box, event);
252 if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
255 delta = (new_count << shift) - (prev_count << shift);
258 local64_add(delta, &event->count);
/*
 * hrtimer callback that polls the counters of one box.
 *
 * The timer is not re-armed (HRTIMER_NORESTART) when the box has no active
 * events or when the callback runs on a CPU other than box->cpu.
 * Otherwise, with local interrupts disabled, every event on
 * box->active_list and every event whose bit is set in box->active_mask is
 * updated through uncore_perf_event_update(). The timer is then pushed
 * forward by box->hrtimer_duration nanoseconds and restarted.
 */
262 * The overflow interrupt is unavailable for SandyBridge-EP, is broken
263 * for SandyBridge. So we use hrtimer to periodically poll the counter
266 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
268 struct intel_uncore_box *box;
269 struct perf_event *event;
273 box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
274 if (!box->n_active || box->cpu != smp_processor_id())
275 return HRTIMER_NORESTART;
277 * disable local interrupt to prevent uncore_pmu_event_start/stop
278 * to interrupt the update process
280 local_irq_save(flags);
283 * handle boxes with an active event list as opposed to active
286 list_for_each_entry(event, &box->active_list, active_entry) {
287 uncore_perf_event_update(box, event);
290 for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
291 uncore_perf_event_update(box, box->events[bit]);
293 local_irq_restore(flags);
295 hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
296 return HRTIMER_RESTART;
299 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
301 hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
302 HRTIMER_MODE_REL_PINNED);
305 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
307 hrtimer_cancel(&box->hrtimer);
310 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
312 hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
313 box->hrtimer.function = uncore_pmu_hrtimer;
/*
 * Allocate a zeroed box for @type on the given NUMA node.
 *
 * The allocation is sized for the box itself plus type->num_shared_regs
 * trailing struct intel_uncore_extra_reg entries. The lock of every shared
 * register is initialised, the polling hrtimer is set up, pci_phys_id is
 * set to -1, the timer interval gets its default
 * (UNCORE_PMU_HRTIMER_INTERVAL) and active_list is initialised as an empty
 * list.
 *
 * NOTE(review): the second parameter line, the allocation-failure check,
 * further field initialisation and the return statement are not part of
 * this excerpt.
 */
316 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
319 int i, size, numshared = type->num_shared_regs ;
320 struct intel_uncore_box *box;
322 size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
324 box = kzalloc_node(size, GFP_KERNEL, node);
328 for (i = 0; i < numshared; i++)
329 raw_spin_lock_init(&box->shared_regs[i].lock);
331 uncore_pmu_init_hrtimer(box);
333 box->pci_phys_id = -1;
336 /* set default hrtimer timeout */
337 box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
339 INIT_LIST_HEAD(&box->active_list);
/* Forward declaration; the function is defined further down in this file. */
345 * Using uncore_pmu_event_init pmu event_init callback
346 * as a detection point for uncore events.
348 static int uncore_pmu_event_init(struct perf_event *event);
350 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
352 return &box->pmu->pmu == event->pmu;
/*
 * Add @leader, and its sibling events, to box->event_list.
 *
 * Only events that belong to this box's PMU (is_box_event()) are stored.
 * Siblings are additionally skipped when their state is
 * PERF_EVENT_STATE_OFF or lower. The capacity limit max_count starts at
 * the type's num_counters and is compared against box->n_events up front.
 *
 * NOTE(review): the return type, the last parameter, the adjustment made
 * when the type has a fixed counter, the handling of index n and all
 * return statements are not part of this excerpt.
 */
356 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
359 struct perf_event *event;
362 max_count = box->pmu->type->num_counters;
363 if (box->pmu->type->fixed_ctl)
366 if (box->n_events >= max_count)
371 if (is_box_event(box, leader)) {
372 box->event_list[n] = leader;
379 for_each_sibling_event(event, leader) {
380 if (!is_box_event(box, event) ||
381 event->state <= PERF_EVENT_STATE_OFF)
387 box->event_list[n] = event;
/*
 * Select the scheduling constraint that applies to @event on @box.
 *
 * Order of the visible checks:
 *  - the type's own get_constraint() callback, when provided;
 *  - events with attr.config == UNCORE_FIXED_EVENT get
 *    uncore_constraint_fixed;
 *  - the type's constraint table is searched for an entry whose code
 *    matches the event config masked with the entry's cmask;
 *  - otherwise the type's default 'unconstrainted' constraint is returned.
 *
 * NOTE(review): how the callback result is used and what is returned on a
 * table match are not part of this excerpt.
 */
393 static struct event_constraint *
394 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
396 struct intel_uncore_type *type = box->pmu->type;
397 struct event_constraint *c;
399 if (type->ops->get_constraint) {
400 c = type->ops->get_constraint(box, event);
405 if (event->attr.config == UNCORE_FIXED_EVENT)
406 return &uncore_constraint_fixed;
408 if (type->constraints) {
409 for_each_event_constraint(c, type->constraints) {
410 if ((event->hw.config & c->cmask) == c->code)
415 return &type->unconstrainted;
418 static void uncore_put_event_constraint(struct intel_uncore_box *box,
419 struct perf_event *event)
421 if (box->pmu->type->ops->put_constraint)
422 box->pmu->type->ops->put_constraint(box, event);
/*
 * Work out a counter assignment for the first @n events in
 * box->event_list.
 *
 * First the constraint of every event is fetched and stored in
 * box->event_constraint[], tracking the smallest and largest constraint
 * weight. The fast path then tries to leave each event on the counter it
 * already has (hwc->idx), which requires that the constraint still allows
 * that index and that no earlier event claimed it. perf_assign_events()
 * does the general scheduling.
 *
 * When @assign is NULL (uncore_validate_group() calls it that way) or
 * scheduling failed, the constraints of all @n events are released again.
 *
 * Returns 0 on success and -EINVAL when ret is non-zero.
 *
 * NOTE(review): the statements that leave the fast path early and the
 * condition under which perf_assign_events() is called are not part of
 * this excerpt.
 */
425 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
427 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
428 struct event_constraint *c;
429 int i, wmin, wmax, ret = 0;
430 struct hw_perf_event *hwc;
432 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
434 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
435 c = uncore_get_event_constraint(box, box->event_list[i]);
436 box->event_constraint[i] = c;
437 wmin = min(wmin, c->weight);
438 wmax = max(wmax, c->weight);
441 /* fastpath, try to reuse previous register */
442 for (i = 0; i < n; i++) {
443 hwc = &box->event_list[i]->hw;
444 c = box->event_constraint[i];
450 /* constraint still honored */
451 if (!test_bit(hwc->idx, c->idxmsk))
454 /* not already used */
455 if (test_bit(hwc->idx, used_mask))
458 __set_bit(hwc->idx, used_mask);
460 assign[i] = hwc->idx;
464 ret = perf_assign_events(box->event_constraint, n,
465 wmin, wmax, n, assign);
467 if (!assign || ret) {
468 for (i = 0; i < n; i++)
469 uncore_put_event_constraint(box, box->event_list[i]);
471 return ret ? -EINVAL : 0;
/*
 * Start counting for @event on its box.
 *
 * An index of -1 or one at/above UNCORE_PMC_IDX_MAX triggers a one-time
 * warning.
 *
 * Free running counters: the event is appended to box->active_list, its
 * prev_count is seeded with the current counter value, and the polling
 * timer is started when this is the first active event of the box.
 *
 * Other counters: a one-time warning fires if the event is not in the
 * PERF_HES_STOPPED state. The event is recorded in box->events[idx], its
 * bit is set in box->active_mask, prev_count is seeded with the current
 * counter value and the counter is enabled. The polling timer is started
 * when box->n_active equals 1.
 *
 * NOTE(review): the statements following the warnings, and the updates of
 * event->hw.state and box->n_active on the non-free-running path, are not
 * part of this excerpt.
 */
474 void uncore_pmu_event_start(struct perf_event *event, int flags)
476 struct intel_uncore_box *box = uncore_event_to_box(event);
477 int idx = event->hw.idx;
479 if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
483 * Free running counter is read-only and always active.
484 * Use the current counter value as start point.
485 * There is no overflow interrupt for free running counter.
486 * Use hrtimer to periodically poll the counter to avoid overflow.
488 if (uncore_pmc_freerunning(event->hw.idx)) {
489 list_add_tail(&event->active_entry, &box->active_list);
490 local64_set(&event->hw.prev_count,
491 uncore_read_counter(box, event));
492 if (box->n_active++ == 0)
493 uncore_pmu_start_hrtimer(box);
497 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
501 box->events[idx] = event;
503 __set_bit(idx, box->active_mask);
505 local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
506 uncore_enable_event(box, event);
508 if (box->n_active == 1)
509 uncore_pmu_start_hrtimer(box);
/*
 * Stop counting for @event.
 *
 * Free running counters cannot be disabled: the event is only removed from
 * the active list, the polling timer is cancelled when no active event is
 * left, and the count is brought up to date.
 *
 * Other counters: if the event's bit was set in box->active_mask it is
 * cleared, the counter is disabled, the box->events[] slot is reset to
 * NULL and PERF_HES_STOPPED is set (with a one-time warning if it already
 * was). The polling timer is cancelled when box->n_active is zero.
 *
 * When @flags contains PERF_EF_UPDATE and the event is not yet marked
 * PERF_HES_UPTODATE, the remaining delta is read out and the flag is set.
 *
 * NOTE(review): the end of the free-running branch and the update of
 * box->n_active on the other path are not part of this excerpt.
 */
512 void uncore_pmu_event_stop(struct perf_event *event, int flags)
514 struct intel_uncore_box *box = uncore_event_to_box(event);
515 struct hw_perf_event *hwc = &event->hw;
517 /* Cannot disable free running counter which is read-only */
518 if (uncore_pmc_freerunning(hwc->idx)) {
519 list_del(&event->active_entry);
520 if (--box->n_active == 0)
521 uncore_pmu_cancel_hrtimer(box);
522 uncore_perf_event_update(box, event);
526 if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
527 uncore_disable_event(box, event);
529 box->events[hwc->idx] = NULL;
530 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
531 hwc->state |= PERF_HES_STOPPED;
533 if (box->n_active == 0)
534 uncore_pmu_cancel_hrtimer(box);
537 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
539 * Drain the remaining delta count out of a event
540 * that we are disabling:
542 uncore_perf_event_update(box, event);
543 hwc->state |= PERF_HES_UPTODATE;
/*
 * Add @event to its box (installed as the .add callback in
 * uncore_pmu_register()).
 *
 * Free running events need no scheduling; they are only started when
 * @flags contains PERF_EF_START.
 *
 * For all other events the event is collected into box->event_list, its
 * state is set to PERF_HES_UPTODATE | PERF_HES_STOPPED (plus PERF_HES_ARCH
 * when no start was requested), and a counter assignment is computed with
 * uncore_assign_events(). Two passes over the list follow:
 *  1. the first loop compares each already present event's counter index
 *     and tag with the new assignment and calls uncore_pmu_event_stop()
 *     with PERF_EF_UPDATE;
 *  2. the second loop calls uncore_assign_hw_event() for events whose
 *     index or tag differs from the assignment and
 *     uncore_pmu_event_start().
 *
 * NOTE(review): error returns, the refresh of hwc inside the loops and the
 * statements that skip loop iterations are not part of this excerpt, so
 * the exact conditions under which an event is stopped or started cannot
 * be confirmed from here.
 */
547 int uncore_pmu_event_add(struct perf_event *event, int flags)
549 struct intel_uncore_box *box = uncore_event_to_box(event);
550 struct hw_perf_event *hwc = &event->hw;
551 int assign[UNCORE_PMC_IDX_MAX];
558 * The free funning counter is assigned in event_init().
559 * The free running counter event and free running counter
560 * are 1:1 mapped. It doesn't need to be tracked in event_list.
562 if (uncore_pmc_freerunning(hwc->idx)) {
563 if (flags & PERF_EF_START)
564 uncore_pmu_event_start(event, 0);
568 ret = n = uncore_collect_events(box, event, false);
572 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
573 if (!(flags & PERF_EF_START))
574 hwc->state |= PERF_HES_ARCH;
576 ret = uncore_assign_events(box, assign, n);
580 /* save events moving to new counters */
581 for (i = 0; i < box->n_events; i++) {
582 event = box->event_list[i];
585 if (hwc->idx == assign[i] &&
586 hwc->last_tag == box->tags[assign[i]])
589 * Ensure we don't accidentally enable a stopped
590 * counter simply because we rescheduled.
592 if (hwc->state & PERF_HES_STOPPED)
593 hwc->state |= PERF_HES_ARCH;
595 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
598 /* reprogram moved events into new counters */
599 for (i = 0; i < n; i++) {
600 event = box->event_list[i];
603 if (hwc->idx != assign[i] ||
604 hwc->last_tag != box->tags[assign[i]])
605 uncore_assign_hw_event(box, event, assign[i]);
606 else if (i < box->n_events)
609 if (hwc->state & PERF_HES_ARCH)
612 uncore_pmu_event_start(event, 0);
/*
 * Remove @event from its box (installed as the .del callback in
 * uncore_pmu_register()).
 *
 * The event is stopped with PERF_EF_UPDATE. For events on ordinary
 * counters the entry is then looked up in box->event_list, its constraint
 * is released, and the entries behind it are moved down by one slot.
 * Finally event->hw.last_tag is set to ~0ULL.
 *
 * NOTE(review): the statement taken for free running events, the update of
 * box->n_events and any reset of event->hw.idx are not part of this
 * excerpt.
 */
619 void uncore_pmu_event_del(struct perf_event *event, int flags)
621 struct intel_uncore_box *box = uncore_event_to_box(event);
624 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
627 * The event for free running counter is not tracked by event_list.
628 * It doesn't need to force event->hw.idx = -1 to reassign the counter.
629 * Because the event and the free running counter are 1:1 mapped.
631 if (uncore_pmc_freerunning(event->hw.idx))
634 for (i = 0; i < box->n_events; i++) {
635 if (event == box->event_list[i]) {
636 uncore_put_event_constraint(box, event);
638 for (++i; i < box->n_events; i++)
639 box->event_list[i - 1] = box->event_list[i];
647 event->hw.last_tag = ~0ULL;
/*
 * Bring event->count up to date by folding in the current hardware counter
 * value of the box the event runs on.
 */
void uncore_pmu_event_read(struct perf_event *event)
{
	uncore_perf_event_update(uncore_event_to_box(event), event);
}
/*
 * Check that @event can be scheduled together with its group.
 *
 * A throw-away box is allocated for pmu->type (NUMA_NO_NODE). The group
 * leader is collected first, then the new event; after each step
 * n_events of the throw-away box is updated. uncore_assign_events() is
 * then run with a NULL assignment array, i.e. as a pure feasibility check.
 *
 * NOTE(review): the result for free running events, the error checks after
 * the allocation and after each collect step, the release of the
 * throw-away box and the return statement are not part of this excerpt.
 */
657 * validation ensures the group can be loaded onto the
658 * PMU if it was the only group available.
660 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
661 struct perf_event *event)
663 struct perf_event *leader = event->group_leader;
664 struct intel_uncore_box *fake_box;
665 int ret = -EINVAL, n;
667 /* The free running counter is always active. */
668 if (uncore_pmc_freerunning(event->hw.idx))
671 fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
677 * the event is not yet connected with its
678 * siblings therefore we must first collect
679 * existing siblings, then add the new event
680 * before we can simulate the scheduling
682 n = uncore_collect_events(fake_box, leader, true);
686 fake_box->n_events = n;
687 n = uncore_collect_events(fake_box, event, false);
691 fake_box->n_events = n;
693 ret = uncore_assign_events(fake_box, NULL, n);
/*
 * Initialise a new perf event for an uncore PMU (installed as the
 * .event_init callback in uncore_pmu_register()).
 *
 * Visible checks, in order: the event type must match the PMU's type, the
 * PMU must have a device (func_id >= 0), and sampling
 * (hwc->sample_period) is not supported. The event is then bound to the
 * box serving event->cpu: event->cpu is replaced by box->cpu and the box
 * is stored in event->pmu_private. last_tag and both extra register
 * indices are reset.
 *
 * The hardware config depends on the event kind:
 *  - UNCORE_FIXED_EVENT needs a type with a fixed counter, and for
 *    single_fixed types only the first PMU (pmu_idx 0) may use it;
 *  - free running events are validated, get index
 *    UNCORE_PMC_IDX_FREERUNNING and have their counter address assigned
 *    right here;
 *  - everything else keeps the attr.config bits covered by the type's
 *    event_mask/event_mask_ext and may be refined by the type's
 *    hw_config() callback.
 *
 * Events that are not their own group leader are additionally checked
 * with uncore_validate_group().
 *
 * NOTE(review): the error codes returned by the failing checks and the
 * final return statement are not part of this excerpt.
 */
699 static int uncore_pmu_event_init(struct perf_event *event)
701 struct intel_uncore_pmu *pmu;
702 struct intel_uncore_box *box;
703 struct hw_perf_event *hwc = &event->hw;
706 if (event->attr.type != event->pmu->type)
709 pmu = uncore_event_to_pmu(event);
710 /* no device found for this pmu */
711 if (pmu->func_id < 0)
714 /* Sampling not supported yet */
715 if (hwc->sample_period)
719 * Place all uncore events for a particular physical package
724 box = uncore_pmu_to_box(pmu, event->cpu);
725 if (!box || box->cpu < 0)
727 event->cpu = box->cpu;
728 event->pmu_private = box;
730 event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
733 event->hw.last_tag = ~0ULL;
734 event->hw.extra_reg.idx = EXTRA_REG_NONE;
735 event->hw.branch_reg.idx = EXTRA_REG_NONE;
737 if (event->attr.config == UNCORE_FIXED_EVENT) {
738 /* no fixed counter */
739 if (!pmu->type->fixed_ctl)
742 * if there is only one fixed counter, only the first pmu
743 * can access the fixed counter
745 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
748 /* fixed counters have event field hardcoded to zero */
750 } else if (is_freerunning_event(event)) {
751 hwc->config = event->attr.config;
752 if (!check_valid_freerunning_event(box, event))
754 event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
756 * The free running counter event and free running counter
757 * are always 1:1 mapped.
758 * The free running counter is always active.
759 * Assign the free running counter here.
761 event->hw.event_base = uncore_freerunning_counter(box, event);
763 hwc->config = event->attr.config &
764 (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
765 if (pmu->type->ops->hw_config) {
766 ret = pmu->type->ops->hw_config(box, event);
772 if (event->group_leader != event)
773 ret = uncore_validate_group(pmu, event);
/*
 * .pmu_enable callback: looks up the box of this PMU for the die of the
 * current CPU and invokes the type's enable_box() callback, when the type
 * provides one.
 *
 * NOTE(review): the handling of a missing box is not part of this excerpt.
 */
780 static void uncore_pmu_enable(struct pmu *pmu)
782 struct intel_uncore_pmu *uncore_pmu;
783 struct intel_uncore_box *box;
785 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
789 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
793 if (uncore_pmu->type->ops->enable_box)
794 uncore_pmu->type->ops->enable_box(box);
/*
 * .pmu_disable callback: looks up the box of this PMU for the die of the
 * current CPU and invokes the type's disable_box() callback, when the type
 * provides one.
 *
 * NOTE(review): the handling of a missing box is not part of this excerpt.
 */
797 static void uncore_pmu_disable(struct pmu *pmu)
799 struct intel_uncore_pmu *uncore_pmu;
800 struct intel_uncore_box *box;
802 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
806 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
810 if (uncore_pmu->type->ops->disable_box)
811 uncore_pmu->type->ops->disable_box(box);
814 static ssize_t uncore_get_attr_cpumask(struct device *dev,
815 struct device_attribute *attr, char *buf)
817 return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
/*
 * Read-only "cpumask" device attribute backed by
 * uncore_get_attr_cpumask(), wrapped in an attribute list and an attribute
 * group. uncore_type_init() installs the group as the pmu_group of every
 * uncore type.
 */
820 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
822 static struct attribute *uncore_pmu_attrs[] = {
823 &dev_attr_cpumask.attr,
827 static const struct attribute_group uncore_pmu_attr_group = {
828 .attrs = uncore_pmu_attrs,
/*
 * Register one uncore PMU with the perf core.
 *
 * When the type does not bring its own struct pmu, pmu->pmu is filled with
 * the generic callbacks defined in this file. Otherwise the type's
 * template is copied and only attr_groups/attr_update are overridden.
 *
 * The PMU name is "uncore_<type name>" for types with a single box (just
 * "uncore" when the type name is empty) and "uncore_<type name>_<number>"
 * otherwise. The PMU is then registered via perf_pmu_register() with -1 as
 * the requested type id, and pmu->registered is set.
 *
 * NOTE(review): the number used in the multi-box name, the condition
 * guarding the assignment of pmu->registered and the return statement are
 * not part of this excerpt.
 */
831 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
835 if (!pmu->type->pmu) {
836 pmu->pmu = (struct pmu) {
837 .attr_groups = pmu->type->attr_groups,
838 .task_ctx_nr = perf_invalid_context,
839 .pmu_enable = uncore_pmu_enable,
840 .pmu_disable = uncore_pmu_disable,
841 .event_init = uncore_pmu_event_init,
842 .add = uncore_pmu_event_add,
843 .del = uncore_pmu_event_del,
844 .start = uncore_pmu_event_start,
845 .stop = uncore_pmu_event_stop,
846 .read = uncore_pmu_event_read,
847 .module = THIS_MODULE,
848 .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
849 .attr_update = pmu->type->attr_update,
852 pmu->pmu = *pmu->type->pmu;
853 pmu->pmu.attr_groups = pmu->type->attr_groups;
854 pmu->pmu.attr_update = pmu->type->attr_update;
857 if (pmu->type->num_boxes == 1) {
858 if (strlen(pmu->type->name) > 0)
859 sprintf(pmu->name, "uncore_%s", pmu->type->name);
861 sprintf(pmu->name, "uncore");
863 sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
867 ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
869 pmu->registered = true;
/*
 * Unregister @pmu from the perf core and clear its 'registered' flag.
 *
 * NOTE(review): the statement under the !pmu->registered check is not part
 * of this excerpt; presumably it returns early - confirm.
 */
873 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
875 if (!pmu->registered)
877 perf_pmu_unregister(&pmu->pmu);
878 pmu->registered = false;
/*
 * Free the box of every die (0 .. uncore_max_dies() - 1) of @pmu.
 *
 * NOTE(review): whether the pmu->boxes array itself is released as well is
 * not part of this excerpt.
 */
881 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
885 for (die = 0; die < uncore_max_dies(); die++)
886 kfree(pmu->boxes[die]);
/*
 * Tear down one uncore type: run the optional cleanup_mapping() callback,
 * unregister every PMU of the type and free its boxes, then free the
 * events attribute group and reset the pointer to NULL.
 *
 * NOTE(review): the guard around the PMU loop and the release of the
 * type->pmus array are not part of this excerpt.
 */
890 static void uncore_type_exit(struct intel_uncore_type *type)
892 struct intel_uncore_pmu *pmu = type->pmus;
895 if (type->cleanup_mapping)
896 type->cleanup_mapping(type);
899 for (i = 0; i < type->num_boxes; i++, pmu++) {
900 uncore_pmu_unregister(pmu);
901 uncore_free_boxes(pmu);
906 kfree(type->events_group);
907 type->events_group = NULL;
/* Tear down every uncore type in the NULL-terminated list @types. */
static void uncore_types_exit(struct intel_uncore_type **types)
{
	struct intel_uncore_type **cur = types;

	while (*cur) {
		uncore_type_exit(*cur);
		cur++;
	}
}
/*
 * Set up the per-type data of one uncore type.
 *
 * One struct intel_uncore_pmu is allocated per box of the type. Each PMU
 * gets a zeroed array of box pointers with one slot per die, and func_id
 * is the PMU index when @setid is true or -1 otherwise. The type's
 * default constraint allows all num_counters counters.
 *
 * When the type has event descriptions, an "events" attribute group is
 * built that points at the attribute of every description; the attribute
 * array has one extra zeroed slot at the end. The shared cpumask attribute
 * group becomes the type's pmu_group, and the optional set_mapping()
 * callback is run last.
 *
 * The kfree() loop at the end releases the per-PMU box pointer arrays.
 *
 * NOTE(review): allocation-failure checks, the remaining per-PMU
 * initialisation, the assignment of type->pmus, the jump into the cleanup
 * code and the return statements are not part of this excerpt.
 */
916 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
918 struct intel_uncore_pmu *pmus;
922 pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
926 size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
928 for (i = 0; i < type->num_boxes; i++) {
929 pmus[i].func_id = setid ? i : -1;
932 pmus[i].boxes = kzalloc(size, GFP_KERNEL);
938 type->unconstrainted = (struct event_constraint)
939 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
940 0, type->num_counters, 0, 0);
942 if (type->event_descs) {
944 struct attribute_group group;
945 struct attribute *attrs[];
947 for (i = 0; type->event_descs[i].attr.attr.name; i++);
949 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
954 attr_group->group.name = "events";
955 attr_group->group.attrs = attr_group->attrs;
957 for (j = 0; j < i; j++)
958 attr_group->attrs[j] = &type->event_descs[j].attr.attr;
960 type->events_group = &attr_group->group;
963 type->pmu_group = &uncore_pmu_attr_group;
965 if (type->set_mapping)
966 type->set_mapping(type);
971 for (i = 0; i < type->num_boxes; i++)
972 kfree(pmus[i].boxes);
/*
 * Run uncore_type_init() on every entry of the NULL-terminated list
 * @types, passing @setid through.
 *
 * NOTE(review): the return type, the handling of a failing
 * uncore_type_init() call and the return statements are not part of this
 * excerpt.
 */
979 uncore_types_init(struct intel_uncore_type **types, bool setid)
983 for (; *types; types++) {
984 ret = uncore_type_init(*types, setid);
/*
 * PCI probe callback (installed by uncore_pci_init()).
 *
 * The die index is derived from the physical id of the device's bus: the
 * physical id itself when a package has more than one die, otherwise the
 * logical package id.
 *
 * Devices tagged UNCORE_EXTRA_PCI_DEV are only recorded in
 * uncore_extra_pci_dev[die] and get no box. For every other device the
 * uncore type comes from the driver_data of the matching id, and the PMU
 * within the type is either found by matching PCI device/function against
 * the driver's id table (when driver_data has bits above the low 16 set)
 * or taken directly from the index encoded in driver_data.
 *
 * A new box is allocated, bound to the PCI device, initialised, and
 * installed in pmu->boxes[die]. The first active box of a PMU also
 * registers the PMU; if that registration fails the visible code undoes
 * the drvdata and boxes[] assignments and calls uncore_box_exit().
 *
 * NOTE(review): error checks and return codes, the remaining box field
 * assignments and the final release of the box on the failure path are
 * not part of this excerpt.
 */
992 * add a pci uncore device
994 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
996 struct intel_uncore_type *type;
997 struct intel_uncore_pmu *pmu = NULL;
998 struct intel_uncore_box *box;
999 int phys_id, die, ret;
1001 phys_id = uncore_pcibus_to_physid(pdev->bus);
1005 die = (topology_max_die_per_package() > 1) ? phys_id :
1006 topology_phys_to_logical_pkg(phys_id);
1010 if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1011 int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1013 uncore_extra_pci_dev[die].dev[idx] = pdev;
1014 pci_set_drvdata(pdev, NULL);
1018 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1021 * Some platforms, e.g. Knights Landing, use a common PCI device ID
1022 * for multiple instances of an uncore PMU device type. We should check
1023 * PCI slot and func to indicate the uncore box.
1025 if (id->driver_data & ~0xffff) {
1026 struct pci_driver *pci_drv = pdev->driver;
1027 const struct pci_device_id *ids = pci_drv->id_table;
1030 while (ids && ids->vendor) {
1031 if ((ids->vendor == pdev->vendor) &&
1032 (ids->device == pdev->device)) {
1033 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
1034 UNCORE_PCI_DEV_FUNC(ids->driver_data));
1035 if (devfn == pdev->devfn) {
1036 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
1046 * for performance monitoring unit with multiple boxes,
1047 * each box has a different function id.
1049 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1052 if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1055 box = uncore_alloc_box(type, NUMA_NO_NODE);
1059 if (pmu->func_id < 0)
1060 pmu->func_id = pdev->devfn;
1062 WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1064 atomic_inc(&box->refcnt);
1065 box->pci_phys_id = phys_id;
1067 box->pci_dev = pdev;
1069 uncore_box_init(box);
1070 pci_set_drvdata(pdev, box);
1072 pmu->boxes[die] = box;
1073 if (atomic_inc_return(&pmu->activeboxes) > 1)
1076 /* First active box registers the pmu */
1077 ret = uncore_pmu_register(pmu);
1079 pci_set_drvdata(pdev, NULL);
1080 pmu->boxes[die] = NULL;
1081 uncore_box_exit(box);
/*
 * PCI remove callback (installed by uncore_pci_init()).
 *
 * The loop over uncore_extra_pci_dev[die].dev[] clears the slot that
 * holds @pdev and warns once if no slot matched; uncore_pci_probe() stores
 * such devices with NULL drvdata.
 *
 * For a device with a box, a one-time warning fires when the bus's
 * physical id no longer matches the one stored in the box. The drvdata
 * and the pmu->boxes[] slot are cleared, the PMU is unregistered when its
 * last active box goes away, and the box is shut down with
 * uncore_box_exit().
 *
 * NOTE(review): the condition selecting the extra-device path, the
 * assignment of pmu, the early returns and the final release of the box
 * are not part of this excerpt.
 */
1087 static void uncore_pci_remove(struct pci_dev *pdev)
1089 struct intel_uncore_box *box;
1090 struct intel_uncore_pmu *pmu;
1091 int i, phys_id, die;
1093 phys_id = uncore_pcibus_to_physid(pdev->bus);
1095 box = pci_get_drvdata(pdev);
1097 die = (topology_max_die_per_package() > 1) ? phys_id :
1098 topology_phys_to_logical_pkg(phys_id);
1099 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1100 if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1101 uncore_extra_pci_dev[die].dev[i] = NULL;
1105 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1110 if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
1113 pci_set_drvdata(pdev, NULL);
1114 pmu->boxes[box->dieid] = NULL;
1115 if (atomic_dec_return(&pmu->activeboxes) == 0)
1116 uncore_pmu_unregister(pmu);
1117 uncore_box_exit(box);
/*
 * Initialise the PCI side of the driver.
 *
 * Allocates uncore_extra_pci_dev with one zeroed entry per die,
 * initialises the PCI uncore types (setid == false), installs
 * uncore_pci_probe()/uncore_pci_remove() in uncore_pci_driver and
 * registers the driver; pcidrv_registered is then set.
 *
 * The trailing statements are cleanup code: the PCI types are torn down,
 * uncore_extra_pci_dev is freed and reset to NULL, the bus map is freed
 * and uncore_pci_uncores is pointed back at empty_uncore.
 *
 * NOTE(review): the error checks, the labels separating the cleanup
 * stages and the return statements are not part of this excerpt.
 */
1121 static int __init uncore_pci_init(void)
1126 size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1127 uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1128 if (!uncore_extra_pci_dev) {
1133 ret = uncore_types_init(uncore_pci_uncores, false);
1137 uncore_pci_driver->probe = uncore_pci_probe;
1138 uncore_pci_driver->remove = uncore_pci_remove;
1140 ret = pci_register_driver(uncore_pci_driver);
1144 pcidrv_registered = true;
1148 uncore_types_exit(uncore_pci_uncores);
1149 kfree(uncore_extra_pci_dev);
1150 uncore_extra_pci_dev = NULL;
1151 uncore_free_pcibus_map();
1153 uncore_pci_uncores = empty_uncore;
/*
 * Undo uncore_pci_init(). Nothing happens unless the PCI driver was
 * registered. Otherwise the flag is cleared, the driver is unregistered,
 * the PCI uncore types are torn down, and the extra-device array and the
 * bus map are freed.
 */
1157 static void uncore_pci_exit(void)
1159 if (pcidrv_registered) {
1160 pcidrv_registered = false;
1161 pci_unregister_driver(uncore_pci_driver);
1162 uncore_types_exit(uncore_pci_uncores);
1163 kfree(uncore_extra_pci_dev);
1164 uncore_free_pcibus_map();
/*
 * Move the boxes of one uncore type from @old_cpu to @new_cpu.
 *
 * The die is taken from @old_cpu, or from @new_cpu when @old_cpu is
 * negative. For every PMU of the type the box of that die is looked up.
 * The two warnings check that the box's current CPU is -1 in one case and
 * @old_cpu in the other. The polling timer is cancelled and the perf
 * context of the PMU is migrated from @old_cpu to @new_cpu.
 *
 * NOTE(review): the conditions selecting each case, the updates of
 * box->cpu and the handling of a missing box are not part of this excerpt.
 */
1168 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1171 struct intel_uncore_pmu *pmu = type->pmus;
1172 struct intel_uncore_box *box;
1175 die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1176 for (i = 0; i < type->num_boxes; i++, pmu++) {
1177 box = pmu->boxes[die];
1182 WARN_ON_ONCE(box->cpu != -1);
1187 WARN_ON_ONCE(box->cpu != old_cpu);
1192 uncore_pmu_cancel_hrtimer(box);
1193 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
/*
 * Move the boxes of every uncore type in the NULL-terminated list
 * @uncores from @old_cpu to @new_cpu.
 */
static void uncore_change_context(struct intel_uncore_type **uncores,
				  int old_cpu, int new_cpu)
{
	struct intel_uncore_type **cur = uncores;

	while (*cur) {
		uncore_change_type_ctx(*cur, old_cpu, new_cpu);
		cur++;
	}
}
/*
 * Drop one reference on the box with index @id of every PMU of every type
 * in the NULL-terminated list @types. A box whose reference count reaches
 * zero is shut down with uncore_box_exit().
 *
 * NOTE(review): the assignments of type and pmu at the start of each outer
 * iteration are not part of this excerpt.
 */
1205 static void uncore_box_unref(struct intel_uncore_type **types, int id)
1207 struct intel_uncore_type *type;
1208 struct intel_uncore_pmu *pmu;
1209 struct intel_uncore_box *box;
1212 for (; *types; types++) {
1215 for (i = 0; i < type->num_boxes; i++, pmu++) {
1216 box = pmu->boxes[id];
1217 if (box && atomic_dec_return(&box->refcnt) == 0)
1218 uncore_box_exit(box);
/*
 * CPU hotplug offline handler.
 *
 * If @cpu was the CPU collecting uncore events, it is removed from
 * uncore_cpu_mask and another CPU of the same die is picked. A valid
 * replacement is added to the mask, and the MSR, MMIO and PCI uncore
 * contexts are moved from @cpu to the replacement. The references on the
 * MSR and MMIO boxes of the CPU's die are then dropped.
 *
 * NOTE(review): the branch taken when @cpu was not the collecting CPU, the
 * value of target when no other CPU is available and the return statement
 * are not part of this excerpt.
 */
1223 static int uncore_event_cpu_offline(unsigned int cpu)
1227 /* Check if exiting cpu is used for collecting uncore events */
1228 if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1230 /* Find a new cpu to collect uncore events */
1231 target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1233 /* Migrate uncore events to the new target */
1234 if (target < nr_cpu_ids)
1235 cpumask_set_cpu(target, &uncore_cpu_mask);
1239 uncore_change_context(uncore_msr_uncores, cpu, target);
1240 uncore_change_context(uncore_mmio_uncores, cpu, target);
1241 uncore_change_context(uncore_pci_uncores, cpu, target);
1244 /* Clear the references */
1245 die = topology_logical_die_id(cpu);
1246 uncore_box_unref(uncore_msr_uncores, die);
1247 uncore_box_unref(uncore_mmio_uncores, die);
/*
 * Allocate the boxes still missing for die @die, for every PMU of every
 * type in @types.
 *
 * Allocation happens in two phases so that nothing is installed on partial
 * failure: new boxes are first collected on the local list 'allocated'
 * (linked through their active_list member, on the NUMA node of @cpu), and
 * only afterwards moved into pmu->boxes[die]. The final loop unlinks the
 * collected boxes again.
 *
 * NOTE(review): the allocation-failure check, the assignment of box->pmu,
 * the freeing done in the final loop and the return statements are not
 * part of this excerpt.
 */
1251 static int allocate_boxes(struct intel_uncore_type **types,
1252 unsigned int die, unsigned int cpu)
1254 struct intel_uncore_box *box, *tmp;
1255 struct intel_uncore_type *type;
1256 struct intel_uncore_pmu *pmu;
1257 LIST_HEAD(allocated);
1260 /* Try to allocate all required boxes */
1261 for (; *types; types++) {
1264 for (i = 0; i < type->num_boxes; i++, pmu++) {
1265 if (pmu->boxes[die])
1267 box = uncore_alloc_box(type, cpu_to_node(cpu));
1272 list_add(&box->active_list, &allocated);
1275 /* Install them in the pmus */
1276 list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1277 list_del_init(&box->active_list);
1278 box->pmu->boxes[die] = box;
1283 list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1284 list_del_init(&box->active_list);
/*
 * Take one reference on the box with index @id of every PMU of every type
 * in @types, after making sure the boxes exist (allocate_boxes()). A box
 * that receives its first reference is initialised with uncore_box_init().
 *
 * NOTE(review): the handling of an allocate_boxes() failure, the
 * assignments of type and pmu inside the outer loop and the return
 * statement are not part of this excerpt.
 */
1290 static int uncore_box_ref(struct intel_uncore_type **types,
1291 int id, unsigned int cpu)
1293 struct intel_uncore_type *type;
1294 struct intel_uncore_pmu *pmu;
1295 struct intel_uncore_box *box;
1298 ret = allocate_boxes(types, id, cpu);
1302 for (; *types; types++) {
1305 for (i = 0; i < type->num_boxes; i++, pmu++) {
1306 box = pmu->boxes[id];
1307 if (box && atomic_inc_return(&box->refcnt) == 1)
1308 uncore_box_init(box);
/*
 * CPU hotplug online handler.
 *
 * References are taken on the MSR and MMIO boxes of the CPU's die. The
 * first check only triggers when both calls reported an error. If no CPU
 * of the die collects uncore events yet, @cpu is added to uncore_cpu_mask
 * and takes over the MSR, MMIO and PCI uncore contexts (old CPU -1).
 *
 * NOTE(review): the return values, the branch taken when a collecting CPU
 * already exists and any conditions around the individual
 * uncore_change_context() calls are not part of this excerpt.
 */
1314 static int uncore_event_cpu_online(unsigned int cpu)
1316 int die, target, msr_ret, mmio_ret;
1318 die = topology_logical_die_id(cpu);
1319 msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1320 mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1321 if (msr_ret && mmio_ret)
1325 * Check if there is an online cpu in the package
1326 * which collects uncore events already.
1328 target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1329 if (target < nr_cpu_ids)
1332 cpumask_set_cpu(cpu, &uncore_cpu_mask);
1335 uncore_change_context(uncore_msr_uncores, -1, cpu);
1337 uncore_change_context(uncore_mmio_uncores, -1, cpu);
1338 uncore_change_context(uncore_pci_uncores, -1, cpu);
/*
 * Register every PMU (one per box) of @type with uncore_pmu_register().
 *
 * NOTE(review): the handling of a failed registration and the return
 * statement are not part of this excerpt.
 */
1342 static int __init type_pmu_register(struct intel_uncore_type *type)
1346 for (i = 0; i < type->num_boxes; i++) {
1347 ret = uncore_pmu_register(&type->pmus[i]);
/*
 * Register the PMUs of every type in the MSR uncore list.
 *
 * NOTE(review): the handling of a failed registration and the return
 * statement are not part of this excerpt.
 */
1354 static int __init uncore_msr_pmus_register(void)
1356 struct intel_uncore_type **types = uncore_msr_uncores;
1359 for (; *types; types++) {
1360 ret = type_pmu_register(*types);
/*
 * Initialise the MSR based uncore types (setid == true) and register their
 * PMUs. The trailing statements are cleanup code: the MSR types are torn
 * down and uncore_msr_uncores is pointed back at empty_uncore.
 *
 * NOTE(review): the error checks, the cleanup label and the return
 * statements are not part of this excerpt.
 */
1367 static int __init uncore_cpu_init(void)
1371 ret = uncore_types_init(uncore_msr_uncores, true);
1375 ret = uncore_msr_pmus_register();
1380 uncore_types_exit(uncore_msr_uncores);
1381 uncore_msr_uncores = empty_uncore;
/*
 * Initialise the MMIO based uncore types (setid == true) and register the
 * PMUs of each type. The trailing statements are cleanup code: the MMIO
 * types are torn down and uncore_mmio_uncores is pointed back at
 * empty_uncore.
 *
 * NOTE(review): the error checks, the cleanup label and the return
 * statements are not part of this excerpt.
 */
1385 static int __init uncore_mmio_init(void)
1387 struct intel_uncore_type **types = uncore_mmio_uncores;
1390 ret = uncore_types_init(types, true);
1394 for (; *types; types++) {
1395 ret = type_pmu_register(*types);
1401 uncore_types_exit(uncore_mmio_uncores);
1402 uncore_mmio_uncores = empty_uncore;
/*
 * Set of initialisation hooks for one CPU model, one hook per access
 * method (MSR/"cpu", PCI, MMIO). Only pci_init reports a status; the other
 * two hooks return nothing.
 */
1406 struct intel_uncore_init_fun {
1407 void (*cpu_init)(void);
1408 int (*pci_init)(void);
1409 void (*mmio_init)(void);
1412 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1413 .cpu_init = nhm_uncore_cpu_init,
1416 static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1417 .cpu_init = snb_uncore_cpu_init,
1418 .pci_init = snb_uncore_pci_init,
1421 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1422 .cpu_init = snb_uncore_cpu_init,
1423 .pci_init = ivb_uncore_pci_init,
1426 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1427 .cpu_init = snb_uncore_cpu_init,
1428 .pci_init = hsw_uncore_pci_init,
1431 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1432 .cpu_init = snb_uncore_cpu_init,
1433 .pci_init = bdw_uncore_pci_init,
1436 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1437 .cpu_init = snbep_uncore_cpu_init,
1438 .pci_init = snbep_uncore_pci_init,
1441 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1442 .cpu_init = nhmex_uncore_cpu_init,
1445 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1446 .cpu_init = ivbep_uncore_cpu_init,
1447 .pci_init = ivbep_uncore_pci_init,
1450 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1451 .cpu_init = hswep_uncore_cpu_init,
1452 .pci_init = hswep_uncore_pci_init,
1455 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1456 .cpu_init = bdx_uncore_cpu_init,
1457 .pci_init = bdx_uncore_pci_init,
1460 static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1461 .cpu_init = knl_uncore_cpu_init,
1462 .pci_init = knl_uncore_pci_init,
/*
 * Init hooks referenced by the SKYLAKE, SKYLAKE_L, KABYLAKE_L, KABYLAKE,
 * COMETLAKE_L and COMETLAKE entries of intel_uncore_match[].
 */
1465 static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1466 .cpu_init = skl_uncore_cpu_init,
1467 .pci_init = skl_uncore_pci_init,
/* Init hooks referenced by the SKYLAKE_X entry of intel_uncore_match[]. */
1470 static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1471 .cpu_init = skx_uncore_cpu_init,
1472 .pci_init = skx_uncore_pci_init,
/*
 * Init hooks referenced by the ICELAKE_L, ICELAKE_NNPI and ICELAKE entries of
 * intel_uncore_match[]. The PCI hook is skl_uncore_pci_init, the same one
 * skl_uncore_init uses; only the CPU hook is Ice Lake specific.
 */
1475 static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1476 .cpu_init = icl_uncore_cpu_init,
1477 .pci_init = skl_uncore_pci_init,
/*
 * Init hooks referenced by the TIGERLAKE entry of intel_uncore_match[].
 * The CPU hook is icl_uncore_cpu_init, as in icl_uncore_init; an MMIO hook is
 * set and no PCI hook appears in the lines shown.
 */
1480 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1481 .cpu_init = icl_uncore_cpu_init,
1482 .mmio_init = tgl_uncore_mmio_init,
/*
 * Init hooks referenced by the TIGERLAKE_L entry of intel_uncore_match[].
 * Same CPU hook as tgl_uncore_init; the MMIO hook is the _l variant.
 */
1485 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1486 .cpu_init = icl_uncore_cpu_init,
1487 .mmio_init = tgl_l_uncore_mmio_init,
/*
 * Init hooks referenced by the ICELAKE_D and ICELAKE_X entries of
 * intel_uncore_match[]. All three hooks (CPU, PCI, MMIO) are set.
 */
1490 static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1491 .cpu_init = icx_uncore_cpu_init,
1492 .pci_init = icx_uncore_pci_init,
1493 .mmio_init = icx_uncore_mmio_init,
/*
 * Init hooks referenced by the ATOM_TREMONT_D entry of intel_uncore_match[].
 * All three hooks (CPU, PCI, MMIO) are set.
 */
1496 static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1497 .cpu_init = snr_uncore_cpu_init,
1498 .pci_init = snr_uncore_pci_init,
1499 .mmio_init = snr_uncore_mmio_init,
/*
 * CPU model -> init hook table. Each entry pairs an Intel family 6 model
 * identifier with the address of the intel_uncore_init_fun to use for it.
 * intel_uncore_init() passes this table to x86_match_cpu() and reads the
 * hook set back from the matched entry's driver_data.
 *
 * NOTE(review): the terminating entry and closing brace are not visible in
 * this view.
 */
1502 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1503 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_uncore_init),
1504 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_uncore_init),
1505 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_uncore_init),
1506 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_uncore_init),
1507 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_uncore_init),
1508 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &ivb_uncore_init),
1509 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &hsw_uncore_init),
1510 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hsw_uncore_init),
1511 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &hsw_uncore_init),
1512 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &bdw_uncore_init),
1513 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &bdw_uncore_init),
1514 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snbep_uncore_init),
1515 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhmex_uncore_init),
1516 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhmex_uncore_init),
1517 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ivbep_uncore_init),
1518 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &hswep_uncore_init),
1519 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &bdx_uncore_init),
1520 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &bdx_uncore_init),
1521 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_uncore_init),
1522 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_uncore_init),
1523 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &skl_uncore_init),
1524 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &skl_uncore_init),
1525 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_uncore_init),
1526 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &skl_uncore_init),
1527 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &skl_uncore_init),
1528 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &skl_uncore_init),
1529 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &skl_uncore_init),
1530 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init),
1531 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init),
1532 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init),
1533 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_uncore_init),
1534 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_uncore_init),
1535 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_l_uncore_init),
1536 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_uncore_init),
1537 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
/*
 * Publish the table as an x86cpu device table; presumably this is what lets
 * the module be autoloaded on matching CPUs - TODO confirm.
 */
1540 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
/*
 * Module entry point. Matches the running CPU against intel_uncore_match[],
 * runs the matched model's PCI, CPU and MMIO init hooks (each only if set)
 * followed by the corresponding generic uncore_*_init(), and then installs
 * the CPU hotplug callbacks for CPUHP_AP_PERF_X86_UNCORE_ONLINE.
 *
 * NOTE(review): several lines of this function (braces, the statements
 * guarded by some of the conditions below, return statements, any error
 * label) are not visible in this view. The comments describe only what is
 * shown and mark the rest as assumptions.
 */
1542 static int __init intel_uncore_init(void)
1544 const struct x86_cpu_id *id;
/*
 * NOTE(review): the objects referenced from intel_uncore_match[] are declared
 * const; this non-const pointer, together with the cast below, drops that
 * qualifier. A const-qualified pointer would suffice for the reads done here.
 */
1545 struct intel_uncore_init_fun *uncore_init;
/* Per-path results start at 0 and are only overwritten when the hook is set. */
1546 int pret = 0, cret = 0, mret = 0, ret;
/* Handling of a failed match is not visible here - presumably bails out; TODO confirm. */
1548 id = x86_match_cpu(intel_uncore_match);
/* The action taken when running under a hypervisor is not visible - TODO confirm. */
1552 if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
/*
 * Right-hand side of an assignment whose target line is not visible;
 * presumably the file-scope __uncore_max_dies - TODO confirm.
 */
1556 topology_max_packages() * topology_max_die_per_package();
1558 uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
/*
 * PCI path: pret receives the model hook's result and then the result of
 * uncore_pci_init(). Any condition between the two assignments is not
 * visible - TODO confirm.
 */
1559 if (uncore_init->pci_init) {
1560 pret = uncore_init->pci_init();
1562 pret = uncore_pci_init();
/* CPU path: the model hook returns nothing; cret comes from uncore_cpu_init(). */
1565 if (uncore_init->cpu_init) {
1566 uncore_init->cpu_init();
1567 cret = uncore_cpu_init();
/* MMIO path: the model hook returns nothing; mret comes from uncore_mmio_init(). */
1570 if (uncore_init->mmio_init) {
1571 uncore_init->mmio_init();
1572 mret = uncore_mmio_init();
/*
 * True only when all three results are non-zero. A result whose hook is NULL
 * keeps its initial 0, so this cannot be true for a model lacking any hook.
 * NOTE(review): the guarded statement is not visible - presumably an error
 * return; confirm the intended behaviour for models with missing hooks.
 */
1575 if (cret && pret && mret)
1578 /* Install hotplug callbacks to setup the targets for each package */
1579 ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1580 "perf/x86/intel/uncore:online",
1581 uncore_event_cpu_online,
1582 uncore_event_cpu_offline);
/*
 * Tear down the MSR and MMIO uncore type lists. Presumably this is the error
 * path taken when cpuhp_setup_state() fails - the check, label and return are
 * not visible; TODO confirm.
 */
1588 uncore_types_exit(uncore_msr_uncores);
1589 uncore_types_exit(uncore_mmio_uncores);
/* Register intel_uncore_init() as the module's init function. */
1593 module_init(intel_uncore_init);
/*
 * Module exit: remove the CPUHP_AP_PERF_X86_UNCORE_ONLINE hotplug state that
 * intel_uncore_init() installs, then tear down the MSR and MMIO uncore type
 * lists.
 *
 * NOTE(review): lines between the last uncore_types_exit() call and
 * module_exit() are not visible in this view, so further teardown steps may
 * exist - TODO confirm.
 */
1595 static void __exit intel_uncore_exit(void)
/* The hotplug state is removed first, before the type lists are torn down. */
1597 cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1598 uncore_types_exit(uncore_msr_uncores);
1599 uncore_types_exit(uncore_mmio_uncores);
/* Register intel_uncore_exit() as the module's exit function. */
1602 module_exit(intel_uncore_exit);