1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
4 #include <asm/cpu_device_id.h>
5 #include <asm/intel-family.h>
/*
 * NULL-terminated empty type list. The MSR, PCI and MMIO type tables below
 * start out pointing at it and are reset to it on the init error paths.
 */
8 static struct intel_uncore_type *empty_uncore[] = { NULL, };
9 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
10 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
11 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
/* Set after pci_register_driver() succeeded; tested by uncore_pci_exit(). */
13 static bool pcidrv_registered;
14 struct pci_driver *uncore_pci_driver;
15 /* pci bus to socket mapping */
16 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
17 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
/* Array indexed by die, allocated in uncore_pci_init(). */
18 struct pci_extra_dev *uncore_extra_pci_dev;
19 int __uncore_max_dies;
21 /* mask of cpus that collect uncore events */
22 static cpumask_t uncore_cpu_mask;
24 /* constraint for the fixed counter */
25 static struct event_constraint uncore_constraint_fixed =
26 EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
/* Returned by uncore_get_constraint() when the shared register is unavailable. */
27 struct event_constraint uncore_constraint_empty =
28 EVENT_CONSTRAINT(0, 0, 0);
30 MODULE_LICENSE("GPL");
32 int uncore_pcibus_to_physid(struct pci_bus *bus)
34 struct pci2phy_map *map;
37 raw_spin_lock(&pci2phy_map_lock);
38 list_for_each_entry(map, &pci2phy_map_head, list) {
39 if (map->segment == pci_domain_nr(bus)) {
40 phys_id = map->pbus_to_physid[bus->number];
44 raw_spin_unlock(&pci2phy_map_lock);
49 static void uncore_free_pcibus_map(void)
51 struct pci2phy_map *map, *tmp;
53 list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
/*
 * Look up the pci2phy map entry for a PCI segment, creating one when the
 * list has no entry for it yet.
 *
 * The caller must hold pci2phy_map_lock (checked via lockdep). A newly
 * created entry has all 256 bus slots set to -1 and is appended to
 * pci2phy_map_head.
 */
59 struct pci2phy_map *__find_pci2phy_map(int segment)
61 struct pci2phy_map *map, *alloc = NULL;
64 lockdep_assert_held(&pci2phy_map_lock);
67 list_for_each_entry(map, &pci2phy_map_head, list) {
68 if (map->segment == segment)
/* GFP_KERNEL may sleep, so the raw spinlock is dropped around the allocation. */
73 raw_spin_unlock(&pci2phy_map_lock);
74 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
75 raw_spin_lock(&pci2phy_map_lock);
85 map->segment = segment;
/* -1 marks a bus number with no known socket mapping. */
86 for (i = 0; i < 256; i++)
87 map->pbus_to_physid[i] = -1;
88 list_add_tail(&map->list, &pci2phy_map_head);
95 ssize_t uncore_event_show(struct kobject *kobj,
96 struct kobj_attribute *attr, char *buf)
98 struct uncore_event_desc *event =
99 container_of(attr, struct uncore_event_desc, attr);
100 return sprintf(buf, "%s", event->config);
103 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
105 unsigned int dieid = topology_logical_die_id(cpu);
108 * The unsigned check also catches the '-1' return value for non
109 * existent mappings in the topology map.
111 return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
114 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
118 rdmsrl(event->hw.event_base, count);
123 void uncore_mmio_exit_box(struct intel_uncore_box *box)
126 iounmap(box->io_addr);
129 u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
130 struct perf_event *event)
135 if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
138 return readq(box->io_addr + event->hw.event_base);
/*
 * Shared-register constraint helper.
 *
 * Events without an extra register (EXTRA_REG_NONE), or real-box events
 * that already hold their allocation, take the early exit. Otherwise the
 * shared register box->shared_regs[reg1->idx] is claimed under its lock
 * when it is unreferenced or already programmed with the same
 * config1/config2 pair. &uncore_constraint_empty is the fallback result.
 */
142 * generic get constraint function for shared match/mask registers.
144 struct event_constraint *
145 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
147 struct intel_uncore_extra_reg *er;
148 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
149 struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
154 * reg->alloc can be set due to existing state, so for fake box we
155 * need to ignore this, otherwise we might fail to allocate proper
156 * fake state for this extra reg constraint.
158 if (reg1->idx == EXTRA_REG_NONE ||
159 (!uncore_box_is_fake(box) && reg1->alloc))
162 er = &box->shared_regs[reg1->idx];
163 raw_spin_lock_irqsave(&er->lock, flags);
/* Free register, or one whose current programming matches this event. */
164 if (!atomic_read(&er->ref) ||
165 (er->config1 == reg1->config && er->config2 == reg2->config)) {
166 atomic_inc(&er->ref);
167 er->config1 = reg1->config;
168 er->config2 = reg2->config;
171 raw_spin_unlock_irqrestore(&er->lock, flags);
174 if (!uncore_box_is_fake(box))
179 return &uncore_constraint_empty;
182 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
184 struct intel_uncore_extra_reg *er;
185 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
188 * Only put constraint if extra reg was actually allocated. Also
189 * takes care of event which do not use an extra shared reg.
191 * Also, if this is a fake box we shouldn't touch any event state
192 * (reg->alloc) and we don't care about leaving inconsistent box
193 * state either since it will be thrown out.
195 if (uncore_box_is_fake(box) || !reg1->alloc)
198 er = &box->shared_regs[reg1->idx];
199 atomic_dec(&er->ref);
203 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
205 struct intel_uncore_extra_reg *er;
209 er = &box->shared_regs[idx];
211 raw_spin_lock_irqsave(&er->lock, flags);
213 raw_spin_unlock_irqrestore(&er->lock, flags);
218 static void uncore_assign_hw_event(struct intel_uncore_box *box,
219 struct perf_event *event, int idx)
221 struct hw_perf_event *hwc = &event->hw;
224 hwc->last_tag = ++box->tags[idx];
226 if (uncore_pmc_fixed(hwc->idx)) {
227 hwc->event_base = uncore_fixed_ctr(box);
228 hwc->config_base = uncore_fixed_ctl(box);
232 hwc->config_base = uncore_event_ctl(box, hwc->idx);
233 hwc->event_base = uncore_perf_ctr(box, hwc->idx);
/*
 * Fold the counter's progress since the previous read into event->count.
 *
 * shift is 64 minus the counter width for the event's counter kind
 * (free running, fixed, or general purpose); both samples are shifted left
 * by it before being subtracted.
 */
236 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
238 u64 prev_count, new_count, delta;
241 if (uncore_pmc_freerunning(event->hw.idx))
242 shift = 64 - uncore_freerunning_bits(box, event);
243 else if (uncore_pmc_fixed(event->hw.idx))
244 shift = 64 - uncore_fixed_ctr_bits(box);
246 shift = 64 - uncore_perf_ctr_bits(box);
248 /* the hrtimer might modify the previous event value */
250 prev_count = local64_read(&event->hw.prev_count);
251 new_count = uncore_read_counter(box, event);
/* The xchg publishes the new sample and detects a concurrent prev_count change. */
252 if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
255 delta = (new_count << shift) - (prev_count << shift);
258 local64_add(delta, &event->count);
/*
 * hrtimer callback that refreshes every active event of a box.
 *
 * Returns HRTIMER_NORESTART when the box has no active events or the timer
 * fired on a CPU other than box->cpu; otherwise updates the events on
 * box->active_list and those flagged in box->active_mask with local
 * interrupts disabled, re-arms for box->hrtimer_duration and returns
 * HRTIMER_RESTART.
 */
262 * The overflow interrupt is unavailable for SandyBridge-EP, is broken
263 * for SandyBridge. So we use hrtimer to periodically poll the counter
266 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
268 struct intel_uncore_box *box;
269 struct perf_event *event;
273 box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
274 if (!box->n_active || box->cpu != smp_processor_id())
275 return HRTIMER_NORESTART;
277 * disable local interrupt to prevent uncore_pmu_event_start/stop
278 * to interrupt the update process
280 local_irq_save(flags);
283 * handle boxes with an active event list as opposed to active
286 list_for_each_entry(event, &box->active_list, active_entry) {
287 uncore_perf_event_update(box, event);
290 for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
291 uncore_perf_event_update(box, box->events[bit]);
293 local_irq_restore(flags);
295 hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
296 return HRTIMER_RESTART;
299 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
301 hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
302 HRTIMER_MODE_REL_PINNED);
305 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
307 hrtimer_cancel(&box->hrtimer);
310 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
312 hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
313 box->hrtimer.function = uncore_pmu_hrtimer;
316 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
319 int i, size, numshared = type->num_shared_regs ;
320 struct intel_uncore_box *box;
322 size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
324 box = kzalloc_node(size, GFP_KERNEL, node);
328 for (i = 0; i < numshared; i++)
329 raw_spin_lock_init(&box->shared_regs[i].lock);
331 uncore_pmu_init_hrtimer(box);
333 box->pci_phys_id = -1;
336 /* set default hrtimer timeout */
337 box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
339 INIT_LIST_HEAD(&box->active_list);
345 * Using uncore_pmu_event_init pmu event_init callback
346 * as a detection point for uncore events.
348 static int uncore_pmu_event_init(struct perf_event *event);
350 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
352 return &box->pmu->pmu == event->pmu;
/*
 * Gather the events of a group that belong to this box's PMU into
 * box->event_list.
 *
 * The capacity is derived from the type's num_counters, with the type's
 * fixed_ctl also taken into account. The leader is stored when it is a box
 * event; siblings are filtered by the same PMU check and by their state
 * relative to PERF_EVENT_STATE_OFF.
 */
356 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
359 struct perf_event *event;
362 max_count = box->pmu->type->num_counters;
363 if (box->pmu->type->fixed_ctl)
366 if (box->n_events >= max_count)
371 if (is_box_event(box, leader)) {
372 box->event_list[n] = leader;
379 for_each_sibling_event(event, leader) {
380 if (!is_box_event(box, event) ||
381 event->state <= PERF_EVENT_STATE_OFF)
387 box->event_list[n] = event;
/*
 * Pick the scheduling constraint for @event.
 *
 * Sources are consulted in this order: the type's get_constraint hook, the
 * fixed-counter constraint for UNCORE_FIXED_EVENT, the type's constraint
 * table (matched on hw.config masked with cmask), and finally the type's
 * unconstrained default.
 */
393 static struct event_constraint *
394 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
396 struct intel_uncore_type *type = box->pmu->type;
397 struct event_constraint *c;
399 if (type->ops->get_constraint) {
400 c = type->ops->get_constraint(box, event);
405 if (event->attr.config == UNCORE_FIXED_EVENT)
406 return &uncore_constraint_fixed;
408 if (type->constraints) {
409 for_each_event_constraint(c, type->constraints) {
410 if ((event->hw.config & c->cmask) == c->code)
415 return &type->unconstrainted;
418 static void uncore_put_event_constraint(struct intel_uncore_box *box,
419 struct perf_event *event)
421 if (box->pmu->type->ops->put_constraint)
422 box->pmu->type->ops->put_constraint(box, event);
/*
 * Assign counters to the first @n events of box->event_list.
 *
 * Each event's constraint is fetched and the minimum and maximum constraint
 * weights are tracked. The fast path tries to keep every event on the
 * counter it used before; perf_assign_events() performs the general
 * assignment. When @assign is NULL or the assignment failed, the
 * constraints are released again.
 *
 * Returns 0 on success and -EINVAL when the assignment failed.
 */
425 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
427 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
428 struct event_constraint *c;
429 int i, wmin, wmax, ret = 0;
430 struct hw_perf_event *hwc;
432 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
434 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
435 c = uncore_get_event_constraint(box, box->event_list[i]);
436 box->event_constraint[i] = c;
437 wmin = min(wmin, c->weight);
438 wmax = max(wmax, c->weight);
441 /* fastpath, try to reuse previous register */
442 for (i = 0; i < n; i++) {
443 hwc = &box->event_list[i]->hw;
444 c = box->event_constraint[i];
450 /* constraint still honored */
451 if (!test_bit(hwc->idx, c->idxmsk))
454 /* not already used */
455 if (test_bit(hwc->idx, used_mask))
458 __set_bit(hwc->idx, used_mask);
460 assign[i] = hwc->idx;
464 ret = perf_assign_events(box->event_constraint, n,
465 wmin, wmax, n, assign);
/* Validation runs pass assign == NULL and must not keep any constraint. */
467 if (!assign || ret) {
468 for (i = 0; i < n; i++)
469 uncore_put_event_constraint(box, box->event_list[i]);
471 return ret ? -EINVAL : 0;
/*
 * perf ->start callback.
 *
 * Free running events are put on box->active_list, their prev_count is
 * seeded from the current counter value, and the polling timer is started
 * by the first active event. Other events are recorded in box->events[]
 * and box->active_mask, seeded the same way and enabled in hardware; the
 * timer is started when n_active is 1.
 */
474 void uncore_pmu_event_start(struct perf_event *event, int flags)
476 struct intel_uncore_box *box = uncore_event_to_box(event);
477 int idx = event->hw.idx;
479 if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
483 * Free running counter is read-only and always active.
484 * Use the current counter value as start point.
485 * There is no overflow interrupt for free running counter.
486 * Use hrtimer to periodically poll the counter to avoid overflow.
488 if (uncore_pmc_freerunning(event->hw.idx)) {
489 list_add_tail(&event->active_entry, &box->active_list);
490 local64_set(&event->hw.prev_count,
491 uncore_read_counter(box, event));
492 if (box->n_active++ == 0)
493 uncore_pmu_start_hrtimer(box);
/* Starting an event that is not marked stopped is a caller bug. */
497 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
501 box->events[idx] = event;
503 __set_bit(idx, box->active_mask);
505 local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
506 uncore_enable_event(box, event);
508 if (box->n_active == 1)
509 uncore_pmu_start_hrtimer(box);
/*
 * perf ->stop callback.
 *
 * Free running events are removed from the active list, the polling timer
 * is cancelled when the last active event goes away, and the count is
 * updated. Other events are disabled in hardware when their bit was set in
 * box->active_mask and are marked PERF_HES_STOPPED. With PERF_EF_UPDATE set
 * and the event not yet up to date, the remaining delta is folded into the
 * count and PERF_HES_UPTODATE is set.
 */
512 void uncore_pmu_event_stop(struct perf_event *event, int flags)
514 struct intel_uncore_box *box = uncore_event_to_box(event);
515 struct hw_perf_event *hwc = &event->hw;
517 /* Cannot disable free running counter which is read-only */
518 if (uncore_pmc_freerunning(hwc->idx)) {
519 list_del(&event->active_entry);
520 if (--box->n_active == 0)
521 uncore_pmu_cancel_hrtimer(box);
522 uncore_perf_event_update(box, event);
526 if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
527 uncore_disable_event(box, event);
529 box->events[hwc->idx] = NULL;
530 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
531 hwc->state |= PERF_HES_STOPPED;
533 if (box->n_active == 0)
534 uncore_pmu_cancel_hrtimer(box);
537 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
539 * Drain the remaining delta count out of a event
540 * that we are disabling:
542 uncore_perf_event_update(box, event);
543 hwc->state |= PERF_HES_UPTODATE;
/*
 * perf ->add callback.
 *
 * Free running events only need to be started when PERF_EF_START is set.
 * For all other events the new event is collected into the box, counters
 * are assigned for the whole list, events whose counter or tag changed are
 * stopped with PERF_EF_UPDATE, and the events are then bound to their
 * assigned counters and started.
 */
547 int uncore_pmu_event_add(struct perf_event *event, int flags)
549 struct intel_uncore_box *box = uncore_event_to_box(event);
550 struct hw_perf_event *hwc = &event->hw;
551 int assign[UNCORE_PMC_IDX_MAX];
558 * The free funning counter is assigned in event_init().
559 * The free running counter event and free running counter
560 * are 1:1 mapped. It doesn't need to be tracked in event_list.
562 if (uncore_pmc_freerunning(hwc->idx)) {
563 if (flags & PERF_EF_START)
564 uncore_pmu_event_start(event, 0);
568 ret = n = uncore_collect_events(box, event, false);
572 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
573 if (!(flags & PERF_EF_START))
574 hwc->state |= PERF_HES_ARCH;
576 ret = uncore_assign_events(box, assign, n);
580 /* save events moving to new counters */
581 for (i = 0; i < box->n_events; i++) {
582 event = box->event_list[i];
/* Same counter and same tag: the event stays where it is. */
585 if (hwc->idx == assign[i] &&
586 hwc->last_tag == box->tags[assign[i]])
589 * Ensure we don't accidentally enable a stopped
590 * counter simply because we rescheduled.
592 if (hwc->state & PERF_HES_STOPPED)
593 hwc->state |= PERF_HES_ARCH;
595 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
598 /* reprogram moved events into new counters */
599 for (i = 0; i < n; i++) {
600 event = box->event_list[i];
603 if (hwc->idx != assign[i] ||
604 hwc->last_tag != box->tags[assign[i]])
605 uncore_assign_hw_event(box, event, assign[i]);
606 else if (i < box->n_events)
609 if (hwc->state & PERF_HES_ARCH)
612 uncore_pmu_event_start(event, 0);
/*
 * perf ->del callback.
 *
 * The event is stopped with PERF_EF_UPDATE. A regular event is then looked
 * up in box->event_list, its constraint is released, the entries behind it
 * are shifted down by one, and its last_tag is reset to ~0ULL.
 */
619 void uncore_pmu_event_del(struct perf_event *event, int flags)
621 struct intel_uncore_box *box = uncore_event_to_box(event);
624 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
627 * The event for free running counter is not tracked by event_list.
628 * It doesn't need to force event->hw.idx = -1 to reassign the counter.
629 * Because the event and the free running counter are 1:1 mapped.
631 if (uncore_pmc_freerunning(event->hw.idx))
634 for (i = 0; i < box->n_events; i++) {
635 if (event == box->event_list[i]) {
636 uncore_put_event_constraint(box, event);
/* Close the gap left by the removed entry. */
638 for (++i; i < box->n_events; i++)
639 box->event_list[i - 1] = box->event_list[i];
647 event->hw.last_tag = ~0ULL;
/* perf ->read callback: refresh event->count from the hardware counter. */
void uncore_pmu_event_read(struct perf_event *event)
{
	uncore_perf_event_update(uncore_event_to_box(event), event);
}
/*
 * Check that @event can be scheduled together with its group.
 *
 * A temporary box is allocated for the simulation: the leader's group is
 * collected first, then the new event, and uncore_assign_events() is run
 * with a NULL assign array so that no assignment is kept.
 */
657 * validation ensures the group can be loaded onto the
658 * PMU if it was the only group available.
660 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
661 struct perf_event *event)
663 struct perf_event *leader = event->group_leader;
664 struct intel_uncore_box *fake_box;
665 int ret = -EINVAL, n;
667 /* The free running counter is always active. */
668 if (uncore_pmc_freerunning(event->hw.idx))
671 fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
677 * the event is not yet connected with its
678 * siblings therefore we must first collect
679 * existing siblings, then add the new event
680 * before we can simulate the scheduling
682 n = uncore_collect_events(fake_box, leader, true);
686 fake_box->n_events = n;
687 n = uncore_collect_events(fake_box, event, false);
691 fake_box->n_events = n;
693 ret = uncore_assign_events(fake_box, NULL, n);
/*
 * perf ->event_init callback.
 *
 * Checks that the event targets this PMU type, that the PMU has a device
 * (func_id >= 0), that no sampling period is requested, and that a box
 * with a valid cpu exists for the requested cpu. The event is then bound
 * to the box's cpu. The hardware config is set up for one of three kinds:
 * the fixed event, a free running event, or a general event masked with
 * the type's event_mask/event_mask_ext and passed to the optional
 * hw_config hook. Non-leader events are validated against their group.
 */
699 static int uncore_pmu_event_init(struct perf_event *event)
701 struct intel_uncore_pmu *pmu;
702 struct intel_uncore_box *box;
703 struct hw_perf_event *hwc = &event->hw;
706 if (event->attr.type != event->pmu->type)
709 pmu = uncore_event_to_pmu(event);
710 /* no device found for this pmu */
711 if (pmu->func_id < 0)
714 /* Sampling not supported yet */
715 if (hwc->sample_period)
719 * Place all uncore events for a particular physical package
724 box = uncore_pmu_to_box(pmu, event->cpu);
725 if (!box || box->cpu < 0)
727 event->cpu = box->cpu;
728 event->pmu_private = box;
730 event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
/* No counter tag and no extra registers assigned yet. */
733 event->hw.last_tag = ~0ULL;
734 event->hw.extra_reg.idx = EXTRA_REG_NONE;
735 event->hw.branch_reg.idx = EXTRA_REG_NONE;
737 if (event->attr.config == UNCORE_FIXED_EVENT) {
738 /* no fixed counter */
739 if (!pmu->type->fixed_ctl)
742 * if there is only one fixed counter, only the first pmu
743 * can access the fixed counter
745 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
748 /* fixed counters have event field hardcoded to zero */
750 } else if (is_freerunning_event(event)) {
751 hwc->config = event->attr.config;
752 if (!check_valid_freerunning_event(box, event))
754 event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
756 * The free running counter event and free running counter
757 * are always 1:1 mapped.
758 * The free running counter is always active.
759 * Assign the free running counter here.
761 event->hw.event_base = uncore_freerunning_counter(box, event);
763 hwc->config = event->attr.config &
764 (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
765 if (pmu->type->ops->hw_config) {
766 ret = pmu->type->ops->hw_config(box, event);
772 if (event->group_leader != event)
773 ret = uncore_validate_group(pmu, event);
780 static void uncore_pmu_enable(struct pmu *pmu)
782 struct intel_uncore_pmu *uncore_pmu;
783 struct intel_uncore_box *box;
785 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
789 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
793 if (uncore_pmu->type->ops->enable_box)
794 uncore_pmu->type->ops->enable_box(box);
797 static void uncore_pmu_disable(struct pmu *pmu)
799 struct intel_uncore_pmu *uncore_pmu;
800 struct intel_uncore_box *box;
802 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
806 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
810 if (uncore_pmu->type->ops->disable_box)
811 uncore_pmu->type->ops->disable_box(box);
/*
 * sysfs "cpumask" attribute: prints uncore_cpu_mask, the set of CPUs that
 * collect uncore events, in list format.
 */
814 static ssize_t uncore_get_attr_cpumask(struct device *dev,
815 struct device_attribute *attr, char *buf)
817 return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
/* Read-only attribute (S_IRUGO), no store handler. */
820 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
822 static struct attribute *uncore_pmu_attrs[] = {
823 &dev_attr_cpumask.attr,
/* Attribute group installed as type->pmu_group by uncore_type_init(). */
827 static const struct attribute_group uncore_pmu_attr_group = {
828 .attrs = uncore_pmu_attrs,
/*
 * Register one uncore PMU with perf.
 *
 * When the type supplies no struct pmu template, a default one is built
 * from the uncore_pmu_* callbacks; otherwise the template is copied and its
 * attr_groups/attr_update are taken from the type. The PMU name is "uncore"
 * or "uncore_<type>" for single-box types and "uncore_<type>_<n>" for
 * multi-box types. pmu->registered is set once perf_pmu_register() ran.
 *
 * NOTE(review): the name is built with unbounded sprintf(); confirm that
 * pmu->name is large enough for every type name, or switch to snprintf().
 */
831 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
835 if (!pmu->type->pmu) {
836 pmu->pmu = (struct pmu) {
837 .attr_groups = pmu->type->attr_groups,
838 .task_ctx_nr = perf_invalid_context,
839 .pmu_enable = uncore_pmu_enable,
840 .pmu_disable = uncore_pmu_disable,
841 .event_init = uncore_pmu_event_init,
842 .add = uncore_pmu_event_add,
843 .del = uncore_pmu_event_del,
844 .start = uncore_pmu_event_start,
845 .stop = uncore_pmu_event_stop,
846 .read = uncore_pmu_event_read,
847 .module = THIS_MODULE,
848 .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
849 .attr_update = pmu->type->attr_update,
852 pmu->pmu = *pmu->type->pmu;
853 pmu->pmu.attr_groups = pmu->type->attr_groups;
854 pmu->pmu.attr_update = pmu->type->attr_update;
857 if (pmu->type->num_boxes == 1) {
858 if (strlen(pmu->type->name) > 0)
859 sprintf(pmu->name, "uncore_%s", pmu->type->name);
861 sprintf(pmu->name, "uncore");
863 sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
867 ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
869 pmu->registered = true;
873 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
875 if (!pmu->registered)
877 perf_pmu_unregister(&pmu->pmu);
878 pmu->registered = false;
881 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
885 for (die = 0; die < uncore_max_dies(); die++)
886 kfree(pmu->boxes[die]);
890 static void uncore_type_exit(struct intel_uncore_type *type)
892 struct intel_uncore_pmu *pmu = type->pmus;
895 if (type->cleanup_mapping)
896 type->cleanup_mapping(type);
899 for (i = 0; i < type->num_boxes; i++, pmu++) {
900 uncore_pmu_unregister(pmu);
901 uncore_free_boxes(pmu);
906 kfree(type->events_group);
907 type->events_group = NULL;
/* Tear down every type in a NULL-terminated type list. */
static void uncore_types_exit(struct intel_uncore_type **types)
{
	while (*types) {
		uncore_type_exit(*types);
		types++;
	}
}
916 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
918 struct intel_uncore_pmu *pmus;
922 pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
926 size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
928 for (i = 0; i < type->num_boxes; i++) {
929 pmus[i].func_id = setid ? i : -1;
932 pmus[i].boxes = kzalloc(size, GFP_KERNEL);
938 type->unconstrainted = (struct event_constraint)
939 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
940 0, type->num_counters, 0, 0);
942 if (type->event_descs) {
944 struct attribute_group group;
945 struct attribute *attrs[];
947 for (i = 0; type->event_descs[i].attr.attr.name; i++);
949 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
954 attr_group->group.name = "events";
955 attr_group->group.attrs = attr_group->attrs;
957 for (j = 0; j < i; j++)
958 attr_group->attrs[j] = &type->event_descs[j].attr.attr;
960 type->events_group = &attr_group->group;
963 type->pmu_group = &uncore_pmu_attr_group;
965 if (type->set_mapping)
966 type->set_mapping(type);
971 for (i = 0; i < type->num_boxes; i++)
972 kfree(pmus[i].boxes);
979 uncore_types_init(struct intel_uncore_type **types, bool setid)
983 for (; *types; types++) {
984 ret = uncore_type_init(*types, setid);
992 * Get the die information of a PCI device.
993 * @pdev: The PCI device.
994 * @phys_id: The physical socket id which the device maps to.
995 * @die: The die id which the device maps to.
997 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev,
998 int *phys_id, int *die)
1000 *phys_id = uncore_pcibus_to_physid(pdev->bus);
1004 *die = (topology_max_die_per_package() > 1) ? *phys_id :
1005 topology_phys_to_logical_pkg(*phys_id);
1013 * Find the PMU of a PCI device.
1014 * @pdev: The PCI device.
1015 * @ids: The ID table of the available PCI devices with a PMU.
1017 static struct intel_uncore_pmu *
1018 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1020 struct intel_uncore_pmu *pmu = NULL;
1021 struct intel_uncore_type *type;
1022 kernel_ulong_t data;
1025 while (ids && ids->vendor) {
1026 if ((ids->vendor == pdev->vendor) &&
1027 (ids->device == pdev->device)) {
1028 data = ids->driver_data;
1029 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1030 UNCORE_PCI_DEV_FUNC(data));
1031 if (devfn == pdev->devfn) {
1032 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1033 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
/*
 * Create the box for a PCI uncore device and attach it to its PMU.
 *
 * A box must not already exist for the die. The new box takes one
 * reference, records the physical id and the PCI device, is initialised
 * and is stored in pmu->boxes[die]. Only the first active box of a PMU
 * registers the PMU with perf; when that registration fails the box slot
 * is cleared again and the box is shut down.
 */
1043 * Register the PMU for a PCI device
1044 * @pdev: The PCI device.
1045 * @type: The corresponding PMU type of the device.
1046 * @pmu: The corresponding PMU of the device.
1047 * @phys_id: The physical socket id which the device maps to.
1048 * @die: The die id which the device maps to.
1050 static int uncore_pci_pmu_register(struct pci_dev *pdev,
1051 struct intel_uncore_type *type,
1052 struct intel_uncore_pmu *pmu,
1053 int phys_id, int die)
1055 struct intel_uncore_box *box;
1058 if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1061 box = uncore_alloc_box(type, NUMA_NO_NODE);
/* The first device seen fixes the PMU's devfn; later devices must agree. */
1065 if (pmu->func_id < 0)
1066 pmu->func_id = pdev->devfn;
1068 WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1070 atomic_inc(&box->refcnt);
1071 box->pci_phys_id = phys_id;
1073 box->pci_dev = pdev;
1075 uncore_box_init(box);
1077 pmu->boxes[die] = box;
1078 if (atomic_inc_return(&pmu->activeboxes) > 1)
1081 /* First active box registers the pmu */
1082 ret = uncore_pmu_register(pmu);
1084 pmu->boxes[die] = NULL;
1085 uncore_box_exit(box);
/*
 * PCI probe callback.
 *
 * The device's socket and die are resolved first. Devices of type
 * UNCORE_EXTRA_PCI_DEV are only recorded in uncore_extra_pci_dev[die] and
 * get NULL driver data. For all other devices the PMU is located, either
 * through the driver's id table when driver_data carries bits above the
 * low 16 or directly by index, and the box is registered. The box is
 * stored as the device's driver data.
 */
1092 * add a pci uncore device
1094 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1096 struct intel_uncore_type *type;
1097 struct intel_uncore_pmu *pmu = NULL;
1098 int phys_id, die, ret;
1100 ret = uncore_pci_get_dev_die_info(pdev, &phys_id, &die);
1104 if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1105 int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1107 uncore_extra_pci_dev[die].dev[idx] = pdev;
1108 pci_set_drvdata(pdev, NULL);
1112 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1115 * Some platforms, e.g. Knights Landing, use a common PCI device ID
1116 * for multiple instances of an uncore PMU device type. We should check
1117 * PCI slot and func to indicate the uncore box.
1119 if (id->driver_data & ~0xffff) {
1120 struct pci_driver *pci_drv = pdev->driver;
1122 pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1127 * for performance monitoring unit with multiple boxes,
1128 * each box has a different function id.
1130 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1133 ret = uncore_pci_pmu_register(pdev, type, pmu, phys_id, die);
1135 pci_set_drvdata(pdev, pmu->boxes[die]);
1140 static void uncore_pci_remove(struct pci_dev *pdev)
1142 struct intel_uncore_box *box;
1143 struct intel_uncore_pmu *pmu;
1144 int i, phys_id, die;
1146 phys_id = uncore_pcibus_to_physid(pdev->bus);
1148 box = pci_get_drvdata(pdev);
1150 die = (topology_max_die_per_package() > 1) ? phys_id :
1151 topology_phys_to_logical_pkg(phys_id);
1152 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1153 if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1154 uncore_extra_pci_dev[die].dev[i] = NULL;
1158 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1163 if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
1166 pci_set_drvdata(pdev, NULL);
1167 pmu->boxes[box->dieid] = NULL;
1168 if (atomic_dec_return(&pmu->activeboxes) == 0)
1169 uncore_pmu_unregister(pmu);
1170 uncore_box_exit(box);
/*
 * Set up PCI based uncore support: allocate the per-die extra device table,
 * initialise the PCI uncore types, install the probe/remove callbacks and
 * register the PCI driver. The cleanup statements at the end release the
 * types, the extra device table and the bus map, and reset
 * uncore_pci_uncores to the empty list.
 */
1174 static int __init uncore_pci_init(void)
1179 size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1180 uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1181 if (!uncore_extra_pci_dev) {
1186 ret = uncore_types_init(uncore_pci_uncores, false);
1190 uncore_pci_driver->probe = uncore_pci_probe;
1191 uncore_pci_driver->remove = uncore_pci_remove;
1193 ret = pci_register_driver(uncore_pci_driver);
/* Remembered so that uncore_pci_exit() knows there is something to undo. */
1197 pcidrv_registered = true;
1201 uncore_types_exit(uncore_pci_uncores);
1202 kfree(uncore_extra_pci_dev);
1203 uncore_extra_pci_dev = NULL;
1204 uncore_free_pcibus_map();
1206 uncore_pci_uncores = empty_uncore;
1210 static void uncore_pci_exit(void)
1212 if (pcidrv_registered) {
1213 pcidrv_registered = false;
1214 pci_unregister_driver(uncore_pci_driver);
1215 uncore_types_exit(uncore_pci_uncores);
1216 kfree(uncore_extra_pci_dev);
1217 uncore_free_pcibus_map();
/*
 * Move the boxes of one uncore type from @old_cpu to @new_cpu.
 *
 * The die is taken from @new_cpu when @old_cpu is negative and from
 * @old_cpu otherwise. For each PMU's box on that die, the box's current cpu
 * is sanity checked, the polling timer is cancelled and the perf context
 * is migrated.
 */
1221 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1224 struct intel_uncore_pmu *pmu = type->pmus;
1225 struct intel_uncore_box *box;
1228 die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1229 for (i = 0; i < type->num_boxes; i++, pmu++) {
1230 box = pmu->boxes[die];
1235 WARN_ON_ONCE(box->cpu != -1);
1240 WARN_ON_ONCE(box->cpu != old_cpu);
1245 uncore_pmu_cancel_hrtimer(box);
1246 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
/* Apply uncore_change_type_ctx() to every type of a NULL-terminated list. */
static void uncore_change_context(struct intel_uncore_type **uncores,
				  int old_cpu, int new_cpu)
{
	while (*uncores) {
		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
		uncores++;
	}
}
1258 static void uncore_box_unref(struct intel_uncore_type **types, int id)
1260 struct intel_uncore_type *type;
1261 struct intel_uncore_pmu *pmu;
1262 struct intel_uncore_box *box;
1265 for (; *types; types++) {
1268 for (i = 0; i < type->num_boxes; i++, pmu++) {
1269 box = pmu->boxes[id];
1270 if (box && atomic_dec_return(&box->refcnt) == 0)
1271 uncore_box_exit(box);
/*
 * CPU hotplug offline callback.
 *
 * When the outgoing CPU was the one collecting uncore events, another CPU
 * of the same die is chosen and added to uncore_cpu_mask when one exists,
 * and the MSR, MMIO and PCI contexts are moved to the chosen target. The
 * MSR and MMIO boxes of the die then lose the reference held for this CPU.
 */
1276 static int uncore_event_cpu_offline(unsigned int cpu)
1280 /* Check if exiting cpu is used for collecting uncore events */
1281 if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1283 /* Find a new cpu to collect uncore events */
1284 target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1286 /* Migrate uncore events to the new target */
1287 if (target < nr_cpu_ids)
1288 cpumask_set_cpu(target, &uncore_cpu_mask);
1292 uncore_change_context(uncore_msr_uncores, cpu, target);
1293 uncore_change_context(uncore_mmio_uncores, cpu, target);
1294 uncore_change_context(uncore_pci_uncores, cpu, target);
1297 /* Clear the references */
1298 die = topology_logical_die_id(cpu);
1299 uncore_box_unref(uncore_msr_uncores, die);
1300 uncore_box_unref(uncore_mmio_uncores, die);
/*
 * Allocate the boxes that are still missing for die @die.
 *
 * New boxes are first gathered on a local list, linked through their
 * active_list member, and only afterwards installed into their PMUs' box
 * arrays. PMUs that already have a box for the die are left alone. The
 * final loop unlinks whatever remains on the local list.
 */
1304 static int allocate_boxes(struct intel_uncore_type **types,
1305 unsigned int die, unsigned int cpu)
1307 struct intel_uncore_box *box, *tmp;
1308 struct intel_uncore_type *type;
1309 struct intel_uncore_pmu *pmu;
1310 LIST_HEAD(allocated);
1313 /* Try to allocate all required boxes */
1314 for (; *types; types++) {
1317 for (i = 0; i < type->num_boxes; i++, pmu++) {
1318 if (pmu->boxes[die])
/* Allocate on the NUMA node of the CPU that is coming up. */
1320 box = uncore_alloc_box(type, cpu_to_node(cpu));
1325 list_add(&box->active_list, &allocated);
1328 /* Install them in the pmus */
1329 list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1330 list_del_init(&box->active_list);
1331 box->pmu->boxes[die] = box;
1336 list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1337 list_del_init(&box->active_list);
1343 static int uncore_box_ref(struct intel_uncore_type **types,
1344 int id, unsigned int cpu)
1346 struct intel_uncore_type *type;
1347 struct intel_uncore_pmu *pmu;
1348 struct intel_uncore_box *box;
1351 ret = allocate_boxes(types, id, cpu);
1355 for (; *types; types++) {
1358 for (i = 0; i < type->num_boxes; i++, pmu++) {
1359 box = pmu->boxes[id];
1360 if (box && atomic_inc_return(&box->refcnt) == 1)
1361 uncore_box_init(box);
/*
 * CPU hotplug online callback.
 *
 * References are taken on the MSR and MMIO boxes of the CPU's die; the
 * early exit is taken only when both calls failed. When no CPU of the die
 * is in uncore_cpu_mask yet, this CPU is added and the contexts are
 * attached to it, with old_cpu passed as -1.
 */
1367 static int uncore_event_cpu_online(unsigned int cpu)
1369 int die, target, msr_ret, mmio_ret;
1371 die = topology_logical_die_id(cpu);
1372 msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1373 mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1374 if (msr_ret && mmio_ret)
1378 * Check if there is an online cpu in the package
1379 * which collects uncore events already.
1381 target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1382 if (target < nr_cpu_ids)
1385 cpumask_set_cpu(cpu, &uncore_cpu_mask);
1388 uncore_change_context(uncore_msr_uncores, -1, cpu);
1390 uncore_change_context(uncore_mmio_uncores, -1, cpu);
1391 uncore_change_context(uncore_pci_uncores, -1, cpu);
1395 static int __init type_pmu_register(struct intel_uncore_type *type)
1399 for (i = 0; i < type->num_boxes; i++) {
1400 ret = uncore_pmu_register(&type->pmus[i]);
1407 static int __init uncore_msr_pmus_register(void)
1409 struct intel_uncore_type **types = uncore_msr_uncores;
1412 for (; *types; types++) {
1413 ret = type_pmu_register(*types);
1420 static int __init uncore_cpu_init(void)
1424 ret = uncore_types_init(uncore_msr_uncores, true);
1428 ret = uncore_msr_pmus_register();
1433 uncore_types_exit(uncore_msr_uncores);
1434 uncore_msr_uncores = empty_uncore;
1438 static int __init uncore_mmio_init(void)
1440 struct intel_uncore_type **types = uncore_mmio_uncores;
1443 ret = uncore_types_init(types, true);
1447 for (; *types; types++) {
1448 ret = type_pmu_register(*types);
1454 uncore_types_exit(uncore_mmio_uncores);
1455 uncore_mmio_uncores = empty_uncore;
1459 struct intel_uncore_init_fun {
1460 void (*cpu_init)(void);
1461 int (*pci_init)(void);
1462 void (*mmio_init)(void);
1465 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1466 .cpu_init = nhm_uncore_cpu_init,
1469 static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1470 .cpu_init = snb_uncore_cpu_init,
1471 .pci_init = snb_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for IVYBRIDGE. Shares
 * snb_uncore_cpu_init with the Sandy Bridge table but has its own pci_init.
 */
1474 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1475 .cpu_init = snb_uncore_cpu_init,
1476 .pci_init = ivb_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for HASWELL, HASWELL_L and
 * HASWELL_G. Shares snb_uncore_cpu_init; pci_init is Haswell specific.
 */
1479 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1480 .cpu_init = snb_uncore_cpu_init,
1481 .pci_init = hsw_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for BROADWELL and BROADWELL_G.
 * Shares snb_uncore_cpu_init; pci_init is Broadwell specific.
 */
1484 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1485 .cpu_init = snb_uncore_cpu_init,
1486 .pci_init = bdw_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for SANDYBRIDGE_X: cpu_init and
 * pci_init are set, mmio_init is not.
 */
1489 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1490 .cpu_init = snbep_uncore_cpu_init,
1491 .pci_init = snbep_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for NEHALEM_EX and WESTMERE_EX.
 * Only the cpu_init hook is set in the visible initializer.
 */
1494 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1495 .cpu_init = nhmex_uncore_cpu_init,
/*
 * Hooks referenced by intel_uncore_match for IVYBRIDGE_X: cpu_init and
 * pci_init are set, mmio_init is not.
 */
1498 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1499 .cpu_init = ivbep_uncore_cpu_init,
1500 .pci_init = ivbep_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for HASWELL_X: cpu_init and
 * pci_init are set, mmio_init is not.
 */
1503 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1504 .cpu_init = hswep_uncore_cpu_init,
1505 .pci_init = hswep_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for BROADWELL_X and BROADWELL_D:
 * cpu_init and pci_init are set, mmio_init is not.
 */
1508 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1509 .cpu_init = bdx_uncore_cpu_init,
1510 .pci_init = bdx_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for XEON_PHI_KNL and XEON_PHI_KNM:
 * cpu_init and pci_init are set, mmio_init is not.
 */
1513 static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1514 .cpu_init = knl_uncore_cpu_init,
1515 .pci_init = knl_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for SKYLAKE, SKYLAKE_L, KABYLAKE_L,
 * KABYLAKE, COMETLAKE_L and COMETLAKE: cpu_init and pci_init are set,
 * mmio_init is not.
 */
1518 static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1519 .cpu_init = skl_uncore_cpu_init,
1520 .pci_init = skl_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for SKYLAKE_X: cpu_init and
 * pci_init are set, mmio_init is not.
 */
1523 static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1524 .cpu_init = skx_uncore_cpu_init,
1525 .pci_init = skx_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for ICELAKE_L, ICELAKE_NNPI and
 * ICELAKE. Has its own cpu_init but reuses skl_uncore_pci_init.
 */
1528 static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1529 .cpu_init = icl_uncore_cpu_init,
1530 .pci_init = skl_uncore_pci_init,
/*
 * Hooks referenced by intel_uncore_match for TIGERLAKE. Reuses
 * icl_uncore_cpu_init and sets an mmio_init hook; no pci_init is set.
 */
1533 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1534 .cpu_init = icl_uncore_cpu_init,
1535 .mmio_init = tgl_uncore_mmio_init,
/*
 * Hooks referenced by intel_uncore_match for TIGERLAKE_L. Same as
 * tgl_uncore_init except for the mmio_init hook (tgl_l_uncore_mmio_init).
 */
1538 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1539 .cpu_init = icl_uncore_cpu_init,
1540 .mmio_init = tgl_l_uncore_mmio_init,
/*
 * Hooks referenced by intel_uncore_match for ICELAKE_D and ICELAKE_X.
 * All three hooks (cpu, pci, mmio) are set.
 */
1543 static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1544 .cpu_init = icx_uncore_cpu_init,
1545 .pci_init = icx_uncore_pci_init,
1546 .mmio_init = icx_uncore_mmio_init,
/*
 * Hooks referenced by intel_uncore_match for ATOM_TREMONT_D.
 * All three hooks (cpu, pci, mmio) are set.
 */
1549 static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1550 .cpu_init = snr_uncore_cpu_init,
1551 .pci_init = snr_uncore_pci_init,
1552 .mmio_init = snr_uncore_mmio_init,
/*
 * CPU model -> init hook table.
 *
 * Each entry pairs a family 6 model with the address of the
 * struct intel_uncore_init_fun to use for it. intel_uncore_init() passes
 * this table to x86_match_cpu() and reads the hook table back from the
 * matched entry's driver_data.
 *
 * NOTE(review): the terminating entry and the closing brace are not visible
 * in this listing (embedded numbers 1591-1592 are skipped).
 */
1555 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1556 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_uncore_init),
1557 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_uncore_init),
1558 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_uncore_init),
1559 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_uncore_init),
1560 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_uncore_init),
1561 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &ivb_uncore_init),
1562 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &hsw_uncore_init),
1563 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hsw_uncore_init),
1564 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &hsw_uncore_init),
1565 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &bdw_uncore_init),
1566 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &bdw_uncore_init),
1567 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snbep_uncore_init),
1568 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhmex_uncore_init),
1569 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhmex_uncore_init),
1570 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ivbep_uncore_init),
1571 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &hswep_uncore_init),
1572 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &bdx_uncore_init),
1573 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &bdx_uncore_init),
1574 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_uncore_init),
1575 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_uncore_init),
1576 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &skl_uncore_init),
1577 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &skl_uncore_init),
1578 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_uncore_init),
1579 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &skl_uncore_init),
1580 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &skl_uncore_init),
1581 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &skl_uncore_init),
1582 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &skl_uncore_init),
1583 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init),
1584 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init),
1585 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init),
1586 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_uncore_init),
1587 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_uncore_init),
1588 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_l_uncore_init),
1589 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_uncore_init),
1590 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
/* Publish the match table as this module's x86cpu device table. */
1593 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
/*
 * intel_uncore_init - module entry point (registered with module_init()).
 *
 * Visible flow:
 *  - match the CPU against intel_uncore_match via x86_match_cpu();
 *  - test boot_cpu_has(X86_FEATURE_HYPERVISOR);
 *  - for each of the pci, cpu and mmio hooks of the matched model that is
 *    non-NULL, call the hook and then the generic uncore_pci_init(),
 *    uncore_cpu_init() or uncore_mmio_init(), keeping the results in
 *    pret, cret and mret;
 *  - install the CPU hotplug online/offline callbacks.
 *
 * NOTE(review): this listing drops many short lines (braces, returns,
 * labels, single-line conditions); the embedded numbers show the gaps.
 * Statements about what happens on failure are therefore hedged below and
 * must be confirmed against the complete file.
 */
1595 static int __init intel_uncore_init(void)
1597 const struct x86_cpu_id *id;
1598 struct intel_uncore_init_fun *uncore_init;
1599 int pret = 0, cret = 0, mret = 0, ret;
/* NOTE(review): the check of 'id' after the lookup is not visible here. */
1601 id = x86_match_cpu(intel_uncore_match);
/*
 * NOTE(review): the action taken when the hypervisor feature bit is set is
 * on a missing line; presumably the driver refuses to load - confirm.
 */
1605 if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
/*
 * Continuation line: packages times dies-per-package. The left-hand side is
 * on a missing line; presumably it is __uncore_max_dies - confirm.
 */
1609 topology_max_packages() * topology_max_die_per_package();
/*
 * driver_data holds the address of the per-model hook table stored in
 * intel_uncore_match. The tables are declared const; this cast drops the
 * qualifier because uncore_init is a non-const pointer.
 */
1611 uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1612 if (uncore_init->pci_init) {
1613 pret = uncore_init->pci_init();
/*
 * NOTE(review): embedded line 1614 is missing; presumably the generic PCI
 * init below only runs when the model hook returned 0 - confirm.
 */
1615 pret = uncore_pci_init();
1618 if (uncore_init->cpu_init) {
1619 uncore_init->cpu_init();
1620 cret = uncore_cpu_init();
1623 if (uncore_init->mmio_init) {
1624 uncore_init->mmio_init();
1625 mret = uncore_mmio_init();
/*
 * pret, cret and mret start at 0 and, as far as visible here, are written
 * only inside the hook checks above. The condition is therefore true only
 * when all three hooks exist and all three paths returned non-zero.
 */
1628 if (cret && pret && mret)
1631 /* Install hotplug callbacks to setup the targets for each package */
1632 ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1633 "perf/x86/intel/uncore:online",
1634 uncore_event_cpu_online,
1635 uncore_event_cpu_offline);
/*
 * NOTE(review): presumably the failure path of cpuhp_setup_state(); the
 * branch and label lines (and embedded line 1643) are not visible.
 */
1641 uncore_types_exit(uncore_msr_uncores);
1642 uncore_types_exit(uncore_mmio_uncores);
/* Run intel_uncore_init() when the module is loaded. */
1646 module_init(intel_uncore_init);
/*
 * intel_uncore_exit - module exit handler (registered with module_exit()).
 *
 * Removes the CPU hotplug state installed by intel_uncore_init() and then
 * releases the MSR and MMIO uncore types with uncore_types_exit().
 *
 * NOTE(review): embedded line 1653 is missing from this listing, so a
 * further teardown call may exist that is not shown here.
 */
1648 static void __exit intel_uncore_exit(void)
/* Remove the hotplug state first, before the type data is released. */
1650 cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1651 uncore_types_exit(uncore_msr_uncores);
1652 uncore_types_exit(uncore_mmio_uncores);
/* Run intel_uncore_exit() when the module is unloaded. */
1655 module_exit(intel_uncore_exit);