/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/irq.h>
#include <linux/pm_runtime.h>

#include "gt/intel_engine.h"

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_pm.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
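
/*
 * With the default FREQUENCY of 200 the period works out to
 * NSEC_PER_SEC / 200 = 5 ms; the max_t() clamp to 10 us only matters
 * if FREQUENCY were ever raised above 100 kHz.
 */
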
#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}
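
/*
 * Sketch of the engine event config layout decoded above, as defined by
 * the uapi I915_PMU_* macros: the sample type occupies the low
 * I915_PMU_SAMPLE_BITS, followed by an 8-bit engine instance, with the
 * 8-bit engine class starting at I915_PMU_CLASS_SHIFT.
 */
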
static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, when software busyness tracking is available we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS0]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}
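
/*
 * Note that I915_PMU_INTERRUPTS and I915_PMU_RC6_RESIDENCY are absent
 * from the mask above on purpose: those counters are read on demand at
 * event read time, so enabling only those events never needs to start
 * the sampling timer.
 */
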
void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled
	 * and GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		i915->pmu.timer_last = ktime_get();
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}
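
/*
 * The engine sampling below runs from the timer callback: it uses raw
 * (_FW, no forcewake handling) mmio reads under uncore.lock for speed,
 * and treats a zero RING_CTL readback as the powerwell being off, i.e.
 * the engine being idle.
 */
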
static void
engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	unsigned long flags;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	wakeref = 0;
	if (READ_ONCE(dev_priv->gt.awake))
		wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
	if (!wakeref)
		return;

	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
	for_each_engine(engine, dev_priv, id) {
		struct intel_engine_pmu *pmu = &engine->pmu;
		bool busy;
		u32 val;

		val = I915_READ_FW(RING_CTL(engine->mmio_base));
		if (val == 0) /* powerwell off => engine idle */
			continue;

		if (val & RING_WAIT)
			add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
		if (val & RING_WAIT_SEMAPHORE)
			add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

		/*
		 * While waiting on a semaphore or event, MI_MODE reports the
		 * ring as idle. However, previously using the seqno, and with
		 * execlists sampling, we account for the ring waiting as the
		 * engine being busy. Therefore, we record the sample as being
		 * busy if either waiting or !idle.
		 */
		busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
		if (!busy) {
			val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
			busy = !(val & MODE_IDLE);
		}
		if (busy)
			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
	}
	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);

	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
}

static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
	sample->cur += mul_u32_u32(val, mul);
}
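
/*
 * add_sample_mult() lets the frequency sampling below build a
 * time-weighted sum: each sampled frequency (in MHz) is multiplied by
 * the elapsed period in ns, and the read side later divides the
 * accumulated value by USEC_PER_SEC to scale it back down.
 */
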
static void
frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake) {
			intel_wakeref_t wakeref;

			with_intel_runtime_pm_if_in_use(&dev_priv->runtime_pm,
							wakeref) {
				val = intel_uncore_read_notrace(&dev_priv->uncore,
								GEN6_RPSTAT1);
				val = intel_get_cagf(dev_priv, val);
			}
		}

		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
				intel_gpu_freq(dev_priv, val),
				period_ns);
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
				intel_gpu_freq(dev_priv,
					       dev_priv->gt_pm.rps.cur_freq),
				period_ns);
	}
}
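
/*
 * Timer callback: it measures the actually elapsed period (rather than
 * assuming PERIOD) so the accumulated samples stay accurate even if the
 * callback is delayed, and rearms itself for as long as sampling
 * remains enabled.
 */
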
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);
	unsigned int period_ns;
	ktime_t now;

	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
	i915->pmu.timer_last = now;

	/*
	 * Strictly speaking the passed in period may not be 100% accurate for
	 * all internal calculation, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer call-
	 * back delay greatly dominates this so we keep it simple.
	 */
	engines_sample(i915, period_ns);
	frequency_sample(i915, period_ns);

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}

static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}

static void engine_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915,
					  engine_event_class(event),
					  engine_event_instance(event));
	if (WARN_ON_ONCE(!engine))
		return;

	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
	    intel_engine_supports_stats(engine))
		intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);

	if (is_engine_event(event))
		engine_event_destroy(event);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
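
/*
 * The gen6 cut-off above reflects that hardware semaphores (and with
 * them the RING_WAIT_SEMAPHORE status sampled by engines_sample())
 * only exist on gen6 and newer parts.
 */
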
static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		/* Fall-through. */
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;
	u8 sample;
	int ret;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	sample = engine_event_sample(event);
	ret = engine_event_status(engine, sample);
	if (ret)
		return ret;

	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
		ret = intel_enable_engine_stats(engine);

	return ret;
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	event->destroy = i915_pmu_event_destroy;

	return 0;
}
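
/*
 * Example (userspace sketch, not part of this file): opening the render
 * engine busy counter with perf_event_open(2), using the uapi config
 * macros from <drm/i915_drm.h>. Includes and error handling elided.
 *
 *	struct perf_event_attr attr = { };
 *	int type, fd;
 *	FILE *f = fopen("/sys/bus/event_source/devices/i915/type", "r");
 *
 *	fscanf(f, "%d", &type);
 *	fclose(f);
 *
 *	attr.type = type;
 *	attr.size = sizeof(attr);
 *	attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0);
 *
 *	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
 *
 * pid is -1 and the cpu must be one from the cpumask attribute, since
 * i915 events are system-wide and single-CPU, as enforced above.
 */
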
static u64 __get_rc6(struct drm_i915_private *i915)
{
	u64 val;

	val = intel_rc6_residency_ns(i915,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);

	return val;
}

static u64 get_rc6(struct drm_i915_private *i915)
{
#if IS_ENABLED(CONFIG_PM)
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	intel_wakeref_t wakeref;
	unsigned long flags;
	u64 val;

	wakeref = intel_runtime_pm_get_if_in_use(rpm);
	if (wakeref) {
		val = __get_rc6(i915);
		intel_runtime_pm_put(rpm, wakeref);

		/*
		 * If we are coming back from being runtime suspended we must
		 * be careful not to report a larger value than returned
		 * previously.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);

		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		}

		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	} else {
		struct device *kdev = rpm->kdev;

		/*
		 * We are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);

		/*
		 * Even though intel_runtime_pm_get_if_in_use above failed to
		 * get the runtime PM reference we cannot assume we are in
		 * runtime suspend, since we can either: a) race with coming
		 * out of it before we took the power.lock, or b) arrive here
		 * from states other than suspended.
		 *
		 * We need to double-check that we are indeed currently runtime
		 * suspended and if not we cannot do better than report the last
		 * known RC6 value.
		 */
		if (pm_runtime_status_suspended(kdev)) {
			val = pm_runtime_suspended_time(kdev);

			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
				i915->pmu.suspended_time_last = val;

			val -= i915->pmu.suspended_time_last;
			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;

			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
		}

		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	}

	return val;
#else
	return __get_rc6(i915);
#endif
}
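
/*
 * Worked example for the estimation above: if the last real RC6 read
 * was R ns and the device has since been runtime suspended for S ns,
 * the counter is reported as approximately R + S, on the assumption
 * that a runtime-suspended device spends effectively all of that time
 * in RC6.
 */
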
static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915);
			break;
		}
	}

	return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}
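
/*
 * The cmpxchg/retry loop above is the usual perf read pattern: it
 * publishes the new snapshot in prev_count and adds only the delta to
 * the event count, so concurrent readers never double-account a sample.
 */
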
static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS);
	GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
			     I915_ENGINE_SAMPLE_COUNT);
		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
			     I915_ENGINE_SAMPLE_COUNT);
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

		engine->pmu.enable |= BIT(sample);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};
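
/*
 * The above exports a single format description to userspace, i.e.
 * /sys/bus/event_source/devices/i915/format/i915_eventid containing
 * "config:0-20", telling perf which attr.config bits carry the event
 * id.
 */
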
struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	/* Patch in attrs at runtime. */
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};

#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	i915->pmu.i915_attr = i915_attr;
	i915->pmu.pmu_attr = pmu_attr;

	return attr;

err:
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}
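
/*
 * Example of the result: with a render engine present, the code above
 * creates events such as "rcs0-busy" (paired with an "rcs0-busy.unit"
 * of "ns") alongside "actual-frequency" and friends, usable from
 * userspace as e.g.:
 *
 *	perf stat -e i915/rcs0-busy/ -a sleep 1
 *
 * The exact engine names depend on the engines the device exposes.
 */
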
static void free_event_attributes(struct drm_i915_private *i915)
{
	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(i915_pmu_events_attr_group.attrs);
	kfree(i915->pmu.i915_attr);
	kfree(i915->pmu.pmu_attr);

	i915_pmu_events_attr_group.attrs = NULL;
	i915->pmu.i915_attr = NULL;
	i915->pmu.pmu_attr = NULL;
}

static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}
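
/*
 * Together the two hotplug callbacks keep a single designated reader
 * CPU in i915_pmu_cpumask (the mask exported via the cpumask attribute
 * above), migrating the perf context if that CPU goes offline. This is
 * what lets i915_pmu_event_init() insist events run on exactly one CPU
 * at a time.
 */
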
static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}

void i915_pmu_register(struct drm_i915_private *i915)
{
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		DRM_INFO("PMU not supported for this GPU.");
		return;
	}

	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
	if (!i915_pmu_events_attr_group.attrs) {
		ret = -ENOMEM;
		goto err;
	}

	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
	i915->pmu.base.task_ctx_nr = perf_invalid_context;
	i915->pmu.base.event_init = i915_pmu_event_init;
	i915->pmu.base.add = i915_pmu_event_add;
	i915->pmu.base.del = i915_pmu_event_del;
	i915->pmu.base.start = i915_pmu_event_start;
	i915->pmu.base.stop = i915_pmu_event_stop;
	i915->pmu.base.read = i915_pmu_event_read;
	i915->pmu.base.event_idx = i915_pmu_event_event_idx;

	spin_lock_init(&i915->pmu.lock);
	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	i915->pmu.timer.function = i915_sample;

	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(i915);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&i915->pmu.base);
err:
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
}