/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/pm_runtime.h>

#include "gt/intel_engine.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_rc6.h"
#include "gt/intel_rps.h"

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_pm.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
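
/*
 * With FREQUENCY at 200 Hz, PERIOD resolves to 5 ms; the max_t() simply
 * clamps the period to no less than 10 us should FREQUENCY ever be raised.
 */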

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;
static unsigned int i915_pmu_target_cpu = -1;

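/*
 * Event configs below __I915_PMU_OTHER(0) describe engine events: the low
 * I915_PMU_SAMPLE_BITS select the sample type (busy/wait/sema), the next
 * byte holds the engine instance and the byte at I915_PMU_CLASS_SHIFT the
 * engine class. Everything at or above __I915_PMU_OTHER(0) is a global
 * event. The helpers below pick these fields apart.
 */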
static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = pmu->enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, if software busyness tracking is available, we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 */
	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

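/*
 * Sum up the total RC6 residency, in ns, over all of the RC6 flavours the
 * platform supports (base RC6 plus the deeper RC6p/RC6pp states where
 * present). Valleyview reports render RC6 through a different register.
 */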
static u64 __get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	u64 val;

	val = intel_rc6_residency_ns(&gt->rc6,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);

	return val;
}

#if IS_ENABLED(CONFIG_PM)

static inline s64 ktime_since(const ktime_t kt)
{
	return ktime_to_ns(ktime_sub(ktime_get(), kt));
}

static u64 get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;
	bool awake = false;
	u64 val;

	if (intel_gt_pm_get_if_awake(gt)) {
		val = __get_rc6(gt);
		intel_gt_pm_put_async(gt);
		awake = true;
	}

	spin_lock_irqsave(&pmu->lock, flags);

	if (awake) {
		pmu->sample[__I915_SAMPLE_RC6].cur = val;
	} else {
		/*
		 * We think we are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		val = ktime_since(pmu->sleep_last);
		val += pmu->sample[__I915_SAMPLE_RC6].cur;
	}

	if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
		val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
	else
		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;

	spin_unlock_irqrestore(&pmu->lock, flags);

	return val;
}

static void park_rc6(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);

	pmu->sleep_last = ktime_get();
}

#else

static u64 get_rc6(struct intel_gt *gt)
{
	return __get_rc6(gt);
}

static void park_rc6(struct drm_i915_private *i915) {}

#endif

static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
		pmu->timer_enabled = true;
		pmu->timer_last = ktime_get();
		hrtimer_start_range_ns(&pmu->timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	park_rc6(i915);

	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	pmu->timer_enabled = pmu_needs_timer(pmu, false);

	spin_unlock_irq(&pmu->lock);
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	spin_unlock_irq(&pmu->lock);
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}

static bool exclusive_mmio_access(const struct drm_i915_private *i915)
{
	/*
	 * We have to avoid concurrent mmio cache line access on gen7 or
	 * risk a machine hang. For a fun history lesson dig out the old
	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
	 */
	return IS_GEN(i915, 7);
}

static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
	struct intel_engine_pmu *pmu = &engine->pmu;
	bool busy;
	u32 val;

	val = ENGINE_READ_FW(engine, RING_CTL);
	if (val == 0) /* powerwell off => engine idle */
		return;

	if (val & RING_WAIT)
		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
	if (val & RING_WAIT_SEMAPHORE)
		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

	/* No need to sample when busy stats are supported. */
	if (intel_engine_supports_stats(engine))
		return;

	/*
	 * While waiting on a semaphore or event, MI_MODE reports the
	 * ring as idle. However, previously using the seqno, and with
	 * execlists sampling, we account for the ring waiting as the
	 * engine being busy. Therefore, we record the sample as being
	 * busy if either waiting or !idle.
	 */
	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
	if (!busy) {
		val = ENGINE_READ_FW(engine, RING_MI_MODE);
		busy = !(val & MODE_IDLE);
	}
	if (busy)
		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
}

static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;

	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!intel_gt_pm_is_awake(gt))
		return;

	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_get_if_awake(engine))
			continue;

		if (exclusive_mmio_access(i915)) {
			spin_lock_irqsave(&engine->uncore->lock, flags);
			engine_sample(engine, period_ns);
			spin_unlock_irqrestore(&engine->uncore->lock, flags);
		} else {
			engine_sample(engine, period_ns);
		}

		intel_engine_pm_put_async(engine);
	}
}

static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
	sample->cur += mul_u32_u32(val, mul);
}

static bool frequency_sampling_enabled(struct i915_pmu *pmu)
{
	return pmu->enable &
	       (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY));
}

static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_rps *rps = &gt->rps;

	if (!frequency_sampling_enabled(pmu))
		return;

	/* Report 0/0 (actual/requested) frequency while parked. */
	if (!intel_gt_pm_get_if_awake(gt))
		return;

	if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		/*
		 * We take a quick peek here without using forcewake
		 * so that we don't perturb the system under observation
		 * (forcewake => !rc6 => increased power use). We expect
		 * that if the read fails because it is outside of the
		 * mmio power well, then it will return 0 -- in which
		 * case we assume the system is running at the intended
		 * frequency. Fortunately, the read should rarely fail!
		 */
		val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
		if (val)
			val = intel_rps_get_cagf(rps, val);
		else
			val = rps->cur_freq;

		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
				intel_gpu_freq(rps, val), period_ns / 1000);
	}

	if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
				intel_gpu_freq(rps, rps->cur_freq),
				period_ns / 1000);
	}

	intel_gt_pm_put_async(gt);
}

static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_gt *gt = &i915->gt;
	unsigned int period_ns;
	ktime_t now;

	if (!READ_ONCE(pmu->timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
	pmu->timer_last = now;

	/*
	 * Strictly speaking the passed in period may not be 100% accurate for
	 * all internal calculations, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from the timer
	 * callback delay greatly dominates this so we keep it simple.
	 */
	engines_sample(gt, period_ns);
	frequency_sample(gt, period_ns);

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);

	drm_WARN_ON(&i915->drm, event->parent);

	drm_dev_put(&i915->drm);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		fallthrough;
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	return engine_event_status(engine, engine_event_sample(event));
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;
	int ret;

	if (pmu->closed)
		return -ENODEV;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent) {
		drm_dev_get(&i915->drm);
		event->destroy = i915_pmu_event_destroy;
	}

	return 0;
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			ktime_t unused;

			val = ktime_to_ns(intel_engine_get_busy_time(engine,
								     &unused));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = READ_ONCE(pmu->irq_count);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(&i915->gt);
			break;
		}
	}

	return val;
}

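/*
 * Standard perf read side: sample the free-running counter and publish the
 * delta since the previous read. The local64_cmpxchg() loop guards against
 * a concurrent reader claiming the same delta twice.
 */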
static void i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct hw_perf_event *hwc = &event->hw;
	struct i915_pmu *pmu = &i915->pmu;
	u64 prev, new;

	if (pmu->closed) {
		event->hw.state = PERF_HES_STOPPED;
		return;
	}
again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	struct i915_pmu *pmu = &i915->pmu;
	intel_wakeref_t wakeref;
	unsigned long flags;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	spin_lock_irqsave(&pmu->lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == ~0);

	if (pmu->enable_count[bit] == 0 &&
	    config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) {
		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0;
		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
		pmu->sleep_last = ktime_get();
	}

	pmu->enable |= BIT_ULL(bit);
	pmu->enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
			     I915_ENGINE_SAMPLE_COUNT);
		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
			     I915_ENGINE_SAMPLE_COUNT);
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

		engine->pmu.enable |= BIT(sample);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&pmu->lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;

	spin_lock_irqsave(&pmu->lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--pmu->enable_count[bit] == 0) {
		pmu->enable &= ~BIT_ULL(bit);
		pmu->timer_enabled &= pmu_needs_timer(pmu, true);
	}

	spin_unlock_irqrestore(&pmu->lock, flags);
}

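/*
 * perf core entry points: add()/del() attach and detach an event from this
 * PMU, while start()/stop() gate the actual counting. add() called with
 * PERF_EF_START starts the event immediately, and stop() called with
 * PERF_EF_UPDATE publishes the final count before stopping.
 */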
static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->closed)
		return;

	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->closed)
		return -ENODEV;

	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

static struct attribute **
create_event_attributes(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->uabi_instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	pmu->i915_attr = i915_attr;
	pmu->pmu_attr = pmu_attr;

	return attr;

err:;
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct i915_pmu *pmu)
{
	struct attribute **attr_iter = pmu->events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(pmu->events_attr_group.attrs);
	kfree(pmu->i915_attr);
	kfree(pmu->pmu_attr);

	pmu->events_attr_group.attrs = NULL;
	pmu->i915_attr = NULL;
	pmu->pmu_attr = NULL;
}

static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
	unsigned int target = i915_pmu_target_cpu;

	GEM_BUG_ON(!pmu->base.event_init);

	/*
	 * Unregistering an instance generates a CPU offline event which we must
	 * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
	 */
	if (pmu->closed)
		return 0;

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);

		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			i915_pmu_target_cpu = target;
		}
	}

	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
		perf_pmu_migrate_context(&pmu->base, cpu, target);
		pmu->cpuhp.cpu = target;
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

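/*
 * A single dynamic cpuhp state is set up once at module init and shared by
 * all PMU instances; each instance hooks in via its cpuhp.node, so the
 * online/offline callbacks above run for every registered PMU.
 */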
void i915_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
			  ret);
	else
		cpuhp_slot = ret;
}

void i915_pmu_exit(void)
{
	if (cpuhp_slot != CPUHP_INVALID)
		cpuhp_remove_multi_state(cpuhp_slot);
}

static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
	if (cpuhp_slot == CPUHP_INVALID)
		return -EINVAL;

	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static bool is_igp(struct drm_i915_private *i915)
{
	struct pci_dev *pdev = i915->drm.pdev;

	/* IGP is 0000:00:02.0 */
	return pci_domain_nr(pdev->bus) == 0 &&
	       pdev->bus->number == 0 &&
	       PCI_SLOT(pdev->devfn) == 2 &&
	       PCI_FUNC(pdev->devfn) == 0;
}

void i915_pmu_register(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;
	const struct attribute_group *attr_groups[] = {
		&i915_pmu_format_attr_group,
		&pmu->events_attr_group,
		&i915_pmu_cpumask_attr_group,
		NULL
	};

	int ret = -ENOMEM;

	if (INTEL_GEN(i915) <= 2) {
		drm_info(&i915->drm, "PMU not supported for this GPU.");
		return;
	}

	spin_lock_init(&pmu->lock);
	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	pmu->timer.function = i915_sample;
	pmu->cpuhp.cpu = -1;

	if (!is_igp(i915)) {
		pmu->name = kasprintf(GFP_KERNEL,
				      "i915_%s",
				      dev_name(i915->drm.dev));
		if (pmu->name) {
			/* tools/perf reserves colons as special. */
			strreplace((char *)pmu->name, ':', '_');
		}
	} else {
		pmu->name = "i915";
	}
	if (!pmu->name)
		goto err;

	pmu->events_attr_group.name = "events";
	pmu->events_attr_group.attrs = create_event_attributes(pmu);
	if (!pmu->events_attr_group.attrs)
		goto err_name;

	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
					GFP_KERNEL);
	if (!pmu->base.attr_groups)
		goto err_attr;

	pmu->base.module	= THIS_MODULE;
	pmu->base.task_ctx_nr	= perf_invalid_context;
	pmu->base.event_init	= i915_pmu_event_init;
	pmu->base.add		= i915_pmu_event_add;
	pmu->base.del		= i915_pmu_event_del;
	pmu->base.start		= i915_pmu_event_start;
	pmu->base.stop		= i915_pmu_event_stop;
	pmu->base.read		= i915_pmu_event_read;
	pmu->base.event_idx	= i915_pmu_event_event_idx;

	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
	if (ret)
		goto err_groups;

	ret = i915_pmu_register_cpuhp_state(pmu);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&pmu->base);
err_groups:
	kfree(pmu->base.attr_groups);
err_attr:
	pmu->base.event_init = NULL;
	free_event_attributes(pmu);
err_name:
	if (!is_igp(i915))
		kfree(pmu->name);
err:
	drm_notice(&i915->drm, "Failed to register PMU!\n");
}
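
/*
 * Userspace consumes these counters through the regular perf uAPI. As a
 * rough sketch (not part of this file): read the dynamic PMU type from
 * /sys/bus/event_source/devices/<pmu name>/type, place it in
 * perf_event_attr.type together with a config from uapi/drm/i915_drm.h
 * (e.g. I915_PMU_RC6_RESIDENCY), open with
 * perf_event_open(&attr, -1, cpu, -1, 0) on a CPU from the "cpumask"
 * attribute, and read() the accumulating u64 counter. Or, equivalently:
 *
 *   perf stat -e i915/rc6-residency/ -e i915/rcs0-busy/ -- sleep 1
 *
 * where the event names match those built by create_event_attributes().
 */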

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	/*
	 * "Disconnect" the PMU callbacks - since all are atomic, the
	 * synchronize_rcu() below ensures that all currently executing ones
	 * will have exited before we proceed with unregistration.
	 */
	pmu->closed = true;
	synchronize_rcu();

	hrtimer_cancel(&pmu->timer);

	i915_pmu_unregister_cpuhp_state(pmu);

	perf_pmu_unregister(&pmu->base);
	pmu->base.event_init = NULL;
	kfree(pmu->base.attr_groups);
	if (!is_igp(i915))
		kfree(pmu->name);
	free_event_attributes(pmu);
}