drivers/gpu/drm/i915/i915_pmu.c (linux-2.6-microblaze.git)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6
7 #include <linux/irq.h>
8 #include <linux/pm_runtime.h>
9
10 #include "gt/intel_engine.h"
11 #include "gt/intel_engine_pm.h"
12 #include "gt/intel_engine_user.h"
13 #include "gt/intel_gt_pm.h"
14 #include "gt/intel_rc6.h"
15 #include "gt/intel_rps.h"
16
17 #include "i915_drv.h"
18 #include "i915_pmu.h"
19 #include "intel_pm.h"
20
21 /* Frequency for the sampling timer for events which need it. */
22 #define FREQUENCY 200
23 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
24
25 #define ENGINE_SAMPLE_MASK \
26         (BIT(I915_SAMPLE_BUSY) | \
27          BIT(I915_SAMPLE_WAIT) | \
28          BIT(I915_SAMPLE_SEMA))
29
30 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
31
32 static cpumask_t i915_pmu_cpumask;
33
34 static u8 engine_config_sample(u64 config)
35 {
36         return config & I915_PMU_SAMPLE_MASK;
37 }
38
39 static u8 engine_event_sample(struct perf_event *event)
40 {
41         return engine_config_sample(event->attr.config);
42 }
43
44 static u8 engine_event_class(struct perf_event *event)
45 {
46         return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
47 }
48
49 static u8 engine_event_instance(struct perf_event *event)
50 {
51         return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
52 }
53
54 static bool is_engine_config(u64 config)
55 {
56         return config < __I915_PMU_OTHER(0);
57 }
58
59 static unsigned int config_enabled_bit(u64 config)
60 {
61         if (is_engine_config(config))
62                 return engine_config_sample(config);
63         else
64                 return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
65 }
66
67 static u64 config_enabled_mask(u64 config)
68 {
69         return BIT_ULL(config_enabled_bit(config));
70 }
71
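/*
 * Illustrative sketch (not part of the driver): how the uapi config value
 * decoded by the helpers above is laid out. The shifts mirror the
 * engine_event_class()/engine_event_instance()/engine_config_sample()
 * helpers; treat the diagram and helper below as an example of the
 * encoding, not as the authoritative uapi definition.
 *
 *   config bits:  [ class (8) | instance (8) | sample (I915_PMU_SAMPLE_BITS) ]
 *
 *   static u64 example_engine_config(u8 class, u8 instance, u8 sample)
 *   {
 *           return ((u64)class << I915_PMU_CLASS_SHIFT) |
 *                  ((u64)instance << I915_PMU_SAMPLE_BITS) |
 *                  sample;
 *   }
 *
 * Non-engine events start at __I915_PMU_OTHER(0), which is why
 * is_engine_config() reduces to a single comparison against that value.
 */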
72 static bool is_engine_event(struct perf_event *event)
73 {
74         return is_engine_config(event->attr.config);
75 }
76
77 static unsigned int event_enabled_bit(struct perf_event *event)
78 {
79         return config_enabled_bit(event->attr.config);
80 }
81
82 static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
83 {
84         struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
85         u64 enable;
86
87         /*
88          * Only some counters need the sampling timer.
89          *
90          * We start with a bitmask of all currently enabled events.
91          */
92         enable = pmu->enable;
93
94         /*
95          * Mask out all the ones which do not need the timer, or in
96          * other words keep all the ones that could need the timer.
97          */
98         enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
99                   config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
100                   ENGINE_SAMPLE_MASK;
101
 102         /*
 103          * When the GPU is idle, per-engine counters do not need to be
 104          * running, so clear those bits out.
 105          */
106         if (!gpu_active)
107                 enable &= ~ENGINE_SAMPLE_MASK;
 108         /*
 109          * Also, if software busyness tracking is available we do not
 110          * need the timer for the I915_SAMPLE_BUSY counter.
 111          */
112         else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
113                 enable &= ~BIT(I915_SAMPLE_BUSY);
114
115         /*
116          * If some bits remain it means we need the sampling timer running.
117          */
118         return enable;
119 }
120
121 static u64 __get_rc6(struct intel_gt *gt)
122 {
123         struct drm_i915_private *i915 = gt->i915;
124         u64 val;
125
126         val = intel_rc6_residency_ns(&gt->rc6,
127                                      IS_VALLEYVIEW(i915) ?
128                                      VLV_GT_RENDER_RC6 :
129                                      GEN6_GT_GFX_RC6);
130
131         if (HAS_RC6p(i915))
132                 val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);
133
134         if (HAS_RC6pp(i915))
135                 val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);
136
137         return val;
138 }
139
140 #if IS_ENABLED(CONFIG_PM)
141
142 static inline s64 ktime_since(const ktime_t kt)
143 {
144         return ktime_to_ns(ktime_sub(ktime_get(), kt));
145 }
146
147 static u64 get_rc6(struct intel_gt *gt)
148 {
149         struct drm_i915_private *i915 = gt->i915;
150         struct i915_pmu *pmu = &i915->pmu;
151         unsigned long flags;
152         bool awake = false;
153         u64 val;
154
155         if (intel_gt_pm_get_if_awake(gt)) {
156                 val = __get_rc6(gt);
157                 intel_gt_pm_put_async(gt);
158                 awake = true;
159         }
160
161         spin_lock_irqsave(&pmu->lock, flags);
162
163         if (awake) {
164                 pmu->sample[__I915_SAMPLE_RC6].cur = val;
165         } else {
166                 /*
167                  * We think we are runtime suspended.
168                  *
169                  * Report the delta from when the device was suspended to now,
170                  * on top of the last known real value, as the approximated RC6
171                  * counter value.
172                  */
173                 val = ktime_since(pmu->sleep_last);
174                 val += pmu->sample[__I915_SAMPLE_RC6].cur;
175         }
176
177         if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
178                 val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
179         else
180                 pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
181
182         spin_unlock_irqrestore(&pmu->lock, flags);
183
184         return val;
185 }
186
187 static void park_rc6(struct drm_i915_private *i915)
188 {
189         struct i915_pmu *pmu = &i915->pmu;
190
191         if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
192                 pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
193
194         pmu->sleep_last = ktime_get();
195 }
196
197 #else
198
199 static u64 get_rc6(struct intel_gt *gt)
200 {
201         return __get_rc6(gt);
202 }
203
204 static void park_rc6(struct drm_i915_private *i915) {}
205
206 #endif
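/*
 * Worked example of the CONFIG_PM path above (numbers are illustrative):
 * if the last real RC6 read before runtime suspend was 1,000,000 ns and the
 * device has now been suspended for 250,000 ns, get_rc6() reports
 * 1,000,000 + ktime_since(sleep_last) = 1,250,000 ns, i.e. suspended time is
 * credited as RC6 time. The __I915_SAMPLE_RC6_LAST_REPORTED clamp then keeps
 * the counter monotonic when a real (possibly smaller) value is read back
 * after resume.
 */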
207
208 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
209 {
210         if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
211                 pmu->timer_enabled = true;
212                 pmu->timer_last = ktime_get();
213                 hrtimer_start_range_ns(&pmu->timer,
214                                        ns_to_ktime(PERIOD), 0,
215                                        HRTIMER_MODE_REL_PINNED);
216         }
217 }
218
219 void i915_pmu_gt_parked(struct drm_i915_private *i915)
220 {
221         struct i915_pmu *pmu = &i915->pmu;
222
223         if (!pmu->base.event_init)
224                 return;
225
226         spin_lock_irq(&pmu->lock);
227
228         park_rc6(i915);
229
 230         /*
 231          * Signal the sampling timer to stop if only engine events are
 232          * enabled and the GPU went idle.
 233          */
234         pmu->timer_enabled = pmu_needs_timer(pmu, false);
235
236         spin_unlock_irq(&pmu->lock);
237 }
238
239 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
240 {
241         struct i915_pmu *pmu = &i915->pmu;
242
243         if (!pmu->base.event_init)
244                 return;
245
246         spin_lock_irq(&pmu->lock);
247
248         /*
249          * Re-enable sampling timer when GPU goes active.
250          */
251         __i915_pmu_maybe_start_timer(pmu);
252
253         spin_unlock_irq(&pmu->lock);
254 }
255
256 static void
257 add_sample(struct i915_pmu_sample *sample, u32 val)
258 {
259         sample->cur += val;
260 }
261
262 static bool exclusive_mmio_access(const struct drm_i915_private *i915)
263 {
264         /*
265          * We have to avoid concurrent mmio cache line access on gen7 or
266          * risk a machine hang. For a fun history lesson dig out the old
267          * userspace intel_gpu_top and run it on Ivybridge or Haswell!
268          */
269         return IS_GEN(i915, 7);
270 }
271
272 static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
273 {
274         struct intel_engine_pmu *pmu = &engine->pmu;
275         bool busy;
276         u32 val;
277
278         val = ENGINE_READ_FW(engine, RING_CTL);
279         if (val == 0) /* powerwell off => engine idle */
280                 return;
281
282         if (val & RING_WAIT)
283                 add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
284         if (val & RING_WAIT_SEMAPHORE)
285                 add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
286
287         /* No need to sample when busy stats are supported. */
288         if (intel_engine_supports_stats(engine))
289                 return;
290
291         /*
292          * While waiting on a semaphore or event, MI_MODE reports the
293          * ring as idle. However, previously using the seqno, and with
294          * execlists sampling, we account for the ring waiting as the
295          * engine being busy. Therefore, we record the sample as being
296          * busy if either waiting or !idle.
297          */
298         busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
299         if (!busy) {
300                 val = ENGINE_READ_FW(engine, RING_MI_MODE);
301                 busy = !(val & MODE_IDLE);
302         }
303         if (busy)
304                 add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
305 }
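/*
 * Sampling arithmetic for the above (illustrative): with FREQUENCY = 200 the
 * timer fires roughly every 5 ms, so an engine that shows RING_WAIT on three
 * consecutive ticks accrues about 3 * 5,000,000 = 15,000,000 ns in
 * sample[I915_SAMPLE_WAIT]. The exported counters are therefore time-in-state
 * estimates quantised to the timer period, except for I915_SAMPLE_BUSY on
 * engines where intel_engine_supports_stats() is true, in which case the
 * exact busy time comes from intel_engine_get_busy_time() at read time.
 */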
306
307 static void
308 engines_sample(struct intel_gt *gt, unsigned int period_ns)
309 {
310         struct drm_i915_private *i915 = gt->i915;
311         struct intel_engine_cs *engine;
312         enum intel_engine_id id;
313         unsigned long flags;
314
315         if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
316                 return;
317
318         if (!intel_gt_pm_is_awake(gt))
319                 return;
320
321         for_each_engine(engine, gt, id) {
322                 if (!intel_engine_pm_get_if_awake(engine))
323                         continue;
324
325                 if (exclusive_mmio_access(i915)) {
326                         spin_lock_irqsave(&engine->uncore->lock, flags);
327                         engine_sample(engine, period_ns);
328                         spin_unlock_irqrestore(&engine->uncore->lock, flags);
329                 } else {
330                         engine_sample(engine, period_ns);
331                 }
332
333                 intel_engine_pm_put_async(engine);
334         }
335 }
336
337 static void
338 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
339 {
340         sample->cur += mul_u32_u32(val, mul);
341 }
342
343 static bool frequency_sampling_enabled(struct i915_pmu *pmu)
344 {
345         return pmu->enable &
346                (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
347                 config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY));
348 }
349
350 static void
351 frequency_sample(struct intel_gt *gt, unsigned int period_ns)
352 {
353         struct drm_i915_private *i915 = gt->i915;
354         struct intel_uncore *uncore = gt->uncore;
355         struct i915_pmu *pmu = &i915->pmu;
356         struct intel_rps *rps = &gt->rps;
357
358         if (!frequency_sampling_enabled(pmu))
359                 return;
360
361         /* Report 0/0 (actual/requested) frequency while parked. */
362         if (!intel_gt_pm_get_if_awake(gt))
363                 return;
364
365         if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
366                 u32 val;
367
368                 /*
369                  * We take a quick peek here without using forcewake
370                  * so that we don't perturb the system under observation
371                  * (forcewake => !rc6 => increased power use). We expect
372                  * that if the read fails because it is outside of the
373                  * mmio power well, then it will return 0 -- in which
374                  * case we assume the system is running at the intended
375                  * frequency. Fortunately, the read should rarely fail!
376                  */
377                 val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
378                 if (val)
379                         val = intel_rps_get_cagf(rps, val);
380                 else
381                         val = rps->cur_freq;
382
383                 add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
384                                 intel_gpu_freq(rps, val), period_ns / 1000);
385         }
386
387         if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
388                 add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
389                                 intel_gpu_freq(rps, rps->cur_freq),
390                                 period_ns / 1000);
391         }
392
393         intel_gt_pm_put_async(gt);
394 }
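/*
 * Unit bookkeeping sketch for frequency_sample() (numbers are assumed):
 * each tick adds freq_in_MHz * (period_ns / 1000), i.e. MHz * microseconds,
 * to the sample. The read path divides the accumulated value by
 * USEC_PER_SEC, so the exported counter is the time integral of frequency
 * in MHz-seconds. For example, one 5 ms tick at 500 MHz adds
 * 500 * 5000 = 2,500,000 to the raw sample (2.5 after the division), and
 * dividing a counter delta by the elapsed time in seconds recovers the
 * average frequency in MHz.
 */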
395
396 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
397 {
398         struct drm_i915_private *i915 =
399                 container_of(hrtimer, struct drm_i915_private, pmu.timer);
400         struct i915_pmu *pmu = &i915->pmu;
401         struct intel_gt *gt = &i915->gt;
402         unsigned int period_ns;
403         ktime_t now;
404
405         if (!READ_ONCE(pmu->timer_enabled))
406                 return HRTIMER_NORESTART;
407
408         now = ktime_get();
409         period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
410         pmu->timer_last = now;
411
 412         /*
 413          * Strictly speaking the passed-in period may not be 100% accurate for
 414          * all internal calculations, since some amount of time can be spent on
 415          * grabbing the forcewake. However, the potential error from timer
 416          * callback delay greatly dominates this, so we keep it simple.
 417          */
418         engines_sample(gt, period_ns);
419         frequency_sample(gt, period_ns);
420
421         hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
422
423         return HRTIMER_RESTART;
424 }
425
426 static u64 count_interrupts(struct drm_i915_private *i915)
427 {
428         /* open-coded kstat_irqs() */
429         struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
430         u64 sum = 0;
431         int cpu;
432
433         if (!desc || !desc->kstat_irqs)
434                 return 0;
435
436         for_each_possible_cpu(cpu)
437                 sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
438
439         return sum;
440 }
441
442 static void i915_pmu_event_destroy(struct perf_event *event)
443 {
444         struct drm_i915_private *i915 =
445                 container_of(event->pmu, typeof(*i915), pmu.base);
446
447         drm_WARN_ON(&i915->drm, event->parent);
448
449         module_put(THIS_MODULE);
450 }
451
452 static int
453 engine_event_status(struct intel_engine_cs *engine,
454                     enum drm_i915_pmu_engine_sample sample)
455 {
456         switch (sample) {
457         case I915_SAMPLE_BUSY:
458         case I915_SAMPLE_WAIT:
459                 break;
460         case I915_SAMPLE_SEMA:
461                 if (INTEL_GEN(engine->i915) < 6)
462                         return -ENODEV;
463                 break;
464         default:
465                 return -ENOENT;
466         }
467
468         return 0;
469 }
470
471 static int
472 config_status(struct drm_i915_private *i915, u64 config)
473 {
474         switch (config) {
475         case I915_PMU_ACTUAL_FREQUENCY:
476                 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
477                         /* Requires a mutex for sampling! */
478                         return -ENODEV;
479                 /* Fall-through. */
480         case I915_PMU_REQUESTED_FREQUENCY:
481                 if (INTEL_GEN(i915) < 6)
482                         return -ENODEV;
483                 break;
484         case I915_PMU_INTERRUPTS:
485                 break;
486         case I915_PMU_RC6_RESIDENCY:
487                 if (!HAS_RC6(i915))
488                         return -ENODEV;
489                 break;
490         default:
491                 return -ENOENT;
492         }
493
494         return 0;
495 }
496
497 static int engine_event_init(struct perf_event *event)
498 {
499         struct drm_i915_private *i915 =
500                 container_of(event->pmu, typeof(*i915), pmu.base);
501         struct intel_engine_cs *engine;
502
503         engine = intel_engine_lookup_user(i915, engine_event_class(event),
504                                           engine_event_instance(event));
505         if (!engine)
506                 return -ENODEV;
507
508         return engine_event_status(engine, engine_event_sample(event));
509 }
510
511 static int i915_pmu_event_init(struct perf_event *event)
512 {
513         struct drm_i915_private *i915 =
514                 container_of(event->pmu, typeof(*i915), pmu.base);
515         int ret;
516
517         if (event->attr.type != event->pmu->type)
518                 return -ENOENT;
519
520         /* unsupported modes and filters */
521         if (event->attr.sample_period) /* no sampling */
522                 return -EINVAL;
523
524         if (has_branch_stack(event))
525                 return -EOPNOTSUPP;
526
527         if (event->cpu < 0)
528                 return -EINVAL;
529
530         /* only allow running on one cpu at a time */
531         if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
532                 return -EINVAL;
533
534         if (is_engine_event(event))
535                 ret = engine_event_init(event);
536         else
537                 ret = config_status(i915, event->attr.config);
538         if (ret)
539                 return ret;
540
541         if (!event->parent) {
542                 __module_get(THIS_MODULE);
543                 event->destroy = i915_pmu_event_destroy;
544         }
545
546         return 0;
547 }
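/*
 * Illustrative userspace sketch (not part of the driver): opening one of
 * these counters with perf_event_open(). The PMU type must be read from
 * sysfs at runtime; the cpu argument should come from the PMU's "cpumask"
 * file (0 is assumed below for brevity), and the helper name and config
 * value are examples rather than guarantees.
 *
 *   #include <linux/perf_event.h>
 *   #include <sys/syscall.h>
 *   #include <unistd.h>
 *   #include <stdint.h>
 *   #include <stdio.h>
 *   #include <string.h>
 *
 *   static int open_i915_event(uint64_t config)
 *   {
 *           struct perf_event_attr attr;
 *           FILE *f = fopen("/sys/bus/event_source/devices/i915/type", "r");
 *           int type;
 *
 *           if (!f)
 *                   return -1;
 *           if (fscanf(f, "%d", &type) != 1) {
 *                   fclose(f);
 *                   return -1;
 *           }
 *           fclose(f);
 *
 *           memset(&attr, 0, sizeof(attr));
 *           attr.type = type;        // dynamic PMU type from sysfs
 *           attr.size = sizeof(attr);
 *           attr.config = config;    // e.g. an engine-busy encoding
 *
 *           // System-wide event: pid == -1, cpu taken from the cpumask
 *           // file (event_init above rejects CPUs outside that mask).
 *           return syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
 *   }
 */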
548
549 static u64 __i915_pmu_event_read(struct perf_event *event)
550 {
551         struct drm_i915_private *i915 =
552                 container_of(event->pmu, typeof(*i915), pmu.base);
553         struct i915_pmu *pmu = &i915->pmu;
554         u64 val = 0;
555
556         if (is_engine_event(event)) {
557                 u8 sample = engine_event_sample(event);
558                 struct intel_engine_cs *engine;
559
560                 engine = intel_engine_lookup_user(i915,
561                                                   engine_event_class(event),
562                                                   engine_event_instance(event));
563
564                 if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
565                         /* Do nothing */
566                 } else if (sample == I915_SAMPLE_BUSY &&
567                            intel_engine_supports_stats(engine)) {
568                         ktime_t unused;
569
570                         val = ktime_to_ns(intel_engine_get_busy_time(engine,
571                                                                      &unused));
572                 } else {
573                         val = engine->pmu.sample[sample].cur;
574                 }
575         } else {
576                 switch (event->attr.config) {
577                 case I915_PMU_ACTUAL_FREQUENCY:
578                         val =
579                            div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
580                                    USEC_PER_SEC /* to MHz */);
581                         break;
582                 case I915_PMU_REQUESTED_FREQUENCY:
583                         val =
584                            div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
585                                    USEC_PER_SEC /* to MHz */);
586                         break;
587                 case I915_PMU_INTERRUPTS:
588                         val = count_interrupts(i915);
589                         break;
590                 case I915_PMU_RC6_RESIDENCY:
591                         val = get_rc6(&i915->gt);
592                         break;
593                 }
594         }
595
596         return val;
597 }
598
599 static void i915_pmu_event_read(struct perf_event *event)
600 {
601         struct hw_perf_event *hwc = &event->hw;
602         u64 prev, new;
603
604 again:
605         prev = local64_read(&hwc->prev_count);
606         new = __i915_pmu_event_read(event);
607
608         if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
609                 goto again;
610
611         local64_add(new - prev, &event->count);
612 }
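/*
 * Example of the read-side arithmetic above (numbers are illustrative): if
 * prev_count holds 1000 and __i915_pmu_event_read() now returns 1300, the
 * cmpxchg publishes 1300 as the new prev_count and local64_add() credits the
 * 300-unit delta to event->count. The retry loop only matters when a
 * concurrent reader updated prev_count between the read and the cmpxchg, in
 * which case the delta is recomputed against the fresher baseline.
 */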
613
614 static void i915_pmu_enable(struct perf_event *event)
615 {
616         struct drm_i915_private *i915 =
617                 container_of(event->pmu, typeof(*i915), pmu.base);
618         unsigned int bit = event_enabled_bit(event);
619         struct i915_pmu *pmu = &i915->pmu;
620         intel_wakeref_t wakeref;
621         unsigned long flags;
622
623         wakeref = intel_runtime_pm_get(&i915->runtime_pm);
624         spin_lock_irqsave(&pmu->lock, flags);
625
626         /*
627          * Update the bitmask of enabled events and increment
628          * the event reference counter.
629          */
630         BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
631         GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
632         GEM_BUG_ON(pmu->enable_count[bit] == ~0);
633
634         if (pmu->enable_count[bit] == 0 &&
635             config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) {
636                 pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0;
637                 pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
638                 pmu->sleep_last = ktime_get();
639         }
640
641         pmu->enable |= BIT_ULL(bit);
642         pmu->enable_count[bit]++;
643
644         /*
645          * Start the sampling timer if needed and not already enabled.
646          */
647         __i915_pmu_maybe_start_timer(pmu);
648
649         /*
650          * For per-engine events the bitmask and reference counting
651          * is stored per engine.
652          */
653         if (is_engine_event(event)) {
654                 u8 sample = engine_event_sample(event);
655                 struct intel_engine_cs *engine;
656
657                 engine = intel_engine_lookup_user(i915,
658                                                   engine_event_class(event),
659                                                   engine_event_instance(event));
660
661                 BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
662                              I915_ENGINE_SAMPLE_COUNT);
663                 BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
664                              I915_ENGINE_SAMPLE_COUNT);
665                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
666                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
667                 GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
668
669                 engine->pmu.enable |= BIT(sample);
670                 engine->pmu.enable_count[sample]++;
671         }
672
673         spin_unlock_irqrestore(&pmu->lock, flags);
674
 675         /*
 676          * Store the current counter value so we can report the correct delta
 677          * for all listeners, even when the event was already enabled and has
 678          * an existing non-zero value.
 679          */
680         local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
681
682         intel_runtime_pm_put(&i915->runtime_pm, wakeref);
683 }
684
685 static void i915_pmu_disable(struct perf_event *event)
686 {
687         struct drm_i915_private *i915 =
688                 container_of(event->pmu, typeof(*i915), pmu.base);
689         unsigned int bit = event_enabled_bit(event);
690         struct i915_pmu *pmu = &i915->pmu;
691         unsigned long flags;
692
693         spin_lock_irqsave(&pmu->lock, flags);
694
695         if (is_engine_event(event)) {
696                 u8 sample = engine_event_sample(event);
697                 struct intel_engine_cs *engine;
698
699                 engine = intel_engine_lookup_user(i915,
700                                                   engine_event_class(event),
701                                                   engine_event_instance(event));
702
703                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
704                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
705                 GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
706
707                 /*
708                  * Decrement the reference count and clear the enabled
709                  * bitmask when the last listener on an event goes away.
710                  */
711                 if (--engine->pmu.enable_count[sample] == 0)
712                         engine->pmu.enable &= ~BIT(sample);
713         }
714
715         GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
716         GEM_BUG_ON(pmu->enable_count[bit] == 0);
717         /*
718          * Decrement the reference count and clear the enabled
719          * bitmask when the last listener on an event goes away.
720          */
721         if (--pmu->enable_count[bit] == 0) {
722                 pmu->enable &= ~BIT_ULL(bit);
723                 pmu->timer_enabled &= pmu_needs_timer(pmu, true);
724         }
725
726         spin_unlock_irqrestore(&pmu->lock, flags);
727 }
728
729 static void i915_pmu_event_start(struct perf_event *event, int flags)
730 {
731         i915_pmu_enable(event);
732         event->hw.state = 0;
733 }
734
735 static void i915_pmu_event_stop(struct perf_event *event, int flags)
736 {
737         if (flags & PERF_EF_UPDATE)
738                 i915_pmu_event_read(event);
739         i915_pmu_disable(event);
740         event->hw.state = PERF_HES_STOPPED;
741 }
742
743 static int i915_pmu_event_add(struct perf_event *event, int flags)
744 {
745         if (flags & PERF_EF_START)
746                 i915_pmu_event_start(event, flags);
747
748         return 0;
749 }
750
751 static void i915_pmu_event_del(struct perf_event *event, int flags)
752 {
753         i915_pmu_event_stop(event, PERF_EF_UPDATE);
754 }
755
756 static int i915_pmu_event_event_idx(struct perf_event *event)
757 {
758         return 0;
759 }
760
761 struct i915_str_attribute {
762         struct device_attribute attr;
763         const char *str;
764 };
765
766 static ssize_t i915_pmu_format_show(struct device *dev,
767                                     struct device_attribute *attr, char *buf)
768 {
769         struct i915_str_attribute *eattr;
770
771         eattr = container_of(attr, struct i915_str_attribute, attr);
772         return sprintf(buf, "%s\n", eattr->str);
773 }
774
775 #define I915_PMU_FORMAT_ATTR(_name, _config) \
776         (&((struct i915_str_attribute[]) { \
777                 { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
778                   .str = _config, } \
779         })[0].attr.attr)
780
781 static struct attribute *i915_pmu_format_attrs[] = {
782         I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
783         NULL,
784 };
785
786 static const struct attribute_group i915_pmu_format_attr_group = {
787         .name = "format",
788         .attrs = i915_pmu_format_attrs,
789 };
790
791 struct i915_ext_attribute {
792         struct device_attribute attr;
793         unsigned long val;
794 };
795
796 static ssize_t i915_pmu_event_show(struct device *dev,
797                                    struct device_attribute *attr, char *buf)
798 {
799         struct i915_ext_attribute *eattr;
800
801         eattr = container_of(attr, struct i915_ext_attribute, attr);
802         return sprintf(buf, "config=0x%lx\n", eattr->val);
803 }
804
805 static ssize_t
806 i915_pmu_get_attr_cpumask(struct device *dev,
807                           struct device_attribute *attr,
808                           char *buf)
809 {
810         return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
811 }
812
813 static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);
814
815 static struct attribute *i915_cpumask_attrs[] = {
816         &dev_attr_cpumask.attr,
817         NULL,
818 };
819
820 static const struct attribute_group i915_pmu_cpumask_attr_group = {
821         .attrs = i915_cpumask_attrs,
822 };
823
824 #define __event(__config, __name, __unit) \
825 { \
826         .config = (__config), \
827         .name = (__name), \
828         .unit = (__unit), \
829 }
830
831 #define __engine_event(__sample, __name) \
832 { \
833         .sample = (__sample), \
834         .name = (__name), \
835 }
836
837 static struct i915_ext_attribute *
838 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
839 {
840         sysfs_attr_init(&attr->attr.attr);
841         attr->attr.attr.name = name;
842         attr->attr.attr.mode = 0444;
843         attr->attr.show = i915_pmu_event_show;
844         attr->val = config;
845
846         return ++attr;
847 }
848
849 static struct perf_pmu_events_attr *
850 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
851              const char *str)
852 {
853         sysfs_attr_init(&attr->attr.attr);
854         attr->attr.attr.name = name;
855         attr->attr.attr.mode = 0444;
856         attr->attr.show = perf_event_sysfs_show;
857         attr->event_str = str;
858
859         return ++attr;
860 }
861
862 static struct attribute **
863 create_event_attributes(struct i915_pmu *pmu)
864 {
865         struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
866         static const struct {
867                 u64 config;
868                 const char *name;
869                 const char *unit;
870         } events[] = {
871                 __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
872                 __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
873                 __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
874                 __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
875         };
876         static const struct {
877                 enum drm_i915_pmu_engine_sample sample;
878                 char *name;
879         } engine_events[] = {
880                 __engine_event(I915_SAMPLE_BUSY, "busy"),
881                 __engine_event(I915_SAMPLE_SEMA, "sema"),
882                 __engine_event(I915_SAMPLE_WAIT, "wait"),
883         };
884         unsigned int count = 0;
885         struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
886         struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
887         struct attribute **attr = NULL, **attr_iter;
888         struct intel_engine_cs *engine;
889         unsigned int i;
890
891         /* Count how many counters we will be exposing. */
892         for (i = 0; i < ARRAY_SIZE(events); i++) {
893                 if (!config_status(i915, events[i].config))
894                         count++;
895         }
896
897         for_each_uabi_engine(engine, i915) {
898                 for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
899                         if (!engine_event_status(engine,
900                                                  engine_events[i].sample))
901                                 count++;
902                 }
903         }
904
905         /* Allocate attribute objects and table. */
906         i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
907         if (!i915_attr)
908                 goto err_alloc;
909
910         pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
911         if (!pmu_attr)
912                 goto err_alloc;
913
914         /* Max one pointer of each attribute type plus a termination entry. */
915         attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
916         if (!attr)
917                 goto err_alloc;
918
919         i915_iter = i915_attr;
920         pmu_iter = pmu_attr;
921         attr_iter = attr;
922
923         /* Initialize supported non-engine counters. */
924         for (i = 0; i < ARRAY_SIZE(events); i++) {
925                 char *str;
926
927                 if (config_status(i915, events[i].config))
928                         continue;
929
930                 str = kstrdup(events[i].name, GFP_KERNEL);
931                 if (!str)
932                         goto err;
933
934                 *attr_iter++ = &i915_iter->attr.attr;
935                 i915_iter = add_i915_attr(i915_iter, str, events[i].config);
936
937                 if (events[i].unit) {
938                         str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
939                         if (!str)
940                                 goto err;
941
942                         *attr_iter++ = &pmu_iter->attr.attr;
943                         pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
944                 }
945         }
946
947         /* Initialize supported engine counters. */
948         for_each_uabi_engine(engine, i915) {
949                 for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
950                         char *str;
951
952                         if (engine_event_status(engine,
953                                                 engine_events[i].sample))
954                                 continue;
955
956                         str = kasprintf(GFP_KERNEL, "%s-%s",
957                                         engine->name, engine_events[i].name);
958                         if (!str)
959                                 goto err;
960
961                         *attr_iter++ = &i915_iter->attr.attr;
962                         i915_iter =
963                                 add_i915_attr(i915_iter, str,
964                                               __I915_PMU_ENGINE(engine->uabi_class,
965                                                                 engine->uabi_instance,
966                                                                 engine_events[i].sample));
967
968                         str = kasprintf(GFP_KERNEL, "%s-%s.unit",
969                                         engine->name, engine_events[i].name);
970                         if (!str)
971                                 goto err;
972
973                         *attr_iter++ = &pmu_iter->attr.attr;
974                         pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
975                 }
976         }
977
978         pmu->i915_attr = i915_attr;
979         pmu->pmu_attr = pmu_attr;
980
981         return attr;
982
983 err:;
984         for (attr_iter = attr; *attr_iter; attr_iter++)
985                 kfree((*attr_iter)->name);
986
987 err_alloc:
988         kfree(attr);
989         kfree(i915_attr);
990         kfree(pmu_attr);
991
992         return NULL;
993 }
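/*
 * Resulting sysfs layout, sketched for a hypothetical part with a single
 * render engine (entry names and values below are examples, not guarantees):
 *
 *   /sys/bus/event_source/devices/i915/
 *       format/i915_eventid            "config:0-20"
 *       cpumask                        e.g. "0"
 *       events/actual-frequency        "config=0x..."
 *       events/actual-frequency.unit   "M"
 *       events/rcs0-busy               "config=0x0"
 *       events/rcs0-busy.unit          "ns"
 *
 * A perf invocation such as "perf stat -e i915/rcs0-busy/ -a sleep 1"
 * (assumed syntax) can then reference these events by name.
 */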
994
995 static void free_event_attributes(struct i915_pmu *pmu)
996 {
997         struct attribute **attr_iter = pmu->events_attr_group.attrs;
998
999         for (; *attr_iter; attr_iter++)
1000                 kfree((*attr_iter)->name);
1001
1002         kfree(pmu->events_attr_group.attrs);
1003         kfree(pmu->i915_attr);
1004         kfree(pmu->pmu_attr);
1005
1006         pmu->events_attr_group.attrs = NULL;
1007         pmu->i915_attr = NULL;
1008         pmu->pmu_attr = NULL;
1009 }
1010
1011 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
1012 {
1013         struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1014
1015         GEM_BUG_ON(!pmu->base.event_init);
1016
1017         /* Select the first online CPU as a designated reader. */
1018         if (!cpumask_weight(&i915_pmu_cpumask))
1019                 cpumask_set_cpu(cpu, &i915_pmu_cpumask);
1020
1021         return 0;
1022 }
1023
1024 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
1025 {
1026         struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1027         unsigned int target;
1028
1029         GEM_BUG_ON(!pmu->base.event_init);
1030
1031         if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
1032                 target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
1033                 /* Migrate events if there is a valid target */
1034                 if (target < nr_cpu_ids) {
1035                         cpumask_set_cpu(target, &i915_pmu_cpumask);
1036                         perf_pmu_migrate_context(&pmu->base, cpu, target);
1037                 }
1038         }
1039
1040         return 0;
1041 }
1042
1043 static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
1044 {
1045         enum cpuhp_state slot;
1046         int ret;
1047
1048         ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
1049                                       "perf/x86/intel/i915:online",
1050                                       i915_pmu_cpu_online,
1051                                       i915_pmu_cpu_offline);
1052         if (ret < 0)
1053                 return ret;
1054
1055         slot = ret;
1056         ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
1057         if (ret) {
1058                 cpuhp_remove_multi_state(slot);
1059                 return ret;
1060         }
1061
1062         pmu->cpuhp.slot = slot;
1063         return 0;
1064 }
1065
1066 static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
1067 {
1068         struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
1069
1070         drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID);
1071         drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
1072         cpuhp_remove_multi_state(pmu->cpuhp.slot);
1073         pmu->cpuhp.slot = CPUHP_INVALID;
1074 }
1075
1076 static bool is_igp(struct drm_i915_private *i915)
1077 {
1078         struct pci_dev *pdev = i915->drm.pdev;
1079
1080         /* IGP is 0000:00:02.0 */
1081         return pci_domain_nr(pdev->bus) == 0 &&
1082                pdev->bus->number == 0 &&
1083                PCI_SLOT(pdev->devfn) == 2 &&
1084                PCI_FUNC(pdev->devfn) == 0;
1085 }
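/*
 * Naming sketch: the integrated GPU keeps the legacy "i915" PMU name, while
 * a non-IGP device at, say, 0000:03:00.0 would register below as
 * "i915_0000_03_00.0" once the colons are replaced, since tools/perf treats
 * ':' as special. The exact dev_name() string is platform dependent and the
 * example address is an assumption.
 */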
1086
1087 void i915_pmu_register(struct drm_i915_private *i915)
1088 {
1089         struct i915_pmu *pmu = &i915->pmu;
1090         const struct attribute_group *attr_groups[] = {
1091                 &i915_pmu_format_attr_group,
1092                 &pmu->events_attr_group,
1093                 &i915_pmu_cpumask_attr_group,
1094                 NULL
1095         };
1096
1097         int ret = -ENOMEM;
1098
1099         if (INTEL_GEN(i915) <= 2) {
1100                 drm_info(&i915->drm, "PMU not supported for this GPU.");
1101                 return;
1102         }
1103
1104         spin_lock_init(&pmu->lock);
1105         hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1106         pmu->timer.function = i915_sample;
1107         pmu->cpuhp.slot = CPUHP_INVALID;
1108
1109         if (!is_igp(i915)) {
1110                 pmu->name = kasprintf(GFP_KERNEL,
1111                                       "i915_%s",
1112                                       dev_name(i915->drm.dev));
1113                 if (pmu->name) {
1114                         /* tools/perf reserves colons as special. */
1115                         strreplace((char *)pmu->name, ':', '_');
1116                 }
1117         } else {
1118                 pmu->name = "i915";
1119         }
1120         if (!pmu->name)
1121                 goto err;
1122
1123         pmu->events_attr_group.name = "events";
1124         pmu->events_attr_group.attrs = create_event_attributes(pmu);
1125         if (!pmu->events_attr_group.attrs)
1126                 goto err_name;
1127
1128         pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
1129                                         GFP_KERNEL);
1130         if (!pmu->base.attr_groups)
1131                 goto err_attr;
1132
1133         pmu->base.task_ctx_nr   = perf_invalid_context;
1134         pmu->base.event_init    = i915_pmu_event_init;
1135         pmu->base.add           = i915_pmu_event_add;
1136         pmu->base.del           = i915_pmu_event_del;
1137         pmu->base.start         = i915_pmu_event_start;
1138         pmu->base.stop          = i915_pmu_event_stop;
1139         pmu->base.read          = i915_pmu_event_read;
1140         pmu->base.event_idx     = i915_pmu_event_event_idx;
1141
1142         ret = perf_pmu_register(&pmu->base, pmu->name, -1);
1143         if (ret)
1144                 goto err_groups;
1145
1146         ret = i915_pmu_register_cpuhp_state(pmu);
1147         if (ret)
1148                 goto err_unreg;
1149
1150         return;
1151
1152 err_unreg:
1153         perf_pmu_unregister(&pmu->base);
1154 err_groups:
1155         kfree(pmu->base.attr_groups);
1156 err_attr:
1157         pmu->base.event_init = NULL;
1158         free_event_attributes(pmu);
1159 err_name:
1160         if (!is_igp(i915))
1161                 kfree(pmu->name);
1162 err:
1163         drm_notice(&i915->drm, "Failed to register PMU!\n");
1164 }
1165
1166 void i915_pmu_unregister(struct drm_i915_private *i915)
1167 {
1168         struct i915_pmu *pmu = &i915->pmu;
1169
1170         if (!pmu->base.event_init)
1171                 return;
1172
1173         drm_WARN_ON(&i915->drm, pmu->enable);
1174
1175         hrtimer_cancel(&pmu->timer);
1176
1177         i915_pmu_unregister_cpuhp_state(pmu);
1178
1179         perf_pmu_unregister(&pmu->base);
1180         pmu->base.event_init = NULL;
1181         kfree(pmu->base.attr_groups);
1182         if (!is_igp(i915))
1183                 kfree(pmu->name);
1184         free_event_attributes(pmu);
1185 }