1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6
7 #include <linux/irq.h>
8 #include <linux/pm_runtime.h>
9
10 #include "gt/intel_engine.h"
11 #include "gt/intel_engine_pm.h"
12 #include "gt/intel_engine_user.h"
13 #include "gt/intel_gt_pm.h"
14 #include "gt/intel_rc6.h"
15 #include "gt/intel_rps.h"
16
17 #include "i915_drv.h"
18 #include "i915_pmu.h"
19 #include "intel_pm.h"
20
21 /* Frequency for the sampling timer for events which need it. */
22 #define FREQUENCY 200
23 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
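/*
 * With FREQUENCY at 200 Hz this evaluates to max(10,000, 10^9 / 200) ns,
 * i.e. a 5,000,000 ns (5 ms) period for the sampling hrtimer below.
 */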
24
25 #define ENGINE_SAMPLE_MASK \
26         (BIT(I915_SAMPLE_BUSY) | \
27          BIT(I915_SAMPLE_WAIT) | \
28          BIT(I915_SAMPLE_SEMA))
29
30 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
31
32 static cpumask_t i915_pmu_cpumask;
33 static unsigned int i915_pmu_target_cpu = -1;
34
35 static u8 engine_config_sample(u64 config)
36 {
37         return config & I915_PMU_SAMPLE_MASK;
38 }
39
40 static u8 engine_event_sample(struct perf_event *event)
41 {
42         return engine_config_sample(event->attr.config);
43 }
44
45 static u8 engine_event_class(struct perf_event *event)
46 {
47         return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
48 }
49
50 static u8 engine_event_instance(struct perf_event *event)
51 {
52         return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
53 }
54
55 static bool is_engine_config(u64 config)
56 {
57         return config < __I915_PMU_OTHER(0);
58 }
59
60 static unsigned int config_enabled_bit(u64 config)
61 {
62         if (is_engine_config(config))
63                 return engine_config_sample(config);
64         else
65                 return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
66 }
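
/*
 * Layout of the pmu->enable bitmask derived above: the per-engine sample
 * types (busy, wait, sema) occupy the low bits below ENGINE_SAMPLE_BITS,
 * while the non-engine events occupy the bits from ENGINE_SAMPLE_BITS
 * upwards, in the order of their __I915_PMU_OTHER() config values.
 */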
67
68 static u64 config_enabled_mask(u64 config)
69 {
70         return BIT_ULL(config_enabled_bit(config));
71 }
72
73 static bool is_engine_event(struct perf_event *event)
74 {
75         return is_engine_config(event->attr.config);
76 }
77
78 static unsigned int event_enabled_bit(struct perf_event *event)
79 {
80         return config_enabled_bit(event->attr.config);
81 }
82
83 static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
84 {
85         struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
86         u64 enable;
87
88         /*
89          * Only some counters need the sampling timer.
90          *
91          * We start with a bitmask of all currently enabled events.
92          */
93         enable = pmu->enable;
94
95         /*
96          * Mask out all the ones which do not need the timer, or in
97          * other words keep all the ones that could need the timer.
98          */
99         enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
100                   config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
101                   ENGINE_SAMPLE_MASK;
102
103         /*
104          * When the GPU is idle per-engine counters do not need to be
105          * running so clear those bits out.
106          */
107         if (!gpu_active)
108                 enable &= ~ENGINE_SAMPLE_MASK;
109         /*
110          * Also, when software busyness tracking is available we do not
111          * need the timer for the I915_SAMPLE_BUSY counter.
112          */
113         else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
114                 enable &= ~BIT(I915_SAMPLE_BUSY);
115
116         /*
117          * If some bits remain it means we need the sampling timer running.
118          */
119         return enable;
120 }
121
122 static u64 __get_rc6(struct intel_gt *gt)
123 {
124         struct drm_i915_private *i915 = gt->i915;
125         u64 val;
126
127         val = intel_rc6_residency_ns(&gt->rc6,
128                                      IS_VALLEYVIEW(i915) ?
129                                      VLV_GT_RENDER_RC6 :
130                                      GEN6_GT_GFX_RC6);
131
132         if (HAS_RC6p(i915))
133                 val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);
134
135         if (HAS_RC6pp(i915))
136                 val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);
137
138         return val;
139 }
140
141 #if IS_ENABLED(CONFIG_PM)
142
143 static inline s64 ktime_since(const ktime_t kt)
144 {
145         return ktime_to_ns(ktime_sub(ktime_get(), kt));
146 }
147
148 static u64 get_rc6(struct intel_gt *gt)
149 {
150         struct drm_i915_private *i915 = gt->i915;
151         struct i915_pmu *pmu = &i915->pmu;
152         unsigned long flags;
153         bool awake = false;
154         u64 val;
155
156         if (intel_gt_pm_get_if_awake(gt)) {
157                 val = __get_rc6(gt);
158                 intel_gt_pm_put_async(gt);
159                 awake = true;
160         }
161
162         spin_lock_irqsave(&pmu->lock, flags);
163
164         if (awake) {
165                 pmu->sample[__I915_SAMPLE_RC6].cur = val;
166         } else {
167                 /*
168                  * We think we are runtime suspended.
169                  *
170                  * Report the delta from when the device was suspended to now,
171                  * on top of the last known real value, as the approximated RC6
172                  * counter value.
173                  */
174                 val = ktime_since(pmu->sleep_last);
175                 val += pmu->sample[__I915_SAMPLE_RC6].cur;
176         }
177
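        /*
         * Clamp against the last reported value so the counter stays
         * monotonic even when switching between the estimated (suspended)
         * and real (awake) RC6 residency.
         */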
178         if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
179                 val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
180         else
181                 pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
182
183         spin_unlock_irqrestore(&pmu->lock, flags);
184
185         return val;
186 }
187
188 static void park_rc6(struct drm_i915_private *i915)
189 {
190         struct i915_pmu *pmu = &i915->pmu;
191
192         if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
193                 pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
194
195         pmu->sleep_last = ktime_get();
196 }
197
198 #else
199
200 static u64 get_rc6(struct intel_gt *gt)
201 {
202         return __get_rc6(gt);
203 }
204
205 static void park_rc6(struct drm_i915_private *i915) {}
206
207 #endif
208
209 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
210 {
211         if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
212                 pmu->timer_enabled = true;
213                 pmu->timer_last = ktime_get();
214                 hrtimer_start_range_ns(&pmu->timer,
215                                        ns_to_ktime(PERIOD), 0,
216                                        HRTIMER_MODE_REL_PINNED);
217         }
218 }
219
220 void i915_pmu_gt_parked(struct drm_i915_private *i915)
221 {
222         struct i915_pmu *pmu = &i915->pmu;
223
224         if (!pmu->base.event_init)
225                 return;
226
227         spin_lock_irq(&pmu->lock);
228
229         park_rc6(i915);
230
231         /*
232          * Signal sampling timer to stop if only engine events are enabled and
233          * GPU went idle.
234          */
235         pmu->timer_enabled = pmu_needs_timer(pmu, false);
236
237         spin_unlock_irq(&pmu->lock);
238 }
239
240 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
241 {
242         struct i915_pmu *pmu = &i915->pmu;
243
244         if (!pmu->base.event_init)
245                 return;
246
247         spin_lock_irq(&pmu->lock);
248
249         /*
250          * Re-enable sampling timer when GPU goes active.
251          */
252         __i915_pmu_maybe_start_timer(pmu);
253
254         spin_unlock_irq(&pmu->lock);
255 }
256
257 static void
258 add_sample(struct i915_pmu_sample *sample, u32 val)
259 {
260         sample->cur += val;
261 }
262
263 static bool exclusive_mmio_access(const struct drm_i915_private *i915)
264 {
265         /*
266          * We have to avoid concurrent mmio cache line access on gen7 or
267          * risk a machine hang. For a fun history lesson dig out the old
268          * userspace intel_gpu_top and run it on Ivybridge or Haswell!
269          */
270         return IS_GEN(i915, 7);
271 }
272
273 static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
274 {
275         struct intel_engine_pmu *pmu = &engine->pmu;
276         bool busy;
277         u32 val;
278
279         val = ENGINE_READ_FW(engine, RING_CTL);
280         if (val == 0) /* powerwell off => engine idle */
281                 return;
282
283         if (val & RING_WAIT)
284                 add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
285         if (val & RING_WAIT_SEMAPHORE)
286                 add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
287
288         /* No need to sample when busy stats are supported. */
289         if (intel_engine_supports_stats(engine))
290                 return;
291
292         /*
293          * While waiting on a semaphore or event, MI_MODE reports the
294          * ring as idle. However, previously using the seqno, and with
295          * execlists sampling, we account for the ring waiting as the
296          * engine being busy. Therefore, we record the sample as being
297          * busy if either waiting or !idle.
298          */
299         busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
300         if (!busy) {
301                 val = ENGINE_READ_FW(engine, RING_MI_MODE);
302                 busy = !(val & MODE_IDLE);
303         }
304         if (busy)
305                 add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
306 }
307
308 static void
309 engines_sample(struct intel_gt *gt, unsigned int period_ns)
310 {
311         struct drm_i915_private *i915 = gt->i915;
312         struct intel_engine_cs *engine;
313         enum intel_engine_id id;
314         unsigned long flags;
315
316         if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
317                 return;
318
319         if (!intel_gt_pm_is_awake(gt))
320                 return;
321
322         for_each_engine(engine, gt, id) {
323                 if (!intel_engine_pm_get_if_awake(engine))
324                         continue;
325
326                 if (exclusive_mmio_access(i915)) {
327                         spin_lock_irqsave(&engine->uncore->lock, flags);
328                         engine_sample(engine, period_ns);
329                         spin_unlock_irqrestore(&engine->uncore->lock, flags);
330                 } else {
331                         engine_sample(engine, period_ns);
332                 }
333
334                 intel_engine_pm_put_async(engine);
335         }
336 }
337
338 static void
339 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
340 {
341         sample->cur += mul_u32_u32(val, mul);
342 }
343
344 static bool frequency_sampling_enabled(struct i915_pmu *pmu)
345 {
346         return pmu->enable &
347                (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
348                 config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY));
349 }
350
351 static void
352 frequency_sample(struct intel_gt *gt, unsigned int period_ns)
353 {
354         struct drm_i915_private *i915 = gt->i915;
355         struct intel_uncore *uncore = gt->uncore;
356         struct i915_pmu *pmu = &i915->pmu;
357         struct intel_rps *rps = &gt->rps;
358
359         if (!frequency_sampling_enabled(pmu))
360                 return;
361
362         /* Report 0/0 (actual/requested) frequency while parked. */
363         if (!intel_gt_pm_get_if_awake(gt))
364                 return;
365
366         if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
367                 u32 val;
368
369                 /*
370                  * We take a quick peek here without using forcewake
371                  * so that we don't perturb the system under observation
372                  * (forcewake => !rc6 => increased power use). We expect
373                  * that if the read fails because it is outside of the
374                  * mmio power well, then it will return 0 -- in which
375                  * case we assume the system is running at the intended
376                  * frequency. Fortunately, the read should rarely fail!
377                  */
378                 val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
379                 if (val)
380                         val = intel_rps_get_cagf(rps, val);
381                 else
382                         val = rps->cur_freq;
383
384                 add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
385                                 intel_gpu_freq(rps, val), period_ns / 1000);
386         }
387
388         if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
389                 add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
390                                 intel_gpu_freq(rps, rps->cur_freq),
391                                 period_ns / 1000);
392         }
393
394         intel_gt_pm_put_async(gt);
395 }
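
/*
 * Note on units: frequency_sample() accumulates freq (MHz) * period (us)
 * into the sample, so the raw counter is in MHz*us. __i915_pmu_event_read()
 * divides it by USEC_PER_SEC, leaving MHz*s, so dividing a counter delta by
 * the elapsed time in seconds yields the average frequency in MHz.
 */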
396
397 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
398 {
399         struct drm_i915_private *i915 =
400                 container_of(hrtimer, struct drm_i915_private, pmu.timer);
401         struct i915_pmu *pmu = &i915->pmu;
402         struct intel_gt *gt = &i915->gt;
403         unsigned int period_ns;
404         ktime_t now;
405
406         if (!READ_ONCE(pmu->timer_enabled))
407                 return HRTIMER_NORESTART;
408
409         now = ktime_get();
410         period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
411         pmu->timer_last = now;
412
413         /*
414          * Strictly speaking the passed-in period may not be 100% accurate for
415          * all internal calculations, since some amount of time can be spent on
416          * grabbing the forcewake. However, the potential error from timer
417          * callback delay greatly dominates this, so we keep it simple.
418          */
419         engines_sample(gt, period_ns);
420         frequency_sample(gt, period_ns);
421
422         hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
423
424         return HRTIMER_RESTART;
425 }
426
427 static u64 count_interrupts(struct drm_i915_private *i915)
428 {
429         /* open-coded kstat_irqs() */
430         struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
431         u64 sum = 0;
432         int cpu;
433
434         if (!desc || !desc->kstat_irqs)
435                 return 0;
436
437         for_each_possible_cpu(cpu)
438                 sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
439
440         return sum;
441 }
442
443 static void i915_pmu_event_destroy(struct perf_event *event)
444 {
445         struct drm_i915_private *i915 =
446                 container_of(event->pmu, typeof(*i915), pmu.base);
447
448         drm_WARN_ON(&i915->drm, event->parent);
449
450         drm_dev_put(&i915->drm);
451 }
452
453 static int
454 engine_event_status(struct intel_engine_cs *engine,
455                     enum drm_i915_pmu_engine_sample sample)
456 {
457         switch (sample) {
458         case I915_SAMPLE_BUSY:
459         case I915_SAMPLE_WAIT:
460                 break;
461         case I915_SAMPLE_SEMA:
462                 if (INTEL_GEN(engine->i915) < 6)
463                         return -ENODEV;
464                 break;
465         default:
466                 return -ENOENT;
467         }
468
469         return 0;
470 }
471
472 static int
473 config_status(struct drm_i915_private *i915, u64 config)
474 {
475         switch (config) {
476         case I915_PMU_ACTUAL_FREQUENCY:
477                 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
478                         /* Requires a mutex for sampling! */
479                         return -ENODEV;
480                 fallthrough;
481         case I915_PMU_REQUESTED_FREQUENCY:
482                 if (INTEL_GEN(i915) < 6)
483                         return -ENODEV;
484                 break;
485         case I915_PMU_INTERRUPTS:
486                 break;
487         case I915_PMU_RC6_RESIDENCY:
488                 if (!HAS_RC6(i915))
489                         return -ENODEV;
490                 break;
491         default:
492                 return -ENOENT;
493         }
494
495         return 0;
496 }
497
498 static int engine_event_init(struct perf_event *event)
499 {
500         struct drm_i915_private *i915 =
501                 container_of(event->pmu, typeof(*i915), pmu.base);
502         struct intel_engine_cs *engine;
503
504         engine = intel_engine_lookup_user(i915, engine_event_class(event),
505                                           engine_event_instance(event));
506         if (!engine)
507                 return -ENODEV;
508
509         return engine_event_status(engine, engine_event_sample(event));
510 }
511
512 static int i915_pmu_event_init(struct perf_event *event)
513 {
514         struct drm_i915_private *i915 =
515                 container_of(event->pmu, typeof(*i915), pmu.base);
516         struct i915_pmu *pmu = &i915->pmu;
517         int ret;
518
519         if (pmu->closed)
520                 return -ENODEV;
521
522         if (event->attr.type != event->pmu->type)
523                 return -ENOENT;
524
525         /* unsupported modes and filters */
526         if (event->attr.sample_period) /* no sampling */
527                 return -EINVAL;
528
529         if (has_branch_stack(event))
530                 return -EOPNOTSUPP;
531
532         if (event->cpu < 0)
533                 return -EINVAL;
534
535         /* only allow running on one cpu at a time */
536         if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
537                 return -EINVAL;
538
539         if (is_engine_event(event))
540                 ret = engine_event_init(event);
541         else
542                 ret = config_status(i915, event->attr.config);
543         if (ret)
544                 return ret;
545
546         if (!event->parent) {
547                 drm_dev_get(&i915->drm);
548                 event->destroy = i915_pmu_event_destroy;
549         }
550
551         return 0;
552 }
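
/*
 * Illustrative userspace sketch (not part of the driver): opening one of
 * these counters by hand needs the dynamic PMU type from sysfs and a config
 * value built from the I915_PMU_* macros in include/uapi/drm/i915_drm.h.
 * The paths and fields below follow the standard perf conventions and are
 * shown only as an example:
 *
 *   struct perf_event_attr attr = {};
 *   attr.type = <value read from /sys/bus/event_source/devices/i915/type>;
 *   attr.size = sizeof(attr);
 *   attr.config = I915_PMU_RC6_RESIDENCY;
 *   fd = syscall(__NR_perf_event_open, &attr,
 *                -1, cpu, -1, 0);
 *
 * As enforced by i915_pmu_event_init() above, the event must be opened
 * system-wide (pid == -1) with a cpu taken from the exported "cpumask"
 * attribute, and sampling (attr.sample_period) is not supported.
 */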
553
554 static u64 __i915_pmu_event_read(struct perf_event *event)
555 {
556         struct drm_i915_private *i915 =
557                 container_of(event->pmu, typeof(*i915), pmu.base);
558         struct i915_pmu *pmu = &i915->pmu;
559         u64 val = 0;
560
561         if (is_engine_event(event)) {
562                 u8 sample = engine_event_sample(event);
563                 struct intel_engine_cs *engine;
564
565                 engine = intel_engine_lookup_user(i915,
566                                                   engine_event_class(event),
567                                                   engine_event_instance(event));
568
569                 if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
570                         /* Do nothing */
571                 } else if (sample == I915_SAMPLE_BUSY &&
572                            intel_engine_supports_stats(engine)) {
573                         ktime_t unused;
574
575                         val = ktime_to_ns(intel_engine_get_busy_time(engine,
576                                                                      &unused));
577                 } else {
578                         val = engine->pmu.sample[sample].cur;
579                 }
580         } else {
581                 switch (event->attr.config) {
582                 case I915_PMU_ACTUAL_FREQUENCY:
583                         val =
584                            div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
585                                    USEC_PER_SEC /* to MHz */);
586                         break;
587                 case I915_PMU_REQUESTED_FREQUENCY:
588                         val =
589                            div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
590                                    USEC_PER_SEC /* to MHz */);
591                         break;
592                 case I915_PMU_INTERRUPTS:
593                         val = count_interrupts(i915);
594                         break;
595                 case I915_PMU_RC6_RESIDENCY:
596                         val = get_rc6(&i915->gt);
597                         break;
598                 }
599         }
600
601         return val;
602 }
603
604 static void i915_pmu_event_read(struct perf_event *event)
605 {
606         struct drm_i915_private *i915 =
607                 container_of(event->pmu, typeof(*i915), pmu.base);
608         struct hw_perf_event *hwc = &event->hw;
609         struct i915_pmu *pmu = &i915->pmu;
610         u64 prev, new;
611
612         if (pmu->closed) {
613                 event->hw.state = PERF_HES_STOPPED;
614                 return;
615         }
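        /*
         * Retry loop: publish the latest value with cmpxchg so that
         * concurrent readers agree on prev_count and each delta is
         * accounted exactly once via local64_add() below.
         */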
616 again:
617         prev = local64_read(&hwc->prev_count);
618         new = __i915_pmu_event_read(event);
619
620         if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
621                 goto again;
622
623         local64_add(new - prev, &event->count);
624 }
625
626 static void i915_pmu_enable(struct perf_event *event)
627 {
628         struct drm_i915_private *i915 =
629                 container_of(event->pmu, typeof(*i915), pmu.base);
630         unsigned int bit = event_enabled_bit(event);
631         struct i915_pmu *pmu = &i915->pmu;
632         intel_wakeref_t wakeref;
633         unsigned long flags;
634
635         wakeref = intel_runtime_pm_get(&i915->runtime_pm);
636         spin_lock_irqsave(&pmu->lock, flags);
637
638         /*
639          * Update the bitmask of enabled events and increment
640          * the event reference counter.
641          */
642         BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
643         GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
644         GEM_BUG_ON(pmu->enable_count[bit] == ~0);
645
646         if (pmu->enable_count[bit] == 0 &&
647             config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) {
648                 pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0;
649                 pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
650                 pmu->sleep_last = ktime_get();
651         }
652
653         pmu->enable |= BIT_ULL(bit);
654         pmu->enable_count[bit]++;
655
656         /*
657          * Start the sampling timer if needed and not already enabled.
658          */
659         __i915_pmu_maybe_start_timer(pmu);
660
661         /*
662          * For per-engine events the bitmask and reference counting
663          * is stored per engine.
664          */
665         if (is_engine_event(event)) {
666                 u8 sample = engine_event_sample(event);
667                 struct intel_engine_cs *engine;
668
669                 engine = intel_engine_lookup_user(i915,
670                                                   engine_event_class(event),
671                                                   engine_event_instance(event));
672
673                 BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
674                              I915_ENGINE_SAMPLE_COUNT);
675                 BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
676                              I915_ENGINE_SAMPLE_COUNT);
677                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
678                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
679                 GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
680
681                 engine->pmu.enable |= BIT(sample);
682                 engine->pmu.enable_count[sample]++;
683         }
684
685         spin_unlock_irqrestore(&pmu->lock, flags);
686
687         /*
688          * Store the current counter value so we can report the correct delta
689          * for all listeners, even when the event was already enabled and has
690          * an existing non-zero value.
691          */
692         local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
693
694         intel_runtime_pm_put(&i915->runtime_pm, wakeref);
695 }
696
697 static void i915_pmu_disable(struct perf_event *event)
698 {
699         struct drm_i915_private *i915 =
700                 container_of(event->pmu, typeof(*i915), pmu.base);
701         unsigned int bit = event_enabled_bit(event);
702         struct i915_pmu *pmu = &i915->pmu;
703         unsigned long flags;
704
705         spin_lock_irqsave(&pmu->lock, flags);
706
707         if (is_engine_event(event)) {
708                 u8 sample = engine_event_sample(event);
709                 struct intel_engine_cs *engine;
710
711                 engine = intel_engine_lookup_user(i915,
712                                                   engine_event_class(event),
713                                                   engine_event_instance(event));
714
715                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
716                 GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
717                 GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
718
719                 /*
720                  * Decrement the reference count and clear the enabled
721                  * bitmask when the last listener on an event goes away.
722                  */
723                 if (--engine->pmu.enable_count[sample] == 0)
724                         engine->pmu.enable &= ~BIT(sample);
725         }
726
727         GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
728         GEM_BUG_ON(pmu->enable_count[bit] == 0);
729         /*
730          * Decrement the reference count and clear the enabled
731          * bitmask when the last listener on an event goes away.
732          */
733         if (--pmu->enable_count[bit] == 0) {
734                 pmu->enable &= ~BIT_ULL(bit);
735                 pmu->timer_enabled &= pmu_needs_timer(pmu, true);
736         }
737
738         spin_unlock_irqrestore(&pmu->lock, flags);
739 }
740
741 static void i915_pmu_event_start(struct perf_event *event, int flags)
742 {
743         struct drm_i915_private *i915 =
744                 container_of(event->pmu, typeof(*i915), pmu.base);
745         struct i915_pmu *pmu = &i915->pmu;
746
747         if (pmu->closed)
748                 return;
749
750         i915_pmu_enable(event);
751         event->hw.state = 0;
752 }
753
754 static void i915_pmu_event_stop(struct perf_event *event, int flags)
755 {
756         if (flags & PERF_EF_UPDATE)
757                 i915_pmu_event_read(event);
758         i915_pmu_disable(event);
759         event->hw.state = PERF_HES_STOPPED;
760 }
761
762 static int i915_pmu_event_add(struct perf_event *event, int flags)
763 {
764         struct drm_i915_private *i915 =
765                 container_of(event->pmu, typeof(*i915), pmu.base);
766         struct i915_pmu *pmu = &i915->pmu;
767
768         if (pmu->closed)
769                 return -ENODEV;
770
771         if (flags & PERF_EF_START)
772                 i915_pmu_event_start(event, flags);
773
774         return 0;
775 }
776
777 static void i915_pmu_event_del(struct perf_event *event, int flags)
778 {
779         i915_pmu_event_stop(event, PERF_EF_UPDATE);
780 }
781
782 static int i915_pmu_event_event_idx(struct perf_event *event)
783 {
784         return 0;
785 }
786
787 struct i915_str_attribute {
788         struct device_attribute attr;
789         const char *str;
790 };
791
792 static ssize_t i915_pmu_format_show(struct device *dev,
793                                     struct device_attribute *attr, char *buf)
794 {
795         struct i915_str_attribute *eattr;
796
797         eattr = container_of(attr, struct i915_str_attribute, attr);
798         return sprintf(buf, "%s\n", eattr->str);
799 }
800
801 #define I915_PMU_FORMAT_ATTR(_name, _config) \
802         (&((struct i915_str_attribute[]) { \
803                 { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
804                   .str = _config, } \
805         })[0].attr.attr)
806
807 static struct attribute *i915_pmu_format_attrs[] = {
808         I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
809         NULL,
810 };
811
812 static const struct attribute_group i915_pmu_format_attr_group = {
813         .name = "format",
814         .attrs = i915_pmu_format_attrs,
815 };
816
817 struct i915_ext_attribute {
818         struct device_attribute attr;
819         unsigned long val;
820 };
821
822 static ssize_t i915_pmu_event_show(struct device *dev,
823                                    struct device_attribute *attr, char *buf)
824 {
825         struct i915_ext_attribute *eattr;
826
827         eattr = container_of(attr, struct i915_ext_attribute, attr);
828         return sprintf(buf, "config=0x%lx\n", eattr->val);
829 }
830
831 static ssize_t
832 i915_pmu_get_attr_cpumask(struct device *dev,
833                           struct device_attribute *attr,
834                           char *buf)
835 {
836         return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
837 }
838
839 static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);
840
841 static struct attribute *i915_cpumask_attrs[] = {
842         &dev_attr_cpumask.attr,
843         NULL,
844 };
845
846 static const struct attribute_group i915_pmu_cpumask_attr_group = {
847         .attrs = i915_cpumask_attrs,
848 };
849
850 #define __event(__config, __name, __unit) \
851 { \
852         .config = (__config), \
853         .name = (__name), \
854         .unit = (__unit), \
855 }
856
857 #define __engine_event(__sample, __name) \
858 { \
859         .sample = (__sample), \
860         .name = (__name), \
861 }
862
863 static struct i915_ext_attribute *
864 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
865 {
866         sysfs_attr_init(&attr->attr.attr);
867         attr->attr.attr.name = name;
868         attr->attr.attr.mode = 0444;
869         attr->attr.show = i915_pmu_event_show;
870         attr->val = config;
871
872         return ++attr;
873 }
874
875 static struct perf_pmu_events_attr *
876 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
877              const char *str)
878 {
879         sysfs_attr_init(&attr->attr.attr);
880         attr->attr.attr.name = name;
881         attr->attr.attr.mode = 0444;
882         attr->attr.show = perf_event_sysfs_show;
883         attr->event_str = str;
884
885         return ++attr;
886 }
887
888 static struct attribute **
889 create_event_attributes(struct i915_pmu *pmu)
890 {
891         struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
892         static const struct {
893                 u64 config;
894                 const char *name;
895                 const char *unit;
896         } events[] = {
897                 __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
898                 __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
899                 __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
900                 __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
901         };
902         static const struct {
903                 enum drm_i915_pmu_engine_sample sample;
904                 char *name;
905         } engine_events[] = {
906                 __engine_event(I915_SAMPLE_BUSY, "busy"),
907                 __engine_event(I915_SAMPLE_SEMA, "sema"),
908                 __engine_event(I915_SAMPLE_WAIT, "wait"),
909         };
910         unsigned int count = 0;
911         struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
912         struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
913         struct attribute **attr = NULL, **attr_iter;
914         struct intel_engine_cs *engine;
915         unsigned int i;
916
917         /* Count how many counters we will be exposing. */
918         for (i = 0; i < ARRAY_SIZE(events); i++) {
919                 if (!config_status(i915, events[i].config))
920                         count++;
921         }
922
923         for_each_uabi_engine(engine, i915) {
924                 for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
925                         if (!engine_event_status(engine,
926                                                  engine_events[i].sample))
927                                 count++;
928                 }
929         }
930
931         /* Allocate attribute objects and table. */
932         i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
933         if (!i915_attr)
934                 goto err_alloc;
935
936         pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
937         if (!pmu_attr)
938                 goto err_alloc;
939
940         /* Max one pointer of each attribute type plus a termination entry. */
941         attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
942         if (!attr)
943                 goto err_alloc;
944
945         i915_iter = i915_attr;
946         pmu_iter = pmu_attr;
947         attr_iter = attr;
948
949         /* Initialize supported non-engine counters. */
950         for (i = 0; i < ARRAY_SIZE(events); i++) {
951                 char *str;
952
953                 if (config_status(i915, events[i].config))
954                         continue;
955
956                 str = kstrdup(events[i].name, GFP_KERNEL);
957                 if (!str)
958                         goto err;
959
960                 *attr_iter++ = &i915_iter->attr.attr;
961                 i915_iter = add_i915_attr(i915_iter, str, events[i].config);
962
963                 if (events[i].unit) {
964                         str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
965                         if (!str)
966                                 goto err;
967
968                         *attr_iter++ = &pmu_iter->attr.attr;
969                         pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
970                 }
971         }
972
973         /* Initialize supported engine counters. */
974         for_each_uabi_engine(engine, i915) {
975                 for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
976                         char *str;
977
978                         if (engine_event_status(engine,
979                                                 engine_events[i].sample))
980                                 continue;
981
982                         str = kasprintf(GFP_KERNEL, "%s-%s",
983                                         engine->name, engine_events[i].name);
984                         if (!str)
985                                 goto err;
986
987                         *attr_iter++ = &i915_iter->attr.attr;
988                         i915_iter =
989                                 add_i915_attr(i915_iter, str,
990                                               __I915_PMU_ENGINE(engine->uabi_class,
991                                                                 engine->uabi_instance,
992                                                                 engine_events[i].sample));
993
994                         str = kasprintf(GFP_KERNEL, "%s-%s.unit",
995                                         engine->name, engine_events[i].name);
996                         if (!str)
997                                 goto err;
998
999                         *attr_iter++ = &pmu_iter->attr.attr;
1000                         pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
1001                 }
1002         }
1003
1004         pmu->i915_attr = i915_attr;
1005         pmu->pmu_attr = pmu_attr;
1006
1007         return attr;
1008
1009 err:;
1010         for (attr_iter = attr; *attr_iter; attr_iter++)
1011                 kfree((*attr_iter)->name);
1012
1013 err_alloc:
1014         kfree(attr);
1015         kfree(i915_attr);
1016         kfree(pmu_attr);
1017
1018         return NULL;
1019 }
1020
1021 static void free_event_attributes(struct i915_pmu *pmu)
1022 {
1023         struct attribute **attr_iter = pmu->events_attr_group.attrs;
1024
1025         for (; *attr_iter; attr_iter++)
1026                 kfree((*attr_iter)->name);
1027
1028         kfree(pmu->events_attr_group.attrs);
1029         kfree(pmu->i915_attr);
1030         kfree(pmu->pmu_attr);
1031
1032         pmu->events_attr_group.attrs = NULL;
1033         pmu->i915_attr = NULL;
1034         pmu->pmu_attr = NULL;
1035 }
1036
1037 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
1038 {
1039         struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1040
1041         GEM_BUG_ON(!pmu->base.event_init);
1042
1043         /* Select the first online CPU as a designated reader. */
1044         if (!cpumask_weight(&i915_pmu_cpumask))
1045                 cpumask_set_cpu(cpu, &i915_pmu_cpumask);
1046
1047         return 0;
1048 }
1049
1050 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
1051 {
1052         struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1053         unsigned int target = i915_pmu_target_cpu;
1054
1055         GEM_BUG_ON(!pmu->base.event_init);
1056
1057         /*
1058          * Unregistering an instance generates a CPU offline event which we must
1059          * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
1060          */
1061         if (pmu->closed)
1062                 return 0;
1063
1064         if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
1065                 target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
1066
1067                 /* Migrate events if there is a valid target */
1068                 if (target < nr_cpu_ids) {
1069                         cpumask_set_cpu(target, &i915_pmu_cpumask);
1070                         i915_pmu_target_cpu = target;
1071                 }
1072         }
1073
1074         if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
1075                 perf_pmu_migrate_context(&pmu->base, cpu, target);
1076                 pmu->cpuhp.cpu = target;
1077         }
1078
1079         return 0;
1080 }
1081
1082 static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
1083
1084 void i915_pmu_init(void)
1085 {
1086         int ret;
1087
1088         ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
1089                                       "perf/x86/intel/i915:online",
1090                                       i915_pmu_cpu_online,
1091                                       i915_pmu_cpu_offline);
1092         if (ret < 0)
1093                 pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
1094                           ret);
1095         else
1096                 cpuhp_slot = ret;
1097 }
1098
1099 void i915_pmu_exit(void)
1100 {
1101         if (cpuhp_slot != CPUHP_INVALID)
1102                 cpuhp_remove_multi_state(cpuhp_slot);
1103 }
1104
1105 static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
1106 {
1107         if (cpuhp_slot == CPUHP_INVALID)
1108                 return -EINVAL;
1109
1110         return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
1111 }
1112
1113 static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
1114 {
1115         cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
1116 }
1117
1118 static bool is_igp(struct drm_i915_private *i915)
1119 {
1120         struct pci_dev *pdev = i915->drm.pdev;
1121
1122         /* IGP is 0000:00:02.0 */
1123         return pci_domain_nr(pdev->bus) == 0 &&
1124                pdev->bus->number == 0 &&
1125                PCI_SLOT(pdev->devfn) == 2 &&
1126                PCI_FUNC(pdev->devfn) == 0;
1127 }
1128
1129 void i915_pmu_register(struct drm_i915_private *i915)
1130 {
1131         struct i915_pmu *pmu = &i915->pmu;
1132         const struct attribute_group *attr_groups[] = {
1133                 &i915_pmu_format_attr_group,
1134                 &pmu->events_attr_group,
1135                 &i915_pmu_cpumask_attr_group,
1136                 NULL
1137         };
1138
1139         int ret = -ENOMEM;
1140
1141         if (INTEL_GEN(i915) <= 2) {
1142                 drm_info(&i915->drm, "PMU not supported for this GPU.");
1143                 return;
1144         }
1145
1146         spin_lock_init(&pmu->lock);
1147         hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1148         pmu->timer.function = i915_sample;
1149         pmu->cpuhp.cpu = -1;
1150
1151         if (!is_igp(i915)) {
1152                 pmu->name = kasprintf(GFP_KERNEL,
1153                                       "i915_%s",
1154                                       dev_name(i915->drm.dev));
1155                 if (pmu->name) {
1156                         /* tools/perf reserves colons as special. */
1157                         strreplace((char *)pmu->name, ':', '_');
1158                 }
1159         } else {
1160                 pmu->name = "i915";
1161         }
1162         if (!pmu->name)
1163                 goto err;
1164
1165         pmu->events_attr_group.name = "events";
1166         pmu->events_attr_group.attrs = create_event_attributes(pmu);
1167         if (!pmu->events_attr_group.attrs)
1168                 goto err_name;
1169
1170         pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
1171                                         GFP_KERNEL);
1172         if (!pmu->base.attr_groups)
1173                 goto err_attr;
1174
1175         pmu->base.module        = THIS_MODULE;
1176         pmu->base.task_ctx_nr   = perf_invalid_context;
1177         pmu->base.event_init    = i915_pmu_event_init;
1178         pmu->base.add           = i915_pmu_event_add;
1179         pmu->base.del           = i915_pmu_event_del;
1180         pmu->base.start         = i915_pmu_event_start;
1181         pmu->base.stop          = i915_pmu_event_stop;
1182         pmu->base.read          = i915_pmu_event_read;
1183         pmu->base.event_idx     = i915_pmu_event_event_idx;
1184
1185         ret = perf_pmu_register(&pmu->base, pmu->name, -1);
1186         if (ret)
1187                 goto err_groups;
1188
1189         ret = i915_pmu_register_cpuhp_state(pmu);
1190         if (ret)
1191                 goto err_unreg;
1192
1193         return;
1194
1195 err_unreg:
1196         perf_pmu_unregister(&pmu->base);
1197 err_groups:
1198         kfree(pmu->base.attr_groups);
1199 err_attr:
1200         pmu->base.event_init = NULL;
1201         free_event_attributes(pmu);
1202 err_name:
1203         if (!is_igp(i915))
1204                 kfree(pmu->name);
1205 err:
1206         drm_notice(&i915->drm, "Failed to register PMU!\n");
1207 }
1208
1209 void i915_pmu_unregister(struct drm_i915_private *i915)
1210 {
1211         struct i915_pmu *pmu = &i915->pmu;
1212
1213         if (!pmu->base.event_init)
1214                 return;
1215
1216         /*
1217          * "Disconnect" the PMU callbacks - since all are atomic, synchronize_rcu()
1218          * ensures all currently executing ones will have exited before we
1219          * proceed with unregistration.
1220          */
1221         pmu->closed = true;
1222         synchronize_rcu();
1223
1224         hrtimer_cancel(&pmu->timer);
1225
1226         i915_pmu_unregister_cpuhp_state(pmu);
1227
1228         perf_pmu_unregister(&pmu->base);
1229         pmu->base.event_init = NULL;
1230         kfree(pmu->base.attr_groups);
1231         if (!is_igp(i915))
1232                 kfree(pmu->name);
1233         free_event_attributes(pmu);
1234 }
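
/*
 * Illustrative usage from userspace (assuming the integrated GPU case, where
 * the PMU registers under the plain "i915" name): the counters created above
 * can be read with the perf tool, e.g.
 *
 *   perf stat -e i915/rc6-residency/,i915/actual-frequency/ -a -I 1000
 *   perf stat -e i915/rcs0-busy/ -a sleep 1
 *
 * "rcs0-busy" follows the "<engine>-<sample>" naming generated by
 * create_event_attributes(); non-IGP devices register under an
 * "i915_<pci-address>" PMU name instead.
 */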