/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */
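
/*
 * i915 perf PMU: exposes GPU counters (engine busyness, actual and
 * requested frequency, interrupt count and RC6 residency) to the perf
 * subsystem. As an illustrative example (the exact set of events depends
 * on the GPU), the counters can be read from userspace with:
 *
 *   perf stat -a -e i915/rc6-residency/ -e i915/rcs0-busy/ sleep 1
 *
 * The PMU is registered as "i915" for the integrated GPU, see
 * i915_pmu_register() below.
 */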

#include <linux/irq.h>
#include <linux/pm_runtime.h>

#include "gt/intel_engine.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_rc6.h"
#include "gt/intel_rps.h"

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_pm.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
        (BIT(I915_SAMPLE_BUSY) | \
         BIT(I915_SAMPLE_WAIT) | \
         BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;

static u8 engine_config_sample(u64 config)
{
        return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
        return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
        return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
        return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
        return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
        if (is_engine_config(config))
                return engine_config_sample(config);
        else
                return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
        return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
        return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
        return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
{
        struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
        u64 enable;

        /*
         * Only some counters need the sampling timer.
         *
         * We start with a bitmask of all currently enabled events.
         */
        enable = pmu->enable;

        /*
         * Mask out all the ones which do not need the timer, or in
         * other words keep all the ones that could need the timer.
         */
        enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
                  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
                  ENGINE_SAMPLE_MASK;

        /*
         * When the GPU is idle, per-engine counters do not need to be
         * running, so clear those bits out.
         */
        if (!gpu_active)
                enable &= ~ENGINE_SAMPLE_MASK;
        /*
         * Also, when software busyness tracking is available we do not
         * need the timer for the I915_SAMPLE_BUSY counter.
         */
        else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
                enable &= ~BIT(I915_SAMPLE_BUSY);

        /*
         * If some bits remain it means we need the sampling timer running.
         */
        return enable;
}

static u64 __get_rc6(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        u64 val;

        val = intel_rc6_residency_ns(&gt->rc6,
                                     IS_VALLEYVIEW(i915) ?
                                     VLV_GT_RENDER_RC6 :
                                     GEN6_GT_GFX_RC6);

        if (HAS_RC6p(i915))
                val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);

        if (HAS_RC6pp(i915))
                val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);

        return val;
}

#if IS_ENABLED(CONFIG_PM)

static inline s64 ktime_since(const ktime_t kt)
{
        return ktime_to_ns(ktime_sub(ktime_get(), kt));
}

static u64 get_rc6(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct i915_pmu *pmu = &i915->pmu;
        unsigned long flags;
        bool awake = false;
        u64 val;

        if (intel_gt_pm_get_if_awake(gt)) {
                val = __get_rc6(gt);
                intel_gt_pm_put_async(gt);
                awake = true;
        }

        spin_lock_irqsave(&pmu->lock, flags);

        if (awake) {
                pmu->sample[__I915_SAMPLE_RC6].cur = val;
        } else {
                /*
                 * We think we are runtime suspended.
                 *
                 * Report the delta from when the device was suspended to now,
                 * on top of the last known real value, as the approximated RC6
                 * counter value.
                 */
                val = ktime_since(pmu->sleep_last);
                val += pmu->sample[__I915_SAMPLE_RC6].cur;
        }

        if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
                val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
        else
                pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;

        spin_unlock_irqrestore(&pmu->lock, flags);

        return val;
}

static void park_rc6(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
                pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);

        pmu->sleep_last = ktime_get();
}

#else

static u64 get_rc6(struct intel_gt *gt)
{
        return __get_rc6(gt);
}

static void park_rc6(struct drm_i915_private *i915) {}

#endif

static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
        if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
                pmu->timer_enabled = true;
                pmu->timer_last = ktime_get();
                hrtimer_start_range_ns(&pmu->timer,
                                       ns_to_ktime(PERIOD), 0,
                                       HRTIMER_MODE_REL_PINNED);
        }
}

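/*
 * i915_pmu_gt_parked - notify the PMU that the GT is being parked.
 *
 * Takes a final RC6 snapshot for the runtime-suspended estimate and lets
 * the sampling timer stop if no remaining events need it.
 */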
void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        if (!pmu->base.event_init)
                return;

        spin_lock_irq(&pmu->lock);

        park_rc6(i915);

        /*
         * Signal sampling timer to stop if only engine events are enabled and
         * GPU went idle.
         */
        pmu->timer_enabled = pmu_needs_timer(pmu, false);

        spin_unlock_irq(&pmu->lock);
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        if (!pmu->base.event_init)
                return;

        spin_lock_irq(&pmu->lock);

        /*
         * Re-enable sampling timer when GPU goes active.
         */
        __i915_pmu_maybe_start_timer(pmu);

        spin_unlock_irq(&pmu->lock);
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
        sample->cur += val;
}

static bool exclusive_mmio_access(const struct drm_i915_private *i915)
{
        /*
         * We have to avoid concurrent mmio cache line access on gen7 or
         * risk a machine hang. For a fun history lesson dig out the old
         * userspace intel_gpu_top and run it on Ivybridge or Haswell!
         */
        return IS_GEN(i915, 7);
}

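/*
 * Sample the busy, wait and semaphore state of each awake engine from the
 * RING_CTL and RING_MI_MODE registers, accumulating the elapsed period into
 * the corresponding software counters. Engines which support busy stats are
 * skipped for the busy sample since an exact value is available.
 */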
static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
        struct drm_i915_private *i915 = gt->i915;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
                return;

        if (!intel_gt_pm_is_awake(gt))
                return;

        for_each_engine(engine, gt, id) {
                struct intel_engine_pmu *pmu = &engine->pmu;
                spinlock_t *mmio_lock;
                unsigned long flags;
                bool busy;
                u32 val;

                if (!intel_engine_pm_get_if_awake(engine))
                        continue;

                mmio_lock = NULL;
                if (exclusive_mmio_access(i915))
                        mmio_lock = &engine->uncore->lock;

                if (unlikely(mmio_lock))
                        spin_lock_irqsave(mmio_lock, flags);

                val = ENGINE_READ_FW(engine, RING_CTL);
                if (val == 0) /* powerwell off => engine idle */
                        goto skip;

                if (val & RING_WAIT)
                        add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
                if (val & RING_WAIT_SEMAPHORE)
                        add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

                /* No need to sample when busy stats are supported. */
                if (intel_engine_supports_stats(engine))
                        goto skip;

                /*
                 * While waiting on a semaphore or event, MI_MODE reports the
                 * ring as idle. However, previously using the seqno, and with
                 * execlists sampling, we account for the ring waiting as the
                 * engine being busy. Therefore, we record the sample as being
                 * busy if either waiting or !idle.
                 */
                busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
                if (!busy) {
                        val = ENGINE_READ_FW(engine, RING_MI_MODE);
                        busy = !(val & MODE_IDLE);
                }
                if (busy)
                        add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);

skip:
                if (unlikely(mmio_lock))
                        spin_unlock_irqrestore(mmio_lock, flags);
                intel_engine_pm_put_async(engine);
        }
}

static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
        sample->cur += mul_u32_u32(val, mul);
}

static bool frequency_sampling_enabled(struct i915_pmu *pmu)
{
        return pmu->enable &
               (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
                config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY));
}

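/*
 * Sample the actual (RPSTAT1/CAGF) and requested (rps->cur_freq) GPU
 * frequency, accumulating each weighted by the elapsed period in
 * microseconds so that the read-out side can report an average in MHz.
 */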
static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
        struct drm_i915_private *i915 = gt->i915;
        struct intel_uncore *uncore = gt->uncore;
        struct i915_pmu *pmu = &i915->pmu;
        struct intel_rps *rps = &gt->rps;

        if (!frequency_sampling_enabled(pmu))
                return;

        /* Report 0/0 (actual/requested) frequency while parked. */
        if (!intel_gt_pm_get_if_awake(gt))
                return;

        if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
                u32 val;

                /*
                 * We take a quick peek here without using forcewake
                 * so that we don't perturb the system under observation
                 * (forcewake => !rc6 => increased power use). We expect
                 * that if the read fails because it is outside of the
                 * mmio power well, then it will return 0 -- in which
                 * case we assume the system is running at the intended
                 * frequency. Fortunately, the read should rarely fail!
                 */
                val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
                if (val)
                        val = intel_rps_get_cagf(rps, val);
                else
                        val = rps->cur_freq;

                add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
                                intel_gpu_freq(rps, val), period_ns / 1000);
        }

        if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
                add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
                                intel_gpu_freq(rps, rps->cur_freq),
                                period_ns / 1000);
        }

        intel_gt_pm_put_async(gt);
}

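/*
 * Sampling timer callback: runs the engine and frequency sampling every
 * PERIOD while at least one enabled event needs the timer.
 */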
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
        struct drm_i915_private *i915 =
                container_of(hrtimer, struct drm_i915_private, pmu.timer);
        struct i915_pmu *pmu = &i915->pmu;
        struct intel_gt *gt = &i915->gt;
        unsigned int period_ns;
        ktime_t now;

        if (!READ_ONCE(pmu->timer_enabled))
                return HRTIMER_NORESTART;

        now = ktime_get();
        period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
        pmu->timer_last = now;

        /*
         * Strictly speaking the passed-in period may not be 100% accurate for
         * all internal calculations, since some amount of time can be spent on
         * grabbing the forcewake. However, the potential error from timer
         * callback delay greatly dominates this, so we keep it simple.
         */
        engines_sample(gt, period_ns);
        frequency_sample(gt, period_ns);

        hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

        return HRTIMER_RESTART;
}

static u64 count_interrupts(struct drm_i915_private *i915)
{
        /* open-coded kstat_irqs() */
        struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
        u64 sum = 0;
        int cpu;

        if (!desc || !desc->kstat_irqs)
                return 0;

        for_each_possible_cpu(cpu)
                sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

        return sum;
}

static void engine_event_destroy(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct intel_engine_cs *engine;

        engine = intel_engine_lookup_user(i915,
                                          engine_event_class(event),
                                          engine_event_instance(event));
        if (WARN_ON_ONCE(!engine))
                return;

        if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
            intel_engine_supports_stats(engine))
                intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
        WARN_ON(event->parent);

        if (is_engine_event(event))
                engine_event_destroy(event);
}

static int
engine_event_status(struct intel_engine_cs *engine,
                    enum drm_i915_pmu_engine_sample sample)
{
        switch (sample) {
        case I915_SAMPLE_BUSY:
        case I915_SAMPLE_WAIT:
                break;
        case I915_SAMPLE_SEMA:
                if (INTEL_GEN(engine->i915) < 6)
                        return -ENODEV;
                break;
        default:
                return -ENOENT;
        }

        return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
        switch (config) {
        case I915_PMU_ACTUAL_FREQUENCY:
                if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
                        /* Requires a mutex for sampling! */
                        return -ENODEV;
                /* Fall-through. */
        case I915_PMU_REQUESTED_FREQUENCY:
                if (INTEL_GEN(i915) < 6)
                        return -ENODEV;
                break;
        case I915_PMU_INTERRUPTS:
                break;
        case I915_PMU_RC6_RESIDENCY:
                if (!HAS_RC6(i915))
                        return -ENODEV;
                break;
        default:
                return -ENOENT;
        }

        return 0;
}

static int engine_event_init(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct intel_engine_cs *engine;
        u8 sample;
        int ret;

        engine = intel_engine_lookup_user(i915, engine_event_class(event),
                                          engine_event_instance(event));
        if (!engine)
                return -ENODEV;

        sample = engine_event_sample(event);
        ret = engine_event_status(engine, sample);
        if (ret)
                return ret;

        if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
                ret = intel_enable_engine_stats(engine);

        return ret;
}

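/*
 * Validate a new event: reject sampling and filtering modes we do not
 * support, restrict it to the designated reader CPU and check that the
 * requested counter exists on this GPU.
 */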
static int i915_pmu_event_init(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        int ret;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /* unsupported modes and filters */
        if (event->attr.sample_period) /* no sampling */
                return -EINVAL;

        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        if (event->cpu < 0)
                return -EINVAL;

        /* only allow running on one cpu at a time */
        if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
                return -EINVAL;

        if (is_engine_event(event))
                ret = engine_event_init(event);
        else
                ret = config_status(i915, event->attr.config);
        if (ret)
                return ret;

        if (!event->parent)
                event->destroy = i915_pmu_event_destroy;

        return 0;
}

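/*
 * Return the current value of a counter, taken either from the software
 * sampling state, from precise engine busy stats, or by reading the
 * hardware/kernel state directly (interrupts, RC6 residency).
 */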
static u64 __i915_pmu_event_read(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct i915_pmu *pmu = &i915->pmu;
        u64 val = 0;

        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));

                if (WARN_ON_ONCE(!engine)) {
                        /* Do nothing */
                } else if (sample == I915_SAMPLE_BUSY &&
                           intel_engine_supports_stats(engine)) {
                        val = ktime_to_ns(intel_engine_get_busy_time(engine));
                } else {
                        val = engine->pmu.sample[sample].cur;
                }
        } else {
                switch (event->attr.config) {
                case I915_PMU_ACTUAL_FREQUENCY:
                        val =
                           div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
                                   USEC_PER_SEC /* to MHz */);
                        break;
                case I915_PMU_REQUESTED_FREQUENCY:
                        val =
                           div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
                                   USEC_PER_SEC /* to MHz */);
                        break;
                case I915_PMU_INTERRUPTS:
                        val = count_interrupts(i915);
                        break;
                case I915_PMU_RC6_RESIDENCY:
                        val = get_rc6(&i915->gt);
                        break;
                }
        }

        return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new;

again:
        prev = local64_read(&hwc->prev_count);
        new = __i915_pmu_event_read(event);

        if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
                goto again;

        local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        unsigned int bit = event_enabled_bit(event);
        struct i915_pmu *pmu = &i915->pmu;
        unsigned long flags;

        spin_lock_irqsave(&pmu->lock, flags);

        /*
         * Update the bitmask of enabled events and increment
         * the event reference counter.
         */
        BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
        GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
        GEM_BUG_ON(pmu->enable_count[bit] == ~0);
        pmu->enable |= BIT_ULL(bit);
        pmu->enable_count[bit]++;

        /*
         * Start the sampling timer if needed and not already enabled.
         */
        __i915_pmu_maybe_start_timer(pmu);

        /*
         * For per-engine events the bitmask and reference counting
         * is stored per engine.
         */
        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));

                BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
                             I915_ENGINE_SAMPLE_COUNT);
                BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
                             I915_ENGINE_SAMPLE_COUNT);
                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
                GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

                engine->pmu.enable |= BIT(sample);
                engine->pmu.enable_count[sample]++;
        }

        spin_unlock_irqrestore(&pmu->lock, flags);

        /*
         * Store the current counter value so we can report the correct delta
         * for all listeners, even when the event was already enabled and has
         * an existing non-zero value.
         */
        local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        unsigned int bit = event_enabled_bit(event);
        struct i915_pmu *pmu = &i915->pmu;
        unsigned long flags;

        spin_lock_irqsave(&pmu->lock, flags);

        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));

                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
                GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

                /*
                 * Decrement the reference count and clear the enabled
                 * bitmask when the last listener on an event goes away.
                 */
                if (--engine->pmu.enable_count[sample] == 0)
                        engine->pmu.enable &= ~BIT(sample);
        }

        GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
        GEM_BUG_ON(pmu->enable_count[bit] == 0);
        /*
         * Decrement the reference count and clear the enabled
         * bitmask when the last listener on an event goes away.
         */
        if (--pmu->enable_count[bit] == 0) {
                pmu->enable &= ~BIT_ULL(bit);
                pmu->timer_enabled &= pmu_needs_timer(pmu, true);
        }

        spin_unlock_irqrestore(&pmu->lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
        i915_pmu_enable(event);
        event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
        if (flags & PERF_EF_UPDATE)
                i915_pmu_event_read(event);
        i915_pmu_disable(event);
        event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
        if (flags & PERF_EF_START)
                i915_pmu_event_start(event, flags);

        return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
        i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
        return 0;
}

struct i915_str_attribute {
        struct device_attribute attr;
        const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
{
        struct i915_str_attribute *eattr;

        eattr = container_of(attr, struct i915_str_attribute, attr);
        return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
        (&((struct i915_str_attribute[]) { \
                { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
                  .str = _config, } \
        })[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
        I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
        NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
        .name = "format",
        .attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
        struct device_attribute attr;
        unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
{
        struct i915_ext_attribute *eattr;

        eattr = container_of(attr, struct i915_ext_attribute, attr);
        return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
        .name = "events",
        /* Patch in attrs at runtime. */
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
                          struct device_attribute *attr,
                          char *buf)
{
        return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
        .attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
        &i915_pmu_format_attr_group,
        &i915_pmu_events_attr_group,
        &i915_pmu_cpumask_attr_group,
        NULL
};

#define __event(__config, __name, __unit) \
{ \
        .config = (__config), \
        .name = (__name), \
        .unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
        .sample = (__sample), \
        .name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
        sysfs_attr_init(&attr->attr.attr);
        attr->attr.attr.name = name;
        attr->attr.attr.mode = 0444;
        attr->attr.show = i915_pmu_event_show;
        attr->val = config;

        return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
             const char *str)
{
        sysfs_attr_init(&attr->attr.attr);
        attr->attr.attr.name = name;
        attr->attr.attr.mode = 0444;
        attr->attr.show = perf_event_sysfs_show;
        attr->event_str = str;

        return ++attr;
}

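/*
 * Build the "events" sysfs attribute group at runtime, exposing only the
 * counters supported by this GPU. Engine events are named after the uabi
 * engine, e.g. "rcs0-busy", and events with a unit additionally get a
 * "<event>.unit" attribute.
 */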
static struct attribute **
create_event_attributes(struct i915_pmu *pmu)
{
        struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
        static const struct {
                u64 config;
                const char *name;
                const char *unit;
        } events[] = {
                __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
                __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
                __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
                __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
        };
        static const struct {
                enum drm_i915_pmu_engine_sample sample;
                char *name;
        } engine_events[] = {
                __engine_event(I915_SAMPLE_BUSY, "busy"),
                __engine_event(I915_SAMPLE_SEMA, "sema"),
                __engine_event(I915_SAMPLE_WAIT, "wait"),
        };
        unsigned int count = 0;
        struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
        struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
        struct attribute **attr = NULL, **attr_iter;
        struct intel_engine_cs *engine;
        unsigned int i;

        /* Count how many counters we will be exposing. */
        for (i = 0; i < ARRAY_SIZE(events); i++) {
                if (!config_status(i915, events[i].config))
                        count++;
        }

        for_each_uabi_engine(engine, i915) {
                for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
                        if (!engine_event_status(engine,
                                                 engine_events[i].sample))
                                count++;
                }
        }

        /* Allocate attribute objects and table. */
        i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
        if (!i915_attr)
                goto err_alloc;

        pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
        if (!pmu_attr)
                goto err_alloc;

        /* Max one pointer of each attribute type plus a termination entry. */
        attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
        if (!attr)
                goto err_alloc;

        i915_iter = i915_attr;
        pmu_iter = pmu_attr;
        attr_iter = attr;

        /* Initialize supported non-engine counters. */
        for (i = 0; i < ARRAY_SIZE(events); i++) {
                char *str;

                if (config_status(i915, events[i].config))
                        continue;

                str = kstrdup(events[i].name, GFP_KERNEL);
                if (!str)
                        goto err;

                *attr_iter++ = &i915_iter->attr.attr;
                i915_iter = add_i915_attr(i915_iter, str, events[i].config);

                if (events[i].unit) {
                        str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &pmu_iter->attr.attr;
                        pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
                }
        }

        /* Initialize supported engine counters. */
        for_each_uabi_engine(engine, i915) {
                for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
                        char *str;

                        if (engine_event_status(engine,
                                                engine_events[i].sample))
                                continue;

                        str = kasprintf(GFP_KERNEL, "%s-%s",
                                        engine->name, engine_events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &i915_iter->attr.attr;
                        i915_iter =
                                add_i915_attr(i915_iter, str,
                                              __I915_PMU_ENGINE(engine->uabi_class,
                                                                engine->uabi_instance,
                                                                engine_events[i].sample));

                        str = kasprintf(GFP_KERNEL, "%s-%s.unit",
                                        engine->name, engine_events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &pmu_iter->attr.attr;
                        pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
                }
        }

        pmu->i915_attr = i915_attr;
        pmu->pmu_attr = pmu_attr;

        return attr;

err:;
        for (attr_iter = attr; *attr_iter; attr_iter++)
                kfree((*attr_iter)->name);

err_alloc:
        kfree(attr);
        kfree(i915_attr);
        kfree(pmu_attr);

        return NULL;
}

static void free_event_attributes(struct i915_pmu *pmu)
{
        struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

        for (; *attr_iter; attr_iter++)
                kfree((*attr_iter)->name);

        kfree(i915_pmu_events_attr_group.attrs);
        kfree(pmu->i915_attr);
        kfree(pmu->pmu_attr);

        i915_pmu_events_attr_group.attrs = NULL;
        pmu->i915_attr = NULL;
        pmu->pmu_attr = NULL;
}

static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
        struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

        GEM_BUG_ON(!pmu->base.event_init);

        /* Select the first online CPU as a designated reader. */
        if (!cpumask_weight(&i915_pmu_cpumask))
                cpumask_set_cpu(cpu, &i915_pmu_cpumask);

        return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
        struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
        unsigned int target;

        GEM_BUG_ON(!pmu->base.event_init);

        if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
                target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
                /* Migrate events if there is a valid target */
                if (target < nr_cpu_ids) {
                        cpumask_set_cpu(target, &i915_pmu_cpumask);
                        perf_pmu_migrate_context(&pmu->base, cpu, target);
                }
        }

        return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
        enum cpuhp_state slot;
        int ret;

        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
                                      "perf/x86/intel/i915:online",
                                      i915_pmu_cpu_online,
                                      i915_pmu_cpu_offline);
        if (ret < 0)
                return ret;

        slot = ret;
        ret = cpuhp_state_add_instance(slot, &pmu->node);
        if (ret) {
                cpuhp_remove_multi_state(slot);
                return ret;
        }

        cpuhp_slot = slot;
        return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
        WARN_ON(cpuhp_slot == CPUHP_INVALID);
        WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &pmu->node));
        cpuhp_remove_multi_state(cpuhp_slot);
}

static bool is_igp(struct drm_i915_private *i915)
{
        struct pci_dev *pdev = i915->drm.pdev;

        /* IGP is 0000:00:02.0 */
        return pci_domain_nr(pdev->bus) == 0 &&
               pdev->bus->number == 0 &&
               PCI_SLOT(pdev->devfn) == 2 &&
               PCI_FUNC(pdev->devfn) == 0;
}

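/*
 * i915_pmu_register - register the i915 PMU with the perf core.
 *
 * Sets up the sampling timer, builds the event attributes, registers the
 * PMU (named "i915" for the integrated GPU, otherwise derived from the
 * device name) and hooks into CPU hotplug to keep the reader cpumask
 * valid. Failure is not fatal to driver load; the PMU is simply absent.
 */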
void i915_pmu_register(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;
        int ret = -ENOMEM;

        if (INTEL_GEN(i915) <= 2) {
                dev_info(i915->drm.dev, "PMU not supported for this GPU.");
                return;
        }

        spin_lock_init(&pmu->lock);
        hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        pmu->timer.function = i915_sample;

        if (!is_igp(i915)) {
                pmu->name = kasprintf(GFP_KERNEL,
                                      "i915_%s",
                                      dev_name(i915->drm.dev));
                if (pmu->name) {
                        /* tools/perf reserves colons as special. */
                        strreplace((char *)pmu->name, ':', '_');
                }
        } else {
                pmu->name = "i915";
        }
        if (!pmu->name)
                goto err;

        i915_pmu_events_attr_group.attrs = create_event_attributes(pmu);
        if (!i915_pmu_events_attr_group.attrs)
                goto err_name;

        pmu->base.attr_groups   = i915_pmu_attr_groups;
        pmu->base.task_ctx_nr   = perf_invalid_context;
        pmu->base.event_init    = i915_pmu_event_init;
        pmu->base.add           = i915_pmu_event_add;
        pmu->base.del           = i915_pmu_event_del;
        pmu->base.start         = i915_pmu_event_start;
        pmu->base.stop          = i915_pmu_event_stop;
        pmu->base.read          = i915_pmu_event_read;
        pmu->base.event_idx     = i915_pmu_event_event_idx;

        ret = perf_pmu_register(&pmu->base, pmu->name, -1);
        if (ret)
                goto err_attr;

        ret = i915_pmu_register_cpuhp_state(pmu);
        if (ret)
                goto err_unreg;

        return;

err_unreg:
        perf_pmu_unregister(&pmu->base);
err_attr:
        pmu->base.event_init = NULL;
        free_event_attributes(pmu);
err_name:
        if (!is_igp(i915))
                kfree(pmu->name);
err:
        dev_notice(i915->drm.dev, "Failed to register PMU!\n");
}

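/*
 * i915_pmu_unregister - tear down the i915 PMU.
 *
 * Cancels the sampling timer, removes the CPU hotplug instance,
 * unregisters from the perf core and frees the dynamically allocated
 * event attributes and name.
 */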
void i915_pmu_unregister(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        if (!pmu->base.event_init)
                return;

        WARN_ON(pmu->enable);

        hrtimer_cancel(&pmu->timer);

        i915_pmu_unregister_cpuhp_state(pmu);

        perf_pmu_unregister(&pmu->base);
        pmu->base.event_init = NULL;
        if (!is_igp(i915))
                kfree(pmu->name);
        free_event_attributes(pmu);
}