Merge branch 'proc-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...
[linux-2.6-microblaze.git] / drivers / gpu / drm / i915 / gt / selftest_rps.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
8
9 #include "intel_engine_heartbeat.h"
10 #include "intel_engine_pm.h"
11 #include "intel_gpu_commands.h"
12 #include "intel_gt_clock_utils.h"
13 #include "intel_gt_pm.h"
14 #include "intel_rc6.h"
15 #include "selftest_rps.h"
16 #include "selftests/igt_flush_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/librapl.h"
19
/* Try to isolate the impact of cstates from determining frequency response */
21 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
22
23 static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine)
24 {
25         unsigned long old;
26
27         old = fetch_and_zero(&engine->props.heartbeat_interval_ms);
28
29         intel_engine_pm_get(engine);
30         intel_engine_park_heartbeat(engine);
31
32         return old;
33 }
34
/*
 * Undo engine_heartbeat_disable(): drop the engine-pm reference and
 * restore the saved heartbeat interval.
 */
static void engine_heartbeat_enable(struct intel_engine_cs *engine,
                                    unsigned long saved)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms = saved;
}
42
/*
 * Substituted for rps->work.func while a test runs so that any queued RPS
 * interrupt work is a no-op and cannot adjust the frequency behind the
 * test's back.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
}
46
/*
 * sort() comparator for u64 samples.
 *
 * Fix: the comparison must be between the dereferenced values; the
 * previous code compared the pointers themselves (a < b), which orders
 * by address and leaves the array effectively unsorted, corrupting the
 * median/triangle filters built on top of sort().
 */
static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}
58
/*
 * sort() comparator for u32 samples.
 *
 * Fix: compare the pointed-to values, not the pointers. Comparing the
 * pointers (a < b) sorts by address, so the cycle samples were never
 * actually ordered before the median was taken.
 */
static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}
70
71 static struct i915_vma *
72 create_spin_counter(struct intel_engine_cs *engine,
73                     struct i915_address_space *vm,
74                     bool srm,
75                     u32 **cancel,
76                     u32 **counter)
77 {
78         enum {
79                 COUNT,
80                 INC,
81                 __NGPR__,
82         };
83 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
84         struct drm_i915_gem_object *obj;
85         struct i915_vma *vma;
86         unsigned long end;
87         u32 *base, *cs;
88         int loop, i;
89         int err;
90
91         obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
92         if (IS_ERR(obj))
93                 return ERR_CAST(obj);
94
95         end = obj->base.size / sizeof(u32) - 1;
96
97         vma = i915_vma_instance(obj, vm, NULL);
98         if (IS_ERR(vma)) {
99                 i915_gem_object_put(obj);
100                 return vma;
101         }
102
103         err = i915_vma_pin(vma, 0, 0, PIN_USER);
104         if (err) {
105                 i915_vma_put(vma);
106                 return ERR_PTR(err);
107         }
108
109         base = i915_gem_object_pin_map(obj, I915_MAP_WC);
110         if (IS_ERR(base)) {
111                 i915_gem_object_put(obj);
112                 return ERR_CAST(base);
113         }
114         cs = base;
115
116         *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
117         for (i = 0; i < __NGPR__; i++) {
118                 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
119                 *cs++ = 0;
120                 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
121                 *cs++ = 0;
122         }
123
124         *cs++ = MI_LOAD_REGISTER_IMM(1);
125         *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
126         *cs++ = 1;
127
128         loop = cs - base;
129
130         /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
131         for (i = 0; i < 1024; i++) {
132                 *cs++ = MI_MATH(4);
133                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
134                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
135                 *cs++ = MI_MATH_ADD;
136                 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
137
138                 if (srm) {
139                         *cs++ = MI_STORE_REGISTER_MEM_GEN8;
140                         *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
141                         *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
142                         *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
143                 }
144         }
145
146         *cs++ = MI_BATCH_BUFFER_START_GEN8;
147         *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
148         *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
149         GEM_BUG_ON(cs - base > end);
150
151         i915_gem_object_flush_map(obj);
152
153         *cancel = base + loop;
154         *counter = srm ? memset32(base + end, 0, 1) : NULL;
155         return vma;
156 }
157
158 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
159 {
160         u8 history[64], i;
161         unsigned long end;
162         int sleep;
163
164         i = 0;
165         memset(history, freq, sizeof(history));
166         sleep = 20;
167
168         /* The PCU does not change instantly, but drifts towards the goal? */
169         end = jiffies + msecs_to_jiffies(timeout_ms);
170         do {
171                 u8 act;
172
173                 act = read_cagf(rps);
174                 if (time_after(jiffies, end))
175                         return act;
176
177                 /* Target acquired */
178                 if (act == freq)
179                         return act;
180
181                 /* Any change within the last N samples? */
182                 if (!memchr_inv(history, act, sizeof(history)))
183                         return act;
184
185                 history[i] = act;
186                 i = (i + 1) % ARRAY_SIZE(history);
187
188                 usleep_range(sleep, 2 * sleep);
189                 sleep *= 2;
190                 if (sleep > timeout_ms * 20)
191                         sleep = timeout_ms * 20;
192         } while (1);
193 }
194
/*
 * Request frequency @freq under the rps lock, then wait (up to 50ms) for
 * the hardware to settle. Returns the last actual frequency observed,
 * which may differ from @freq if the PCU throttles or clamps the request.
 */
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	intel_rps_set(rps, freq);
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}
205
206 static void show_pstate_limits(struct intel_rps *rps)
207 {
208         struct drm_i915_private *i915 = rps_to_i915(rps);
209
210         if (IS_BROXTON(i915)) {
211                 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
212                         i915_mmio_reg_offset(BXT_RP_STATE_CAP),
213                         intel_uncore_read(rps_to_uncore(rps),
214                                           BXT_RP_STATE_CAP));
215         } else if (IS_GEN(i915, 9)) {
216                 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
217                         i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
218                         intel_uncore_read(rps_to_uncore(rps),
219                                           GEN9_RP_STATE_LIMITS));
220         }
221 }
222
/*
 * Verify that the RPS evaluation-interval counter ticks at the GT clock
 * frequency we believe it does: with a spinner keeping the engine busy,
 * sample GEN6_RP_CUR_UP_EI against walltime and cross-check both
 * conversions (cycles->ns and ns->cycles) to within 20%.
 */
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* Neutralise the RPS worker so it cannot react to our poking. */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		unsigned long saved_heartbeat;
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		saved_heartbeat = engine_heartbeat_disable(engine);

		/* Keep the engine 100% busy while we sample the EI counter. */
		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			engine_heartbeat_enable(engine, saved_heartbeat);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine, saved_heartbeat);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			/*
			 * Take 5 samples of (walltime delta, EI-counter
			 * delta) over ~1ms each, with preemption off so the
			 * two reads stay tightly paired.
			 */
			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		engine_heartbeat_enable(engine, saved_heartbeat);

		if (err == 0) {
			/* Check both directions of the conversion, +-20%. */
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}
375
376 int live_rps_control(void *arg)
377 {
378         struct intel_gt *gt = arg;
379         struct intel_rps *rps = &gt->rps;
380         void (*saved_work)(struct work_struct *wrk);
381         struct intel_engine_cs *engine;
382         enum intel_engine_id id;
383         struct igt_spinner spin;
384         int err = 0;
385
386         /*
387          * Check that the actual frequency matches our requested frequency,
388          * to verify our control mechanism. We have to be careful that the
389          * PCU may throttle the GPU in which case the actual frequency used
390          * will be lowered than requested.
391          */
392
393         if (!intel_rps_is_enabled(rps))
394                 return 0;
395
396         if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
397                 return 0;
398
399         if (igt_spinner_init(&spin, gt))
400                 return -ENOMEM;
401
402         intel_gt_pm_wait_for_idle(gt);
403         saved_work = rps->work.func;
404         rps->work.func = dummy_rps_work;
405
406         intel_gt_pm_get(gt);
407         for_each_engine(engine, gt, id) {
408                 unsigned long saved_heartbeat;
409                 struct i915_request *rq;
410                 ktime_t min_dt, max_dt;
411                 int f, limit;
412                 int min, max;
413
414                 if (!intel_engine_can_store_dword(engine))
415                         continue;
416
417                 saved_heartbeat = engine_heartbeat_disable(engine);
418
419                 rq = igt_spinner_create_request(&spin,
420                                                 engine->kernel_context,
421                                                 MI_NOOP);
422                 if (IS_ERR(rq)) {
423                         err = PTR_ERR(rq);
424                         break;
425                 }
426
427                 i915_request_add(rq);
428
429                 if (!igt_wait_for_spinner(&spin, rq)) {
430                         pr_err("%s: RPS spinner did not start\n",
431                                engine->name);
432                         igt_spinner_end(&spin);
433                         engine_heartbeat_enable(engine, saved_heartbeat);
434                         intel_gt_set_wedged(engine->gt);
435                         err = -EIO;
436                         break;
437                 }
438
439                 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
440                         pr_err("%s: could not set minimum frequency [%x], only %x!\n",
441                                engine->name, rps->min_freq, read_cagf(rps));
442                         igt_spinner_end(&spin);
443                         engine_heartbeat_enable(engine, saved_heartbeat);
444                         show_pstate_limits(rps);
445                         err = -EINVAL;
446                         break;
447                 }
448
449                 for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
450                         if (rps_set_check(rps, f) < f)
451                                 break;
452                 }
453
454                 limit = rps_set_check(rps, f);
455
456                 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
457                         pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
458                                engine->name, rps->min_freq, read_cagf(rps));
459                         igt_spinner_end(&spin);
460                         engine_heartbeat_enable(engine, saved_heartbeat);
461                         show_pstate_limits(rps);
462                         err = -EINVAL;
463                         break;
464                 }
465
466                 max_dt = ktime_get();
467                 max = rps_set_check(rps, limit);
468                 max_dt = ktime_sub(ktime_get(), max_dt);
469
470                 min_dt = ktime_get();
471                 min = rps_set_check(rps, rps->min_freq);
472                 min_dt = ktime_sub(ktime_get(), min_dt);
473
474                 igt_spinner_end(&spin);
475                 engine_heartbeat_enable(engine, saved_heartbeat);
476
477                 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
478                         engine->name,
479                         rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
480                         rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
481                         limit, intel_gpu_freq(rps, limit),
482                         min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
483
484                 if (limit == rps->min_freq) {
485                         pr_err("%s: GPU throttled to minimum!\n",
486                                engine->name);
487                         show_pstate_limits(rps);
488                         err = -ENODEV;
489                         break;
490                 }
491
492                 if (igt_flush_test(gt->i915)) {
493                         err = -EIO;
494                         break;
495                 }
496         }
497         intel_gt_pm_put(gt);
498
499         igt_spinner_fini(&spin);
500
501         intel_gt_pm_wait_for_idle(gt);
502         rps->work.func = saved_work;
503
504         return err;
505 }
506
507 static void show_pcu_config(struct intel_rps *rps)
508 {
509         struct drm_i915_private *i915 = rps_to_i915(rps);
510         unsigned int max_gpu_freq, min_gpu_freq;
511         intel_wakeref_t wakeref;
512         int gpu_freq;
513
514         if (!HAS_LLC(i915))
515                 return;
516
517         min_gpu_freq = rps->min_freq;
518         max_gpu_freq = rps->max_freq;
519         if (INTEL_GEN(i915) >= 9) {
520                 /* Convert GT frequency to 50 HZ units */
521                 min_gpu_freq /= GEN9_FREQ_SCALER;
522                 max_gpu_freq /= GEN9_FREQ_SCALER;
523         }
524
525         wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
526
527         pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
528         for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
529                 int ia_freq = gpu_freq;
530
531                 sandybridge_pcode_read(i915,
532                                        GEN6_PCODE_READ_MIN_FREQ_TABLE,
533                                        &ia_freq, NULL);
534
535                 pr_info("%5d  %5d  %5d\n",
536                         gpu_freq * 50,
537                         ((ia_freq >> 0) & 0xff) * 100,
538                         ((ia_freq >> 8) & 0xff) * 100);
539         }
540
541         intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
542 }
543
544 static u64 __measure_frequency(u32 *cntr, int duration_ms)
545 {
546         u64 dc, dt;
547
548         dt = ktime_get();
549         dc = READ_ONCE(*cntr);
550         usleep_range(1000 * duration_ms, 2000 * duration_ms);
551         dc = READ_ONCE(*cntr) - dc;
552         dt = ktime_get() - dt;
553
554         return div64_u64(1000 * 1000 * dc, dt);
555 }
556
557 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
558 {
559         u64 x[5];
560         int i;
561
562         *freq = rps_set_check(rps, *freq);
563         for (i = 0; i < 5; i++)
564                 x[i] = __measure_frequency(cntr, 2);
565         *freq = (*freq + read_cagf(rps)) / 2;
566
567         /* A simple triangle filter for better result stability */
568         sort(x, 5, sizeof(*x), cmp_u64, NULL);
569         return div_u64(x[1] + 2 * x[2] + x[3], 4);
570 }
571
572 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
573                                   int duration_ms)
574 {
575         u64 dc, dt;
576
577         dt = ktime_get();
578         dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
579         usleep_range(1000 * duration_ms, 2000 * duration_ms);
580         dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
581         dt = ktime_get() - dt;
582
583         return div64_u64(1000 * 1000 * dc, dt);
584 }
585
586 static u64 measure_cs_frequency_at(struct intel_rps *rps,
587                                    struct intel_engine_cs *engine,
588                                    int *freq)
589 {
590         u64 x[5];
591         int i;
592
593         *freq = rps_set_check(rps, *freq);
594         for (i = 0; i < 5; i++)
595                 x[i] = __measure_cs_frequency(engine, 2);
596         *freq = (*freq + read_cagf(rps)) / 2;
597
598         /* A simple triangle filter for better result stability */
599         sort(x, 5, sizeof(*x), cmp_u64, NULL);
600         return div_u64(x[1] + 2 * x[2] + x[3], 4);
601 }
602
/* Is x within (f_n/f_d, f_d/f_n) of y, i.e. do x and y agree to that ratio? */
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return x * f_d > y * f_n && x * f_n < y * f_d;
}
607
/*
 * Check that the CS (command streamer) clock scales with the requested
 * RPS frequency: run an unterminated MI_MATH counting batch, measure the
 * counter rate at min and max frequency, and require the rates to scale
 * with frequency to within a 2:3 ratio.
 */
int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
		return 0;

	/* Pin cstates so CPU wakeup latency does not skew the sampling. */
	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	/* Neutralise the RPS worker while we drive frequencies by hand. */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		unsigned long saved_heartbeat;
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		saved_heartbeat = engine_heartbeat_disable(engine);

		/* srm=false: we read CS_GPR(0) directly via mmio. */
		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			engine_heartbeat_enable(engine, saved_heartbeat);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_vma_unlock(vma);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		/*
		 * NOTE(review): if the spinner never starts ticking we jump
		 * to the cleanup with err still 0, silently skipping this
		 * engine rather than failing — confirm this is intentional.
		 */
		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			/* On failure, walk the range to show where it breaks. */
			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINVAL;
		}

err_vma:
		/* Terminate the infinite loop before releasing the batch. */
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_put(vma);

		engine_heartbeat_enable(engine, saved_heartbeat);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}
750
751 int live_rps_frequency_srm(void *arg)
752 {
753         void (*saved_work)(struct work_struct *wrk);
754         struct intel_gt *gt = arg;
755         struct intel_rps *rps = &gt->rps;
756         struct intel_engine_cs *engine;
757         struct pm_qos_request qos;
758         enum intel_engine_id id;
759         int err = 0;
760
761         /*
762          * The premise is that the GPU does change freqency at our behest.
763          * Let's check there is a correspondence between the requested
764          * frequency, the actual frequency, and the observed clock rate.
765          */
766
767         if (!intel_rps_is_enabled(rps))
768                 return 0;
769
770         if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
771                 return 0;
772
773         if (CPU_LATENCY >= 0)
774                 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
775
776         intel_gt_pm_wait_for_idle(gt);
777         saved_work = rps->work.func;
778         rps->work.func = dummy_rps_work;
779
780         for_each_engine(engine, gt, id) {
781                 unsigned long saved_heartbeat;
782                 struct i915_request *rq;
783                 struct i915_vma *vma;
784                 u32 *cancel, *cntr;
785                 struct {
786                         u64 count;
787                         int freq;
788                 } min, max;
789
790                 saved_heartbeat = engine_heartbeat_disable(engine);
791
792                 vma = create_spin_counter(engine,
793                                           engine->kernel_context->vm, true,
794                                           &cancel, &cntr);
795                 if (IS_ERR(vma)) {
796                         err = PTR_ERR(vma);
797                         engine_heartbeat_enable(engine, saved_heartbeat);
798                         break;
799                 }
800
801                 rq = intel_engine_create_kernel_request(engine);
802                 if (IS_ERR(rq)) {
803                         err = PTR_ERR(rq);
804                         goto err_vma;
805                 }
806
807                 i915_vma_lock(vma);
808                 err = i915_request_await_object(rq, vma->obj, false);
809                 if (!err)
810                         err = i915_vma_move_to_active(vma, rq, 0);
811                 if (!err)
812                         err = rq->engine->emit_bb_start(rq,
813                                                         vma->node.start,
814                                                         PAGE_SIZE, 0);
815                 i915_vma_unlock(vma);
816                 i915_request_add(rq);
817                 if (err)
818                         goto err_vma;
819
820                 if (wait_for(READ_ONCE(*cntr), 10)) {
821                         pr_err("%s: timed loop did not start\n",
822                                engine->name);
823                         goto err_vma;
824                 }
825
826                 min.freq = rps->min_freq;
827                 min.count = measure_frequency_at(rps, cntr, &min.freq);
828
829                 max.freq = rps->max_freq;
830                 max.count = measure_frequency_at(rps, cntr, &max.freq);
831
832                 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
833                         engine->name,
834                         min.count, intel_gpu_freq(rps, min.freq),
835                         max.count, intel_gpu_freq(rps, max.freq),
836                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
837                                                      max.freq * min.count));
838
839                 if (!scaled_within(max.freq * min.count,
840                                    min.freq * max.count,
841                                    1, 2)) {
842                         int f;
843
844                         pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
845                                engine->name,
846                                max.freq * min.count,
847                                min.freq * max.count);
848                         show_pcu_config(rps);
849
850                         for (f = min.freq + 1; f <= rps->max_freq; f++) {
851                                 int act = f;
852                                 u64 count;
853
854                                 count = measure_frequency_at(rps, cntr, &act);
855                                 if (act < f)
856                                         break;
857
858                                 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
859                                         engine->name,
860                                         act, intel_gpu_freq(rps, act), count,
861                                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
862                                                                      act * min.count));
863
864                                 f = act; /* may skip ahead [pcu granularity] */
865                         }
866
867                         err = -EINVAL;
868                 }
869
870 err_vma:
871                 *cancel = MI_BATCH_BUFFER_END;
872                 i915_gem_object_flush_map(vma->obj);
873                 i915_gem_object_unpin_map(vma->obj);
874                 i915_vma_unpin(vma);
875                 i915_vma_put(vma);
876
877                 engine_heartbeat_enable(engine, saved_heartbeat);
878                 if (igt_flush_test(gt->i915))
879                         err = -EIO;
880                 if (err)
881                         break;
882         }
883
884         intel_gt_pm_wait_for_idle(gt);
885         rps->work.func = saved_work;
886
887         if (CPU_LATENCY >= 0)
888                 cpu_latency_qos_remove_request(&qos);
889
890         return err;
891 }
892
/*
 * Wait out a full RPS evaluation interval (EI, @timeout_us in usecs) with
 * the interrupt status cleared in the middle, so that any rps->pm_iir bits
 * observed afterwards must have been raised during the final interval.
 */
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}
906
907 static int __rps_up_interrupt(struct intel_rps *rps,
908                               struct intel_engine_cs *engine,
909                               struct igt_spinner *spin)
910 {
911         struct intel_uncore *uncore = engine->uncore;
912         struct i915_request *rq;
913         u32 timeout;
914
915         if (!intel_engine_can_store_dword(engine))
916                 return 0;
917
918         rps_set_check(rps, rps->min_freq);
919
920         rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
921         if (IS_ERR(rq))
922                 return PTR_ERR(rq);
923
924         i915_request_get(rq);
925         i915_request_add(rq);
926
927         if (!igt_wait_for_spinner(spin, rq)) {
928                 pr_err("%s: RPS spinner did not start\n",
929                        engine->name);
930                 i915_request_put(rq);
931                 intel_gt_set_wedged(engine->gt);
932                 return -EIO;
933         }
934
935         if (!intel_rps_is_active(rps)) {
936                 pr_err("%s: RPS not enabled on starting spinner\n",
937                        engine->name);
938                 igt_spinner_end(spin);
939                 i915_request_put(rq);
940                 return -EINVAL;
941         }
942
943         if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
944                 pr_err("%s: RPS did not register UP interrupt\n",
945                        engine->name);
946                 i915_request_put(rq);
947                 return -EINVAL;
948         }
949
950         if (rps->last_freq != rps->min_freq) {
951                 pr_err("%s: RPS did not program min frequency\n",
952                        engine->name);
953                 i915_request_put(rq);
954                 return -EINVAL;
955         }
956
957         timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
958         timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
959         timeout = DIV_ROUND_UP(timeout, 1000);
960
961         sleep_for_ei(rps, timeout);
962         GEM_BUG_ON(i915_request_completed(rq));
963
964         igt_spinner_end(spin);
965         i915_request_put(rq);
966
967         if (rps->cur_freq != rps->min_freq) {
968                 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
969                        engine->name, intel_rps_read_actual_frequency(rps));
970                 return -EINVAL;
971         }
972
973         if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
974                 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
975                        engine->name, rps->pm_iir,
976                        intel_uncore_read(uncore, GEN6_RP_PREV_UP),
977                        intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
978                        intel_uncore_read(uncore, GEN6_RP_UP_EI));
979                 return -EINVAL;
980         }
981
982         return 0;
983 }
984
/*
 * Check that holding the GPU at its maximum frequency while otherwise idle
 * causes the PCU to raise a DOWN threshold (or timeout) interrupt within a
 * single evaluation interval.
 *
 * Returns 0 on success, -EINVAL on a failed check.
 */
static int __rps_down_interrupt(struct intel_rps *rps,
                                struct intel_engine_cs *engine)
{
        struct intel_uncore *uncore = engine->uncore;
        u32 timeout;

        /* Pin to the maximum frequency so the only way is down */
        rps_set_check(rps, rps->max_freq);

        if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
                pr_err("%s: RPS did not register DOWN interrupt\n",
                       engine->name);
                return -EINVAL;
        }

        if (rps->last_freq != rps->max_freq) {
                pr_err("%s: RPS did not program max frequency\n",
                       engine->name);
                return -EINVAL;
        }

        /* Convert the HW evaluation interval into usecs for sleep_for_ei() */
        timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
        timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
        timeout = DIV_ROUND_UP(timeout, 1000);

        sleep_for_ei(rps, timeout);

        /* With the worker stubbed out, the interrupt must not reclock us */
        if (rps->cur_freq != rps->max_freq) {
                pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
                       engine->name,
                       intel_rps_read_actual_frequency(rps));
                return -EINVAL;
        }

        if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
                pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
                       engine->name, rps->pm_iir,
                       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
                       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
                       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
                return -EINVAL;
        }

        return 0;
}
1032
/*
 * live_rps_interrupt - verify the PCU raises UP/DOWN interrupts
 *
 * While busy at the minimum frequency, expect an UP threshold interrupt;
 * while idle at the maximum frequency, expect a DOWN threshold/timeout
 * interrupt. The RPS worker is replaced with a stub so that received
 * interrupts are recorded in pm_iir but do not trigger an actual reclock.
 */
int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps))
		return 0;

	/* Sample the interrupt mask while the GT is awake */
	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* Neuter the RPS worker: record interrupts without reclocking */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			unsigned long saved_heartbeat;

			/* Each pass must start from parked, i.e. RPS idle */
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			saved_heartbeat = engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			engine_heartbeat_enable(engine, saved_heartbeat);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			unsigned long saved_heartbeat;

			saved_heartbeat = engine_heartbeat_disable(engine);
			/* Disable rc6 so the GT stays awake while idle */
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			engine_heartbeat_enable(engine, saved_heartbeat);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	/* Restore the real RPS worker */
	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
1112
1113 static u64 __measure_power(int duration_ms)
1114 {
1115         u64 dE, dt;
1116
1117         dt = ktime_get();
1118         dE = librapl_energy_uJ();
1119         usleep_range(1000 * duration_ms, 2000 * duration_ms);
1120         dE = librapl_energy_uJ() - dE;
1121         dt = ktime_get() - dt;
1122
1123         return div64_u64(1000 * 1000 * dE, dt);
1124 }
1125
1126 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1127 {
1128         u64 x[5];
1129         int i;
1130
1131         *freq = rps_set_check(rps, *freq);
1132         for (i = 0; i < 5; i++)
1133                 x[i] = __measure_power(5);
1134         *freq = (*freq + read_cagf(rps)) / 2;
1135
1136         /* A simple triangle filter for better result stability */
1137         sort(x, 5, sizeof(*x), cmp_u64, NULL);
1138         return div_u64(x[1] + 2 * x[2] + x[3], 4);
1139 }
1140
/*
 * live_rps_power - verify that a lower frequency draws less power
 *
 * Run a spinner at the maximum and then the minimum RPS frequency,
 * measuring the package power via RAPL at each, and check that the
 * lower frequency did indeed consume less power.
 */
int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurement support
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	/* Skip if RAPL energy readings are unavailable on this platform */
	if (!librapl_energy_uJ())
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* Neuter the RPS worker so our manual reclocks are not overridden */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		unsigned long saved_heartbeat;
		struct i915_request *rq;
		struct {
			u64 power; /* mW, from measure_power_at() */
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		saved_heartbeat = engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			engine_heartbeat_enable(engine, saved_heartbeat);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine, saved_heartbeat);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		/* Sample power with the spinner busy at max, then at min */
		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		engine_heartbeat_enable(engine, saved_heartbeat);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		/* If the achieved freqs are within 10%, no judgement possible */
		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		/* Require roughly 10% power saving at the lower frequency */
		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	/* Restore the real RPS worker */
	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
1245
1246 int live_rps_dynamic(void *arg)
1247 {
1248         struct intel_gt *gt = arg;
1249         struct intel_rps *rps = &gt->rps;
1250         struct intel_engine_cs *engine;
1251         enum intel_engine_id id;
1252         struct igt_spinner spin;
1253         int err = 0;
1254
1255         /*
1256          * We've looked at the bascs, and have established that we
1257          * can change the clock frequency and that the HW will generate
1258          * interrupts based on load. Now we check how we integrate those
1259          * moving parts into dynamic reclocking based on load.
1260          */
1261
1262         if (!intel_rps_is_enabled(rps))
1263                 return 0;
1264
1265         if (igt_spinner_init(&spin, gt))
1266                 return -ENOMEM;
1267
1268         for_each_engine(engine, gt, id) {
1269                 struct i915_request *rq;
1270                 struct {
1271                         ktime_t dt;
1272                         u8 freq;
1273                 } min, max;
1274
1275                 if (!intel_engine_can_store_dword(engine))
1276                         continue;
1277
1278                 intel_gt_pm_wait_for_idle(gt);
1279                 GEM_BUG_ON(intel_rps_is_active(rps));
1280                 rps->cur_freq = rps->min_freq;
1281
1282                 intel_engine_pm_get(engine);
1283                 intel_rc6_disable(&gt->rc6);
1284                 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1285
1286                 rq = igt_spinner_create_request(&spin,
1287                                                 engine->kernel_context,
1288                                                 MI_NOOP);
1289                 if (IS_ERR(rq)) {
1290                         err = PTR_ERR(rq);
1291                         goto err;
1292                 }
1293
1294                 i915_request_add(rq);
1295
1296                 max.dt = ktime_get();
1297                 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1298                 max.dt = ktime_sub(ktime_get(), max.dt);
1299
1300                 igt_spinner_end(&spin);
1301
1302                 min.dt = ktime_get();
1303                 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1304                 min.dt = ktime_sub(ktime_get(), min.dt);
1305
1306                 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1307                         engine->name,
1308                         max.freq, intel_gpu_freq(rps, max.freq),
1309                         ktime_to_ns(max.dt),
1310                         min.freq, intel_gpu_freq(rps, min.freq),
1311                         ktime_to_ns(min.dt));
1312                 if (min.freq >= max.freq) {
1313                         pr_err("%s: dynamic reclocking of spinner failed\n!",
1314                                engine->name);
1315                         err = -EINVAL;
1316                 }
1317
1318 err:
1319                 intel_rc6_enable(&gt->rc6);
1320                 intel_engine_pm_put(engine);
1321
1322                 if (igt_flush_test(gt->i915))
1323                         err = -EIO;
1324                 if (err)
1325                         break;
1326         }
1327
1328         igt_spinner_fini(&spin);
1329
1330         return err;
1331 }