// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
        const u64 *a = A, *b = B;

        if (*a < *b)
                return -1;
        else if (*a > *b)
                return 1;
        else
                return 0;
}

static int cmp_u32(const void *A, const void *B)
{
        const u32 *a = A, *b = B;

        if (*a < *b)
                return -1;
        else if (*a > *b)
                return 1;
        else
                return 0;
}

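/*
 * Build a self-looping batch that increments a counter as fast as the
 * command streamer will execute it: first clear a pair of GPR registers,
 * then run an unrolled MI_MATH loop adding INC to COUNT (optionally
 * storing COUNT to the last dword of the buffer via SRM), and finally
 * jump back to the top of the loop with MI_BATCH_BUFFER_START. The
 * caller stops the loop by overwriting *cancel with MI_BATCH_BUFFER_END.
 */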
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
                    struct i915_address_space *vm,
                    bool srm,
                    u32 **cancel,
                    u32 **counter)
{
        enum {
                COUNT,
                INC,
                __NGPR__,
        };
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        unsigned long end;
        u32 *base, *cs;
        int loop, i;
        int err;

        obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        end = obj->base.size / sizeof(u32) - 1;

        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
                i915_gem_object_put(obj);
                return vma;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err) {
                i915_vma_put(vma);
                return ERR_PTR(err);
        }

        base = i915_gem_object_pin_map(obj, I915_MAP_WC);
        if (IS_ERR(base)) {
                i915_gem_object_put(obj);
                return ERR_CAST(base);
        }
        cs = base;

        *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
        for (i = 0; i < __NGPR__; i++) {
                *cs++ = i915_mmio_reg_offset(CS_GPR(i));
                *cs++ = 0;
                *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
                *cs++ = 0;
        }

        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
        *cs++ = 1;

        loop = cs - base;

        /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
        for (i = 0; i < 1024; i++) {
                *cs++ = MI_MATH(4);
                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
                *cs++ = MI_MATH_ADD;
                *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

                if (srm) {
                        *cs++ = MI_STORE_REGISTER_MEM_GEN8;
                        *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
                        *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
                        *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
                }
        }

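        /* Branch back to the start of the MI_MATH loop, skipping the GPR setup */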
        *cs++ = MI_BATCH_BUFFER_START_GEN8;
        *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
        *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
        GEM_BUG_ON(cs - base > end);

        i915_gem_object_flush_map(obj);

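        /*
         * cancel points at the first instruction of the loop; writing
         * MI_BATCH_BUFFER_END there terminates the batch on its next pass.
         * counter is the last dword of the buffer, updated by the SRM.
         */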
        *cancel = base + loop;
        *counter = srm ? memset32(base + end, 0, 1) : NULL;
        return vma;
}

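/*
 * Poll the actual GT frequency (CAGF) with exponential backoff until it
 * reaches the requested value, stops changing across the last
 * ARRAY_SIZE(history) samples, or the timeout expires; return the last
 * reading either way.
 */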
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
        u8 history[64], i;
        unsigned long end;
        int sleep;

        i = 0;
        memset(history, freq, sizeof(history));
        sleep = 20;

        /* The PCU does not change instantly, but drifts towards the goal? */
        end = jiffies + msecs_to_jiffies(timeout_ms);
        do {
                u8 act;

                act = read_cagf(rps);
                if (time_after(jiffies, end))
                        return act;

                /* Target acquired */
                if (act == freq)
                        return act;

                /* Any change within the last N samples? */
                if (!memchr_inv(history, act, sizeof(history)))
                        return act;

                history[i] = act;
                i = (i + 1) % ARRAY_SIZE(history);

                usleep_range(sleep, 2 * sleep);
                sleep *= 2;
                if (sleep > timeout_ms * 20)
                        sleep = timeout_ms * 20;
        } while (1);
}

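/* Request a new frequency and wait up to 50ms for the PCU to act on it */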
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
        mutex_lock(&rps->lock);
        GEM_BUG_ON(!intel_rps_is_active(rps));
        intel_rps_set(rps, freq);
        GEM_BUG_ON(rps->last_freq != freq);
        mutex_unlock(&rps->lock);

        return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
        struct drm_i915_private *i915 = rps_to_i915(rps);

        if (IS_BROXTON(i915)) {
                pr_info("P_STATE_CAP[%x]: 0x%08x\n",
                        i915_mmio_reg_offset(BXT_RP_STATE_CAP),
                        intel_uncore_read(rps_to_uncore(rps),
                                          BXT_RP_STATE_CAP));
        } else if (IS_GEN(i915, 9)) {
                pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
                        i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
                        intel_uncore_read(rps_to_uncore(rps),
                                          GEN9_RP_STATE_LIMITS));
        }
}

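/*
 * With RPS disabled and the up evaluation interval programmed to
 * (effectively) never expire, count C0 cycles against ktime for a few
 * milliseconds and check that converting between cycles and walltime
 * with the GT clock frequency agrees with what we measured.
 */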
int live_rps_clock_interval(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        void (*saved_work)(struct work_struct *wrk);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        int err = 0;

        if (!intel_rps_is_enabled(rps))
                return 0;

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        intel_gt_pm_get(gt);
        intel_rps_disable(&gt->rps);

        intel_gt_check_clock_frequency(gt);

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                u32 cycles;
                u64 dt;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                st_engine_heartbeat_disable(engine);

                rq = igt_spinner_create_request(&spin,
                                                engine->kernel_context,
                                                MI_NOOP);
                if (IS_ERR(rq)) {
                        st_engine_heartbeat_enable(engine);
                        err = PTR_ERR(rq);
                        break;
                }

                i915_request_add(rq);

                if (!igt_wait_for_spinner(&spin, rq)) {
                        pr_err("%s: RPS spinner did not start\n",
                               engine->name);
                        igt_spinner_end(&spin);
                        st_engine_heartbeat_enable(engine);
                        intel_gt_set_wedged(engine->gt);
                        err = -EIO;
                        break;
                }

                intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

                intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

                /* Set the evaluation interval to infinity! */
                intel_uncore_write_fw(gt->uncore,
                                      GEN6_RP_UP_EI, 0xffffffff);
                intel_uncore_write_fw(gt->uncore,
                                      GEN6_RP_UP_THRESHOLD, 0xffffffff);

                intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
                                      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

                if (wait_for(intel_uncore_read_fw(gt->uncore,
                                                  GEN6_RP_CUR_UP_EI),
                             10)) {
                        /* Just skip the test; assume lack of HW support */
                        pr_notice("%s: rps evaluation interval not ticking\n",
                                  engine->name);
                        err = -ENODEV;
                } else {
                        ktime_t dt_[5];
                        u32 cycles_[5];
                        int i;

                        for (i = 0; i < 5; i++) {
                                preempt_disable();

                                dt_[i] = ktime_get();
                                cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

                                udelay(1000);

                                dt_[i] = ktime_sub(ktime_get(), dt_[i]);
                                cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

                                preempt_enable();
                        }

                        /* Use the median of both cycle/dt; close enough */
                        sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
                        cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
                        sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
                        dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
                }

                intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
                intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

                igt_spinner_end(&spin);
                st_engine_heartbeat_enable(engine);

                if (err == 0) {
                        u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
                        u32 expected =
                                intel_gt_ns_to_pm_interval(gt, dt);

                        pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
                                engine->name, cycles, time, dt, expected,
                                gt->clock_frequency / 1000);

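                        /*
                         * Accept the round trip if each conversion agrees
                         * with the measurement to within -20%/+25%.
                         */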
                        if (10 * time < 8 * dt ||
                            8 * time > 10 * dt) {
                                pr_err("%s: rps clock time does not match walltime!\n",
                                       engine->name);
                                err = -EINVAL;
                        }

                        if (10 * expected < 8 * cycles ||
                            8 * expected > 10 * cycles) {
                                pr_err("%s: walltime does not match rps clock ticks!\n",
                                       engine->name);
                                err = -EINVAL;
                        }
                }

                if (igt_flush_test(gt->i915))
                        err = -EIO;

                break; /* once is enough */
        }

        intel_rps_enable(&gt->rps);
        intel_gt_pm_put(gt);

        igt_spinner_fini(&spin);

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        if (err == -ENODEV) /* skipped, don't report a fail */
                err = 0;

        return err;
}

int live_rps_control(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        void (*saved_work)(struct work_struct *wrk);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        int err = 0;

        /*
         * Check that the actual frequency matches our requested frequency,
         * to verify our control mechanism. We have to be careful that the
         * PCU may throttle the GPU in which case the actual frequency used
         * will be lower than requested.
         */

        if (!intel_rps_is_enabled(rps))
                return 0;

        if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
                return 0;

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        intel_gt_pm_get(gt);
        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                ktime_t min_dt, max_dt;
                int f, limit;
                int min, max;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                st_engine_heartbeat_disable(engine);

                rq = igt_spinner_create_request(&spin,
                                                engine->kernel_context,
                                                MI_NOOP);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        break;
                }

                i915_request_add(rq);

                if (!igt_wait_for_spinner(&spin, rq)) {
                        pr_err("%s: RPS spinner did not start\n",
                               engine->name);
                        igt_spinner_end(&spin);
                        st_engine_heartbeat_enable(engine);
                        intel_gt_set_wedged(engine->gt);
                        err = -EIO;
                        break;
                }

                if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
                        pr_err("%s: could not set minimum frequency [%x], only %x!\n",
                               engine->name, rps->min_freq, read_cagf(rps));
                        igt_spinner_end(&spin);
                        st_engine_heartbeat_enable(engine);
                        show_pstate_limits(rps);
                        err = -EINVAL;
                        break;
                }

                for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
                        if (rps_set_check(rps, f) < f)
                                break;
                }

                limit = rps_set_check(rps, f);

                if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
                        pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
                               engine->name, rps->min_freq, read_cagf(rps));
                        igt_spinner_end(&spin);
                        st_engine_heartbeat_enable(engine);
                        show_pstate_limits(rps);
                        err = -EINVAL;
                        break;
                }

                max_dt = ktime_get();
                max = rps_set_check(rps, limit);
                max_dt = ktime_sub(ktime_get(), max_dt);

                min_dt = ktime_get();
                min = rps_set_check(rps, rps->min_freq);
                min_dt = ktime_sub(ktime_get(), min_dt);

                igt_spinner_end(&spin);
                st_engine_heartbeat_enable(engine);

                pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
                        engine->name,
                        rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
                        rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
                        limit, intel_gpu_freq(rps, limit),
                        min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

                if (limit == rps->min_freq) {
                        pr_err("%s: GPU throttled to minimum!\n",
                               engine->name);
                        show_pstate_limits(rps);
                        err = -ENODEV;
                        break;
                }

                if (igt_flush_test(gt->i915)) {
                        err = -EIO;
                        break;
                }
        }
        intel_gt_pm_put(gt);

        igt_spinner_fini(&spin);

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        return err;
}

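/*
 * Dump the pcode min-freq table: for each GPU frequency (in 50MHz
 * units), the effective CPU and ring frequencies paired with it.
 */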
static void show_pcu_config(struct intel_rps *rps)
{
        struct drm_i915_private *i915 = rps_to_i915(rps);
        unsigned int max_gpu_freq, min_gpu_freq;
        intel_wakeref_t wakeref;
        int gpu_freq;

        if (!HAS_LLC(i915))
                return;

        min_gpu_freq = rps->min_freq;
        max_gpu_freq = rps->max_freq;
        if (INTEL_GEN(i915) >= 9) {
                /* Convert GT frequency to 50 MHz units */
                min_gpu_freq /= GEN9_FREQ_SCALER;
                max_gpu_freq /= GEN9_FREQ_SCALER;
        }

        wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

        pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
        for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
                int ia_freq = gpu_freq;

                sandybridge_pcode_read(i915,
                                       GEN6_PCODE_READ_MIN_FREQ_TABLE,
                                       &ia_freq, NULL);

                pr_info("%5d  %5d  %5d\n",
                        gpu_freq * 50,
                        ((ia_freq >> 0) & 0xff) * 100,
                        ((ia_freq >> 8) & 0xff) * 100);
        }

        intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

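/*
 * Sample the memory-backed counter across a sleep: the count delta over
 * the elapsed ktime (ns), scaled by 10^6, gives the update rate in kHz.
 */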
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
        u64 dc, dt;

        dt = ktime_get();
        dc = READ_ONCE(*cntr);
        usleep_range(1000 * duration_ms, 2000 * duration_ms);
        dc = READ_ONCE(*cntr) - dc;
        dt = ktime_get() - dt;

        return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
        u64 x[5];
        int i;

        *freq = rps_set_check(rps, *freq);
        for (i = 0; i < 5; i++)
                x[i] = __measure_frequency(cntr, 2);
        *freq = (*freq + read_cagf(rps)) / 2;

        /* A simple triangle filter for better result stability */
        sort(x, 5, sizeof(*x), cmp_u64, NULL);
        return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
                                  int duration_ms)
{
        u64 dc, dt;

        dt = ktime_get();
        dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
        usleep_range(1000 * duration_ms, 2000 * duration_ms);
        dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
        dt = ktime_get() - dt;

        return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
                                   struct intel_engine_cs *engine,
                                   int *freq)
{
        u64 x[5];
        int i;

        *freq = rps_set_check(rps, *freq);
        for (i = 0; i < 5; i++)
                x[i] = __measure_cs_frequency(engine, 2);
        *freq = (*freq + read_cagf(rps)) / 2;

        /* A simple triangle filter for better result stability */
        sort(x, 5, sizeof(*x), cmp_u64, NULL);
        return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

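/* True if x/y lies strictly within the ratio window (f_n/f_d, f_d/f_n) */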
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
        return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
        void (*saved_work)(struct work_struct *wrk);
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        struct intel_engine_cs *engine;
        struct pm_qos_request qos;
        enum intel_engine_id id;
        int err = 0;

        /*
         * The premise is that the GPU does change frequency at our behest.
         * Let's check there is a correspondence between the requested
         * frequency, the actual frequency, and the observed clock rate.
         */

        if (!intel_rps_is_enabled(rps))
                return 0;

        if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
                return 0;

        if (CPU_LATENCY >= 0)
                cpu_latency_qos_add_request(&qos, CPU_LATENCY);

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                struct i915_vma *vma;
                u32 *cancel, *cntr;
                struct {
                        u64 count;
                        int freq;
                } min, max;

                st_engine_heartbeat_disable(engine);

                vma = create_spin_counter(engine,
                                          engine->kernel_context->vm, false,
                                          &cancel, &cntr);
                if (IS_ERR(vma)) {
                        err = PTR_ERR(vma);
                        st_engine_heartbeat_enable(engine);
                        break;
                }

                rq = intel_engine_create_kernel_request(engine);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto err_vma;
                }

                i915_vma_lock(vma);
                err = i915_request_await_object(rq, vma->obj, false);
                if (!err)
                        err = i915_vma_move_to_active(vma, rq, 0);
                if (!err)
                        err = rq->engine->emit_bb_start(rq,
                                                        vma->node.start,
                                                        PAGE_SIZE, 0);
                i915_vma_unlock(vma);
                i915_request_add(rq);
                if (err)
                        goto err_vma;

                if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
                             10)) {
                        pr_err("%s: timed loop did not start\n",
                               engine->name);
                        goto err_vma;
                }

                min.freq = rps->min_freq;
                min.count = measure_cs_frequency_at(rps, engine, &min.freq);

                max.freq = rps->max_freq;
                max.count = measure_cs_frequency_at(rps, engine, &max.freq);

                pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
                        engine->name,
                        min.count, intel_gpu_freq(rps, min.freq),
                        max.count, intel_gpu_freq(rps, max.freq),
                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
                                                     max.freq * min.count));

                if (!scaled_within(max.freq * min.count,
                                   min.freq * max.count,
                                   2, 3)) {
                        int f;

                        pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
                               engine->name,
                               max.freq * min.count,
                               min.freq * max.count);
                        show_pcu_config(rps);

                        for (f = min.freq + 1; f <= rps->max_freq; f++) {
                                int act = f;
                                u64 count;

                                count = measure_cs_frequency_at(rps, engine, &act);
                                if (act < f)
                                        break;

                                pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
                                        engine->name,
                                        act, intel_gpu_freq(rps, act), count,
                                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
                                                                     act * min.count));

                                f = act; /* may skip ahead [pcu granularity] */
                        }

                        err = -EINVAL;
                }

err_vma:
                *cancel = MI_BATCH_BUFFER_END;
                i915_gem_object_flush_map(vma->obj);
                i915_gem_object_unpin_map(vma->obj);
                i915_vma_unpin(vma);
                i915_vma_put(vma);

                st_engine_heartbeat_enable(engine);
                if (igt_flush_test(gt->i915))
                        err = -EIO;
                if (err)
                        break;
        }

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        if (CPU_LATENCY >= 0)
                cpu_latency_qos_remove_request(&qos);

        return err;
}

int live_rps_frequency_srm(void *arg)
{
        void (*saved_work)(struct work_struct *wrk);
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        struct intel_engine_cs *engine;
        struct pm_qos_request qos;
        enum intel_engine_id id;
        int err = 0;

        /*
         * The premise is that the GPU does change frequency at our behest.
         * Let's check there is a correspondence between the requested
         * frequency, the actual frequency, and the observed clock rate.
         */

        if (!intel_rps_is_enabled(rps))
                return 0;

        if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
                return 0;

        if (CPU_LATENCY >= 0)
                cpu_latency_qos_add_request(&qos, CPU_LATENCY);

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                struct i915_vma *vma;
                u32 *cancel, *cntr;
                struct {
                        u64 count;
                        int freq;
                } min, max;

                st_engine_heartbeat_disable(engine);

                vma = create_spin_counter(engine,
                                          engine->kernel_context->vm, true,
                                          &cancel, &cntr);
                if (IS_ERR(vma)) {
                        err = PTR_ERR(vma);
                        st_engine_heartbeat_enable(engine);
                        break;
                }

                rq = intel_engine_create_kernel_request(engine);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto err_vma;
                }

                i915_vma_lock(vma);
                err = i915_request_await_object(rq, vma->obj, false);
                if (!err)
                        err = i915_vma_move_to_active(vma, rq, 0);
                if (!err)
                        err = rq->engine->emit_bb_start(rq,
                                                        vma->node.start,
                                                        PAGE_SIZE, 0);
                i915_vma_unlock(vma);
                i915_request_add(rq);
                if (err)
                        goto err_vma;

                if (wait_for(READ_ONCE(*cntr), 10)) {
                        pr_err("%s: timed loop did not start\n",
                               engine->name);
                        goto err_vma;
                }

                min.freq = rps->min_freq;
                min.count = measure_frequency_at(rps, cntr, &min.freq);

                max.freq = rps->max_freq;
                max.count = measure_frequency_at(rps, cntr, &max.freq);

                pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
                        engine->name,
                        min.count, intel_gpu_freq(rps, min.freq),
                        max.count, intel_gpu_freq(rps, max.freq),
                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
                                                     max.freq * min.count));

                if (!scaled_within(max.freq * min.count,
                                   min.freq * max.count,
                                   1, 2)) {
                        int f;

                        pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
                               engine->name,
                               max.freq * min.count,
                               min.freq * max.count);
                        show_pcu_config(rps);

                        for (f = min.freq + 1; f <= rps->max_freq; f++) {
                                int act = f;
                                u64 count;

                                count = measure_frequency_at(rps, cntr, &act);
                                if (act < f)
                                        break;

                                pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
                                        engine->name,
                                        act, intel_gpu_freq(rps, act), count,
                                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
                                                                     act * min.count));

                                f = act; /* may skip ahead [pcu granularity] */
                        }

                        err = -EINVAL;
                }

err_vma:
                *cancel = MI_BATCH_BUFFER_END;
                i915_gem_object_flush_map(vma->obj);
                i915_gem_object_unpin_map(vma->obj);
                i915_vma_unpin(vma);
                i915_vma_put(vma);

                st_engine_heartbeat_enable(engine);
                if (igt_flush_test(gt->i915))
                        err = -EIO;
                if (err)
                        break;
        }

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        if (CPU_LATENCY >= 0)
                cpu_latency_qos_remove_request(&qos);

        return err;
}

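/*
 * Let any in-flight evaluation interval expire, clear the interrupt
 * status, and then sleep for (at least) one whole interval, so that any
 * threshold event we observe must have been raised within that window.
 */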
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
        /* Flush any previous EI */
        usleep_range(timeout_us, 2 * timeout_us);

        /* Reset the interrupt status */
        rps_disable_interrupts(rps);
        GEM_BUG_ON(rps->pm_iir);
        rps_enable_interrupts(rps);

        /* And then wait for the timeout, for real this time */
        usleep_range(2 * timeout_us, 3 * timeout_us);
}

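/*
 * Pin the GPU at its minimum frequency and saturate the engine with a
 * spinner for a whole evaluation interval: the UP threshold interrupt
 * should fire, while the frequency itself stays put since the RPS
 * worker has been replaced with a stub for the test.
 */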
static int __rps_up_interrupt(struct intel_rps *rps,
                              struct intel_engine_cs *engine,
                              struct igt_spinner *spin)
{
        struct intel_uncore *uncore = engine->uncore;
        struct i915_request *rq;
        u32 timeout;

        if (!intel_engine_can_store_dword(engine))
                return 0;

        rps_set_check(rps, rps->min_freq);

        rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        i915_request_get(rq);
        i915_request_add(rq);

        if (!igt_wait_for_spinner(spin, rq)) {
                pr_err("%s: RPS spinner did not start\n",
                       engine->name);
                i915_request_put(rq);
                intel_gt_set_wedged(engine->gt);
                return -EIO;
        }

        if (!intel_rps_is_active(rps)) {
                pr_err("%s: RPS not enabled on starting spinner\n",
                       engine->name);
                igt_spinner_end(spin);
                i915_request_put(rq);
                return -EINVAL;
        }

        if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
                pr_err("%s: RPS did not register UP interrupt\n",
                       engine->name);
                i915_request_put(rq);
                return -EINVAL;
        }

        if (rps->last_freq != rps->min_freq) {
                pr_err("%s: RPS did not program min frequency\n",
                       engine->name);
                i915_request_put(rq);
                return -EINVAL;
        }

        timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
        timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
        timeout = DIV_ROUND_UP(timeout, 1000);

        sleep_for_ei(rps, timeout);
        GEM_BUG_ON(i915_request_completed(rq));

        igt_spinner_end(spin);
        i915_request_put(rq);

        if (rps->cur_freq != rps->min_freq) {
                pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
                       engine->name, intel_rps_read_actual_frequency(rps));
                return -EINVAL;
        }

        if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
                pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
                       engine->name, rps->pm_iir,
                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
                return -EINVAL;
        }

        return 0;
}

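/*
 * The converse: pin the GPU at its maximum frequency and leave the
 * engine idle (but awake) for an evaluation interval, expecting the
 * DOWN threshold (or timeout) interrupt without a frequency change.
 */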
static int __rps_down_interrupt(struct intel_rps *rps,
                                struct intel_engine_cs *engine)
{
        struct intel_uncore *uncore = engine->uncore;
        u32 timeout;

        rps_set_check(rps, rps->max_freq);

        if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
                pr_err("%s: RPS did not register DOWN interrupt\n",
                       engine->name);
                return -EINVAL;
        }

        if (rps->last_freq != rps->max_freq) {
                pr_err("%s: RPS did not program max frequency\n",
                       engine->name);
                return -EINVAL;
        }

        timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
        timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
        timeout = DIV_ROUND_UP(timeout, 1000);

        sleep_for_ei(rps, timeout);

        if (rps->cur_freq != rps->max_freq) {
                pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
                       engine->name,
                       intel_rps_read_actual_frequency(rps));
                return -EINVAL;
        }

        if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
                pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
                       engine->name, rps->pm_iir,
                       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
                       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
                       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
                return -EINVAL;
        }

        return 0;
}

int live_rps_interrupt(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        void (*saved_work)(struct work_struct *wrk);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        u32 pm_events;
        int err = 0;

        /*
         * First, let's check whether or not we are receiving interrupts.
         */

        if (!intel_rps_has_interrupts(rps))
                return 0;

        intel_gt_pm_get(gt);
        pm_events = rps->pm_events;
        intel_gt_pm_put(gt);
        if (!pm_events) {
                pr_err("No RPS PM events registered, but RPS is enabled?\n");
                return -ENODEV;
        }

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        for_each_engine(engine, gt, id) {
                /* Keep the engine busy with a spinner; expect an UP! */
                if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
                        intel_gt_pm_wait_for_idle(engine->gt);
                        GEM_BUG_ON(intel_rps_is_active(rps));

                        st_engine_heartbeat_disable(engine);

                        err = __rps_up_interrupt(rps, engine, &spin);

                        st_engine_heartbeat_enable(engine);
                        if (err)
                                goto out;

                        intel_gt_pm_wait_for_idle(engine->gt);
                }

                /* Keep the engine awake but idle and check for DOWN */
                if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
                        st_engine_heartbeat_disable(engine);
                        intel_rc6_disable(&gt->rc6);

                        err = __rps_down_interrupt(rps, engine);

                        intel_rc6_enable(&gt->rc6);
                        st_engine_heartbeat_enable(engine);
                        if (err)
                                goto out;
                }
        }

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        igt_spinner_fini(&spin);

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        return err;
}

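/*
 * Mean package power via RAPL: the energy delta (uJ) over the elapsed
 * ktime (ns), scaled by 10^6, comes out in mW.
 */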
static u64 __measure_power(int duration_ms)
{
        u64 dE, dt;

        dt = ktime_get();
        dE = librapl_energy_uJ();
        usleep_range(1000 * duration_ms, 2000 * duration_ms);
        dE = librapl_energy_uJ() - dE;
        dt = ktime_get() - dt;

        return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
        u64 x[5];
        int i;

        *freq = rps_set_check(rps, *freq);
        for (i = 0; i < 5; i++)
                x[i] = __measure_power(5);
        *freq = (*freq + read_cagf(rps)) / 2;

        /* A simple triangle filter for better result stability */
        sort(x, 5, sizeof(*x), cmp_u64, NULL);
        return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

int live_rps_power(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        void (*saved_work)(struct work_struct *wrk);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        int err = 0;

        /*
         * Our fundamental assumption is that running at lower frequency
         * actually saves power. Let's see if our RAPL measurements
         * support that theory.
         */

        if (!intel_rps_is_enabled(rps))
                return 0;

        if (!librapl_energy_uJ())
                return 0;

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                struct {
                        u64 power;
                        int freq;
                } min, max;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                st_engine_heartbeat_disable(engine);

                rq = igt_spinner_create_request(&spin,
                                                engine->kernel_context,
                                                MI_NOOP);
                if (IS_ERR(rq)) {
                        st_engine_heartbeat_enable(engine);
                        err = PTR_ERR(rq);
                        break;
                }

                i915_request_add(rq);

                if (!igt_wait_for_spinner(&spin, rq)) {
                        pr_err("%s: RPS spinner did not start\n",
                               engine->name);
                        igt_spinner_end(&spin);
                        st_engine_heartbeat_enable(engine);
                        intel_gt_set_wedged(engine->gt);
                        err = -EIO;
                        break;
                }

                max.freq = rps->max_freq;
                max.power = measure_power_at(rps, &max.freq);

                min.freq = rps->min_freq;
                min.power = measure_power_at(rps, &min.freq);

                igt_spinner_end(&spin);
                st_engine_heartbeat_enable(engine);

                pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
                        engine->name,
                        min.power, intel_gpu_freq(rps, min.freq),
                        max.power, intel_gpu_freq(rps, max.freq));

                if (10 * min.freq >= 9 * max.freq) {
                        pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
                                  min.freq, intel_gpu_freq(rps, min.freq),
                                  max.freq, intel_gpu_freq(rps, max.freq));
                        continue;
                }

                if (11 * min.power > 10 * max.power) {
                        pr_err("%s: did not conserve power when setting lower frequency!\n",
                               engine->name);
                        err = -EINVAL;
                        break;
                }

                if (igt_flush_test(gt->i915)) {
                        err = -EIO;
                        break;
                }
        }

        igt_spinner_fini(&spin);

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        return err;
}

int live_rps_dynamic(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        int err = 0;

        /*
         * We've looked at the basics, and have established that we
         * can change the clock frequency and that the HW will generate
         * interrupts based on load. Now we check how we integrate those
         * moving parts into dynamic reclocking based on load.
         */

        if (!intel_rps_is_enabled(rps))
                return 0;

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        if (intel_rps_has_interrupts(rps))
                pr_info("RPS has interrupt support\n");
        if (intel_rps_uses_timer(rps))
                pr_info("RPS has timer support\n");

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                struct {
                        ktime_t dt;
                        u8 freq;
                } min, max;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                intel_gt_pm_wait_for_idle(gt);
                GEM_BUG_ON(intel_rps_is_active(rps));
                rps->cur_freq = rps->min_freq;

                intel_engine_pm_get(engine);
                intel_rc6_disable(&gt->rc6);
                GEM_BUG_ON(rps->last_freq != rps->min_freq);

                rq = igt_spinner_create_request(&spin,
                                                engine->kernel_context,
                                                MI_NOOP);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto err;
                }

                i915_request_add(rq);

                max.dt = ktime_get();
                max.freq = wait_for_freq(rps, rps->max_freq, 500);
                max.dt = ktime_sub(ktime_get(), max.dt);

                igt_spinner_end(&spin);

                min.dt = ktime_get();
                min.freq = wait_for_freq(rps, rps->min_freq, 2000);
                min.dt = ktime_sub(ktime_get(), min.dt);

                pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
                        engine->name,
                        max.freq, intel_gpu_freq(rps, max.freq),
                        ktime_to_ns(max.dt),
                        min.freq, intel_gpu_freq(rps, min.freq),
                        ktime_to_ns(min.dt));
                if (min.freq >= max.freq) {
                        pr_err("%s: dynamic reclocking of spinner failed!\n",
                               engine->name);
                        err = -EINVAL;
                }

err:
                intel_rc6_enable(&gt->rc6);
                intel_engine_pm_put(engine);

                if (igt_flush_test(gt->i915))
                        err = -EIO;
                if (err)
                        break;
        }

        igt_spinner_fini(&spin);

        return err;
}