Merge tag 'for-linus-5.15-1' of git://github.com/cminyard/linux-ipmi
[linux-2.6-microblaze.git] / drivers / gpu / drm / i915 / gt / selftest_rps.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
8
9 #include "intel_engine_heartbeat.h"
10 #include "intel_engine_pm.h"
11 #include "intel_gpu_commands.h"
12 #include "intel_gt_clock_utils.h"
13 #include "intel_gt_pm.h"
14 #include "intel_rc6.h"
15 #include "selftest_engine_heartbeat.h"
16 #include "selftest_rps.h"
17 #include "selftests/igt_flush_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/librapl.h"
20
21 /* Try to isolate the impact of cstates from determining frequency response */
22 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
23
/*
 * No-op work handler. The selftests in this file store it in
 * rps->work.func (saving and later restoring the previous handler) so
 * that running the RPS work item does nothing while a test is in charge.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
	/* intentionally empty */
}
27
/*
 * Three-way comparison of two u64 values, usable as a sort() callback.
 *
 * Returns -1, 0 or 1 when *A is respectively less than, equal to or
 * greater than *B.
 */
static int cmp_u64(const void *A, const void *B)
{
	const u64 lhs = *(const u64 *)A;
	const u64 rhs = *(const u64 *)B;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
39
/*
 * Three-way comparison of two u32 values, usable as a sort() callback.
 *
 * Returns -1, 0 or 1 when *A is respectively less than, equal to or
 * greater than *B.
 */
static int cmp_u32(const void *A, const void *B)
{
	const u32 lhs = *(const u32 *)A;
	const u32 rhs = *(const u32 *)B;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
51
52 static struct i915_vma *
53 create_spin_counter(struct intel_engine_cs *engine,
54                     struct i915_address_space *vm,
55                     bool srm,
56                     u32 **cancel,
57                     u32 **counter)
58 {
59         enum {
60                 COUNT,
61                 INC,
62                 __NGPR__,
63         };
64 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
65         struct drm_i915_gem_object *obj;
66         struct i915_vma *vma;
67         unsigned long end;
68         u32 *base, *cs;
69         int loop, i;
70         int err;
71
72         obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
73         if (IS_ERR(obj))
74                 return ERR_CAST(obj);
75
76         end = obj->base.size / sizeof(u32) - 1;
77
78         vma = i915_vma_instance(obj, vm, NULL);
79         if (IS_ERR(vma)) {
80                 err = PTR_ERR(vma);
81                 goto err_put;
82         }
83
84         err = i915_vma_pin(vma, 0, 0, PIN_USER);
85         if (err)
86                 goto err_unlock;
87
88         i915_vma_lock(vma);
89
90         base = i915_gem_object_pin_map(obj, I915_MAP_WC);
91         if (IS_ERR(base)) {
92                 err = PTR_ERR(base);
93                 goto err_unpin;
94         }
95         cs = base;
96
97         *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
98         for (i = 0; i < __NGPR__; i++) {
99                 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
100                 *cs++ = 0;
101                 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
102                 *cs++ = 0;
103         }
104
105         *cs++ = MI_LOAD_REGISTER_IMM(1);
106         *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
107         *cs++ = 1;
108
109         loop = cs - base;
110
111         /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
112         for (i = 0; i < 1024; i++) {
113                 *cs++ = MI_MATH(4);
114                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
115                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
116                 *cs++ = MI_MATH_ADD;
117                 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
118
119                 if (srm) {
120                         *cs++ = MI_STORE_REGISTER_MEM_GEN8;
121                         *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
122                         *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
123                         *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
124                 }
125         }
126
127         *cs++ = MI_BATCH_BUFFER_START_GEN8;
128         *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
129         *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
130         GEM_BUG_ON(cs - base > end);
131
132         i915_gem_object_flush_map(obj);
133
134         *cancel = base + loop;
135         *counter = srm ? memset32(base + end, 0, 1) : NULL;
136         return vma;
137
138 err_unpin:
139         i915_vma_unpin(vma);
140 err_unlock:
141         i915_vma_unlock(vma);
142 err_put:
143         i915_gem_object_put(obj);
144         return ERR_PTR(err);
145 }
146
147 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
148 {
149         u8 history[64], i;
150         unsigned long end;
151         int sleep;
152
153         i = 0;
154         memset(history, freq, sizeof(history));
155         sleep = 20;
156
157         /* The PCU does not change instantly, but drifts towards the goal? */
158         end = jiffies + msecs_to_jiffies(timeout_ms);
159         do {
160                 u8 act;
161
162                 act = read_cagf(rps);
163                 if (time_after(jiffies, end))
164                         return act;
165
166                 /* Target acquired */
167                 if (act == freq)
168                         return act;
169
170                 /* Any change within the last N samples? */
171                 if (!memchr_inv(history, act, sizeof(history)))
172                         return act;
173
174                 history[i] = act;
175                 i = (i + 1) % ARRAY_SIZE(history);
176
177                 usleep_range(sleep, 2 * sleep);
178                 sleep *= 2;
179                 if (sleep > timeout_ms * 20)
180                         sleep = timeout_ms * 20;
181         } while (1);
182 }
183
184 static u8 rps_set_check(struct intel_rps *rps, u8 freq)
185 {
186         mutex_lock(&rps->lock);
187         GEM_BUG_ON(!intel_rps_is_active(rps));
188         if (wait_for(!intel_rps_set(rps, freq), 50)) {
189                 mutex_unlock(&rps->lock);
190                 return 0;
191         }
192         GEM_BUG_ON(rps->last_freq != freq);
193         mutex_unlock(&rps->lock);
194
195         return wait_for_freq(rps, freq, 50);
196 }
197
198 static void show_pstate_limits(struct intel_rps *rps)
199 {
200         struct drm_i915_private *i915 = rps_to_i915(rps);
201
202         if (IS_BROXTON(i915)) {
203                 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
204                         i915_mmio_reg_offset(BXT_RP_STATE_CAP),
205                         intel_uncore_read(rps_to_uncore(rps),
206                                           BXT_RP_STATE_CAP));
207         } else if (GRAPHICS_VER(i915) == 9) {
208                 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
209                         i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
210                         intel_uncore_read(rps_to_uncore(rps),
211                                           GEN9_RP_STATE_LIMITS));
212         }
213 }
214
215 int live_rps_clock_interval(void *arg)
216 {
217         struct intel_gt *gt = arg;
218         struct intel_rps *rps = &gt->rps;
219         void (*saved_work)(struct work_struct *wrk);
220         struct intel_engine_cs *engine;
221         enum intel_engine_id id;
222         struct igt_spinner spin;
223         int err = 0;
224
225         if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
226                 return 0;
227
228         if (igt_spinner_init(&spin, gt))
229                 return -ENOMEM;
230
231         intel_gt_pm_wait_for_idle(gt);
232         saved_work = rps->work.func;
233         rps->work.func = dummy_rps_work;
234
235         intel_gt_pm_get(gt);
236         intel_rps_disable(&gt->rps);
237
238         intel_gt_check_clock_frequency(gt);
239
240         for_each_engine(engine, gt, id) {
241                 struct i915_request *rq;
242                 u32 cycles;
243                 u64 dt;
244
245                 if (!intel_engine_can_store_dword(engine))
246                         continue;
247
248                 st_engine_heartbeat_disable(engine);
249
250                 rq = igt_spinner_create_request(&spin,
251                                                 engine->kernel_context,
252                                                 MI_NOOP);
253                 if (IS_ERR(rq)) {
254                         st_engine_heartbeat_enable(engine);
255                         err = PTR_ERR(rq);
256                         break;
257                 }
258
259                 i915_request_add(rq);
260
261                 if (!igt_wait_for_spinner(&spin, rq)) {
262                         pr_err("%s: RPS spinner did not start\n",
263                                engine->name);
264                         igt_spinner_end(&spin);
265                         st_engine_heartbeat_enable(engine);
266                         intel_gt_set_wedged(engine->gt);
267                         err = -EIO;
268                         break;
269                 }
270
271                 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
272
273                 intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
274
275                 /* Set the evaluation interval to infinity! */
276                 intel_uncore_write_fw(gt->uncore,
277                                       GEN6_RP_UP_EI, 0xffffffff);
278                 intel_uncore_write_fw(gt->uncore,
279                                       GEN6_RP_UP_THRESHOLD, 0xffffffff);
280
281                 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
282                                       GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
283
284                 if (wait_for(intel_uncore_read_fw(gt->uncore,
285                                                   GEN6_RP_CUR_UP_EI),
286                              10)) {
287                         /* Just skip the test; assume lack of HW support */
288                         pr_notice("%s: rps evaluation interval not ticking\n",
289                                   engine->name);
290                         err = -ENODEV;
291                 } else {
292                         ktime_t dt_[5];
293                         u32 cycles_[5];
294                         int i;
295
296                         for (i = 0; i < 5; i++) {
297                                 preempt_disable();
298
299                                 dt_[i] = ktime_get();
300                                 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
301
302                                 udelay(1000);
303
304                                 dt_[i] = ktime_sub(ktime_get(), dt_[i]);
305                                 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
306
307                                 preempt_enable();
308                         }
309
310                         /* Use the median of both cycle/dt; close enough */
311                         sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
312                         cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
313                         sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
314                         dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
315                 }
316
317                 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
318                 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
319
320                 igt_spinner_end(&spin);
321                 st_engine_heartbeat_enable(engine);
322
323                 if (err == 0) {
324                         u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
325                         u32 expected =
326                                 intel_gt_ns_to_pm_interval(gt, dt);
327
328                         pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
329                                 engine->name, cycles, time, dt, expected,
330                                 gt->clock_frequency / 1000);
331
332                         if (10 * time < 8 * dt ||
333                             8 * time > 10 * dt) {
334                                 pr_err("%s: rps clock time does not match walltime!\n",
335                                        engine->name);
336                                 err = -EINVAL;
337                         }
338
339                         if (10 * expected < 8 * cycles ||
340                             8 * expected > 10 * cycles) {
341                                 pr_err("%s: walltime does not match rps clock ticks!\n",
342                                        engine->name);
343                                 err = -EINVAL;
344                         }
345                 }
346
347                 if (igt_flush_test(gt->i915))
348                         err = -EIO;
349
350                 break; /* once is enough */
351         }
352
353         intel_rps_enable(&gt->rps);
354         intel_gt_pm_put(gt);
355
356         igt_spinner_fini(&spin);
357
358         intel_gt_pm_wait_for_idle(gt);
359         rps->work.func = saved_work;
360
361         if (err == -ENODEV) /* skipped, don't report a fail */
362                 err = 0;
363
364         return err;
365 }
366
367 int live_rps_control(void *arg)
368 {
369         struct intel_gt *gt = arg;
370         struct intel_rps *rps = &gt->rps;
371         void (*saved_work)(struct work_struct *wrk);
372         struct intel_engine_cs *engine;
373         enum intel_engine_id id;
374         struct igt_spinner spin;
375         int err = 0;
376
377         /*
378          * Check that the actual frequency matches our requested frequency,
379          * to verify our control mechanism. We have to be careful that the
380          * PCU may throttle the GPU in which case the actual frequency used
381          * will be lowered than requested.
382          */
383
384         if (!intel_rps_is_enabled(rps))
385                 return 0;
386
387         if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
388                 return 0;
389
390         if (igt_spinner_init(&spin, gt))
391                 return -ENOMEM;
392
393         intel_gt_pm_wait_for_idle(gt);
394         saved_work = rps->work.func;
395         rps->work.func = dummy_rps_work;
396
397         intel_gt_pm_get(gt);
398         for_each_engine(engine, gt, id) {
399                 struct i915_request *rq;
400                 ktime_t min_dt, max_dt;
401                 int f, limit;
402                 int min, max;
403
404                 if (!intel_engine_can_store_dword(engine))
405                         continue;
406
407                 st_engine_heartbeat_disable(engine);
408
409                 rq = igt_spinner_create_request(&spin,
410                                                 engine->kernel_context,
411                                                 MI_NOOP);
412                 if (IS_ERR(rq)) {
413                         err = PTR_ERR(rq);
414                         break;
415                 }
416
417                 i915_request_add(rq);
418
419                 if (!igt_wait_for_spinner(&spin, rq)) {
420                         pr_err("%s: RPS spinner did not start\n",
421                                engine->name);
422                         igt_spinner_end(&spin);
423                         st_engine_heartbeat_enable(engine);
424                         intel_gt_set_wedged(engine->gt);
425                         err = -EIO;
426                         break;
427                 }
428
429                 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
430                         pr_err("%s: could not set minimum frequency [%x], only %x!\n",
431                                engine->name, rps->min_freq, read_cagf(rps));
432                         igt_spinner_end(&spin);
433                         st_engine_heartbeat_enable(engine);
434                         show_pstate_limits(rps);
435                         err = -EINVAL;
436                         break;
437                 }
438
439                 for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
440                         if (rps_set_check(rps, f) < f)
441                                 break;
442                 }
443
444                 limit = rps_set_check(rps, f);
445
446                 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
447                         pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
448                                engine->name, rps->min_freq, read_cagf(rps));
449                         igt_spinner_end(&spin);
450                         st_engine_heartbeat_enable(engine);
451                         show_pstate_limits(rps);
452                         err = -EINVAL;
453                         break;
454                 }
455
456                 max_dt = ktime_get();
457                 max = rps_set_check(rps, limit);
458                 max_dt = ktime_sub(ktime_get(), max_dt);
459
460                 min_dt = ktime_get();
461                 min = rps_set_check(rps, rps->min_freq);
462                 min_dt = ktime_sub(ktime_get(), min_dt);
463
464                 igt_spinner_end(&spin);
465                 st_engine_heartbeat_enable(engine);
466
467                 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
468                         engine->name,
469                         rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
470                         rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
471                         limit, intel_gpu_freq(rps, limit),
472                         min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
473
474                 if (limit == rps->min_freq) {
475                         pr_err("%s: GPU throttled to minimum!\n",
476                                engine->name);
477                         show_pstate_limits(rps);
478                         err = -ENODEV;
479                         break;
480                 }
481
482                 if (igt_flush_test(gt->i915)) {
483                         err = -EIO;
484                         break;
485                 }
486         }
487         intel_gt_pm_put(gt);
488
489         igt_spinner_fini(&spin);
490
491         intel_gt_pm_wait_for_idle(gt);
492         rps->work.func = saved_work;
493
494         return err;
495 }
496
497 static void show_pcu_config(struct intel_rps *rps)
498 {
499         struct drm_i915_private *i915 = rps_to_i915(rps);
500         unsigned int max_gpu_freq, min_gpu_freq;
501         intel_wakeref_t wakeref;
502         int gpu_freq;
503
504         if (!HAS_LLC(i915))
505                 return;
506
507         min_gpu_freq = rps->min_freq;
508         max_gpu_freq = rps->max_freq;
509         if (GRAPHICS_VER(i915) >= 9) {
510                 /* Convert GT frequency to 50 HZ units */
511                 min_gpu_freq /= GEN9_FREQ_SCALER;
512                 max_gpu_freq /= GEN9_FREQ_SCALER;
513         }
514
515         wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
516
517         pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
518         for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
519                 int ia_freq = gpu_freq;
520
521                 sandybridge_pcode_read(i915,
522                                        GEN6_PCODE_READ_MIN_FREQ_TABLE,
523                                        &ia_freq, NULL);
524
525                 pr_info("%5d  %5d  %5d\n",
526                         gpu_freq * 50,
527                         ((ia_freq >> 0) & 0xff) * 100,
528                         ((ia_freq >> 8) & 0xff) * 100);
529         }
530
531         intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
532 }
533
534 static u64 __measure_frequency(u32 *cntr, int duration_ms)
535 {
536         u64 dc, dt;
537
538         dt = ktime_get();
539         dc = READ_ONCE(*cntr);
540         usleep_range(1000 * duration_ms, 2000 * duration_ms);
541         dc = READ_ONCE(*cntr) - dc;
542         dt = ktime_get() - dt;
543
544         return div64_u64(1000 * 1000 * dc, dt);
545 }
546
547 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
548 {
549         u64 x[5];
550         int i;
551
552         *freq = rps_set_check(rps, *freq);
553         for (i = 0; i < 5; i++)
554                 x[i] = __measure_frequency(cntr, 2);
555         *freq = (*freq + read_cagf(rps)) / 2;
556
557         /* A simple triangle filter for better result stability */
558         sort(x, 5, sizeof(*x), cmp_u64, NULL);
559         return div_u64(x[1] + 2 * x[2] + x[3], 4);
560 }
561
562 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
563                                   int duration_ms)
564 {
565         u64 dc, dt;
566
567         dt = ktime_get();
568         dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
569         usleep_range(1000 * duration_ms, 2000 * duration_ms);
570         dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
571         dt = ktime_get() - dt;
572
573         return div64_u64(1000 * 1000 * dc, dt);
574 }
575
576 static u64 measure_cs_frequency_at(struct intel_rps *rps,
577                                    struct intel_engine_cs *engine,
578                                    int *freq)
579 {
580         u64 x[5];
581         int i;
582
583         *freq = rps_set_check(rps, *freq);
584         for (i = 0; i < 5; i++)
585                 x[i] = __measure_cs_frequency(engine, 2);
586         *freq = (*freq + read_cagf(rps)) / 2;
587
588         /* A simple triangle filter for better result stability */
589         sort(x, 5, sizeof(*x), cmp_u64, NULL);
590         return div_u64(x[1] + 2 * x[2] + x[3], 4);
591 }
592
/*
 * Check that @x lies strictly between (@f_n / @f_d) * @y and
 * (@f_d / @f_n) * @y, evaluated with integer cross-multiplication in
 * 64 bits (no overflow checking).
 */
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	/* Lower bound: x > y * f_n / f_d */
	if (f_d * x <= f_n * y)
		return false;

	/* Upper bound: x < y * f_d / f_n */
	return f_n * x < f_d * y;
}
597
598 int live_rps_frequency_cs(void *arg)
599 {
600         void (*saved_work)(struct work_struct *wrk);
601         struct intel_gt *gt = arg;
602         struct intel_rps *rps = &gt->rps;
603         struct intel_engine_cs *engine;
604         struct pm_qos_request qos;
605         enum intel_engine_id id;
606         int err = 0;
607
608         /*
609          * The premise is that the GPU does change frequency at our behest.
610          * Let's check there is a correspondence between the requested
611          * frequency, the actual frequency, and the observed clock rate.
612          */
613
614         if (!intel_rps_is_enabled(rps))
615                 return 0;
616
617         if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
618                 return 0;
619
620         if (CPU_LATENCY >= 0)
621                 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
622
623         intel_gt_pm_wait_for_idle(gt);
624         saved_work = rps->work.func;
625         rps->work.func = dummy_rps_work;
626
627         for_each_engine(engine, gt, id) {
628                 struct i915_request *rq;
629                 struct i915_vma *vma;
630                 u32 *cancel, *cntr;
631                 struct {
632                         u64 count;
633                         int freq;
634                 } min, max;
635
636                 st_engine_heartbeat_disable(engine);
637
638                 vma = create_spin_counter(engine,
639                                           engine->kernel_context->vm, false,
640                                           &cancel, &cntr);
641                 if (IS_ERR(vma)) {
642                         err = PTR_ERR(vma);
643                         st_engine_heartbeat_enable(engine);
644                         break;
645                 }
646
647                 rq = intel_engine_create_kernel_request(engine);
648                 if (IS_ERR(rq)) {
649                         err = PTR_ERR(rq);
650                         goto err_vma;
651                 }
652
653                 err = i915_request_await_object(rq, vma->obj, false);
654                 if (!err)
655                         err = i915_vma_move_to_active(vma, rq, 0);
656                 if (!err)
657                         err = rq->engine->emit_bb_start(rq,
658                                                         vma->node.start,
659                                                         PAGE_SIZE, 0);
660                 i915_request_add(rq);
661                 if (err)
662                         goto err_vma;
663
664                 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
665                              10)) {
666                         pr_err("%s: timed loop did not start\n",
667                                engine->name);
668                         goto err_vma;
669                 }
670
671                 min.freq = rps->min_freq;
672                 min.count = measure_cs_frequency_at(rps, engine, &min.freq);
673
674                 max.freq = rps->max_freq;
675                 max.count = measure_cs_frequency_at(rps, engine, &max.freq);
676
677                 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
678                         engine->name,
679                         min.count, intel_gpu_freq(rps, min.freq),
680                         max.count, intel_gpu_freq(rps, max.freq),
681                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
682                                                      max.freq * min.count));
683
684                 if (!scaled_within(max.freq * min.count,
685                                    min.freq * max.count,
686                                    2, 3)) {
687                         int f;
688
689                         pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
690                                engine->name,
691                                max.freq * min.count,
692                                min.freq * max.count);
693                         show_pcu_config(rps);
694
695                         for (f = min.freq + 1; f <= rps->max_freq; f++) {
696                                 int act = f;
697                                 u64 count;
698
699                                 count = measure_cs_frequency_at(rps, engine, &act);
700                                 if (act < f)
701                                         break;
702
703                                 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
704                                         engine->name,
705                                         act, intel_gpu_freq(rps, act), count,
706                                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
707                                                                      act * min.count));
708
709                                 f = act; /* may skip ahead [pcu granularity] */
710                         }
711
712                         err = -EINTR; /* ignore error, continue on with test */
713                 }
714
715 err_vma:
716                 *cancel = MI_BATCH_BUFFER_END;
717                 i915_gem_object_flush_map(vma->obj);
718                 i915_gem_object_unpin_map(vma->obj);
719                 i915_vma_unpin(vma);
720                 i915_vma_unlock(vma);
721                 i915_vma_put(vma);
722
723                 st_engine_heartbeat_enable(engine);
724                 if (igt_flush_test(gt->i915))
725                         err = -EIO;
726                 if (err)
727                         break;
728         }
729
730         intel_gt_pm_wait_for_idle(gt);
731         rps->work.func = saved_work;
732
733         if (CPU_LATENCY >= 0)
734                 cpu_latency_qos_remove_request(&qos);
735
736         return err;
737 }
738
739 int live_rps_frequency_srm(void *arg)
740 {
741         void (*saved_work)(struct work_struct *wrk);
742         struct intel_gt *gt = arg;
743         struct intel_rps *rps = &gt->rps;
744         struct intel_engine_cs *engine;
745         struct pm_qos_request qos;
746         enum intel_engine_id id;
747         int err = 0;
748
749         /*
750          * The premise is that the GPU does change frequency at our behest.
751          * Let's check there is a correspondence between the requested
752          * frequency, the actual frequency, and the observed clock rate.
753          */
754
755         if (!intel_rps_is_enabled(rps))
756                 return 0;
757
758         if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
759                 return 0;
760
761         if (CPU_LATENCY >= 0)
762                 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
763
764         intel_gt_pm_wait_for_idle(gt);
765         saved_work = rps->work.func;
766         rps->work.func = dummy_rps_work;
767
768         for_each_engine(engine, gt, id) {
769                 struct i915_request *rq;
770                 struct i915_vma *vma;
771                 u32 *cancel, *cntr;
772                 struct {
773                         u64 count;
774                         int freq;
775                 } min, max;
776
777                 st_engine_heartbeat_disable(engine);
778
779                 vma = create_spin_counter(engine,
780                                           engine->kernel_context->vm, true,
781                                           &cancel, &cntr);
782                 if (IS_ERR(vma)) {
783                         err = PTR_ERR(vma);
784                         st_engine_heartbeat_enable(engine);
785                         break;
786                 }
787
788                 rq = intel_engine_create_kernel_request(engine);
789                 if (IS_ERR(rq)) {
790                         err = PTR_ERR(rq);
791                         goto err_vma;
792                 }
793
794                 err = i915_request_await_object(rq, vma->obj, false);
795                 if (!err)
796                         err = i915_vma_move_to_active(vma, rq, 0);
797                 if (!err)
798                         err = rq->engine->emit_bb_start(rq,
799                                                         vma->node.start,
800                                                         PAGE_SIZE, 0);
801                 i915_request_add(rq);
802                 if (err)
803                         goto err_vma;
804
805                 if (wait_for(READ_ONCE(*cntr), 10)) {
806                         pr_err("%s: timed loop did not start\n",
807                                engine->name);
808                         goto err_vma;
809                 }
810
811                 min.freq = rps->min_freq;
812                 min.count = measure_frequency_at(rps, cntr, &min.freq);
813
814                 max.freq = rps->max_freq;
815                 max.count = measure_frequency_at(rps, cntr, &max.freq);
816
817                 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
818                         engine->name,
819                         min.count, intel_gpu_freq(rps, min.freq),
820                         max.count, intel_gpu_freq(rps, max.freq),
821                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
822                                                      max.freq * min.count));
823
824                 if (!scaled_within(max.freq * min.count,
825                                    min.freq * max.count,
826                                    1, 2)) {
827                         int f;
828
829                         pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
830                                engine->name,
831                                max.freq * min.count,
832                                min.freq * max.count);
833                         show_pcu_config(rps);
834
835                         for (f = min.freq + 1; f <= rps->max_freq; f++) {
836                                 int act = f;
837                                 u64 count;
838
839                                 count = measure_frequency_at(rps, cntr, &act);
840                                 if (act < f)
841                                         break;
842
843                                 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
844                                         engine->name,
845                                         act, intel_gpu_freq(rps, act), count,
846                                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
847                                                                      act * min.count));
848
849                                 f = act; /* may skip ahead [pcu granularity] */
850                         }
851
852                         err = -EINTR; /* ignore error, continue on with test */
853                 }
854
855 err_vma:
856                 *cancel = MI_BATCH_BUFFER_END;
857                 i915_gem_object_flush_map(vma->obj);
858                 i915_gem_object_unpin_map(vma->obj);
859                 i915_vma_unpin(vma);
860                 i915_vma_unlock(vma);
861                 i915_vma_put(vma);
862
863                 st_engine_heartbeat_enable(engine);
864                 if (igt_flush_test(gt->i915))
865                         err = -EIO;
866                 if (err)
867                         break;
868         }
869
870         intel_gt_pm_wait_for_idle(gt);
871         rps->work.func = saved_work;
872
873         if (CPU_LATENCY >= 0)
874                 cpu_latency_qos_remove_request(&qos);
875
876         return err;
877 }
878
879 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
880 {
881         /* Flush any previous EI */
882         usleep_range(timeout_us, 2 * timeout_us);
883
884         /* Reset the interrupt status */
885         rps_disable_interrupts(rps);
886         GEM_BUG_ON(rps->pm_iir);
887         rps_enable_interrupts(rps);
888
889         /* And then wait for the timeout, for real this time */
890         usleep_range(2 * timeout_us, 3 * timeout_us);
891 }
892
893 static int __rps_up_interrupt(struct intel_rps *rps,
894                               struct intel_engine_cs *engine,
895                               struct igt_spinner *spin)
896 {
897         struct intel_uncore *uncore = engine->uncore;
898         struct i915_request *rq;
899         u32 timeout;
900
901         if (!intel_engine_can_store_dword(engine))
902                 return 0;
903
904         rps_set_check(rps, rps->min_freq);
905
906         rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
907         if (IS_ERR(rq))
908                 return PTR_ERR(rq);
909
910         i915_request_get(rq);
911         i915_request_add(rq);
912
913         if (!igt_wait_for_spinner(spin, rq)) {
914                 pr_err("%s: RPS spinner did not start\n",
915                        engine->name);
916                 i915_request_put(rq);
917                 intel_gt_set_wedged(engine->gt);
918                 return -EIO;
919         }
920
921         if (!intel_rps_is_active(rps)) {
922                 pr_err("%s: RPS not enabled on starting spinner\n",
923                        engine->name);
924                 igt_spinner_end(spin);
925                 i915_request_put(rq);
926                 return -EINVAL;
927         }
928
929         if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
930                 pr_err("%s: RPS did not register UP interrupt\n",
931                        engine->name);
932                 i915_request_put(rq);
933                 return -EINVAL;
934         }
935
936         if (rps->last_freq != rps->min_freq) {
937                 pr_err("%s: RPS did not program min frequency\n",
938                        engine->name);
939                 i915_request_put(rq);
940                 return -EINVAL;
941         }
942
943         timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
944         timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
945         timeout = DIV_ROUND_UP(timeout, 1000);
946
947         sleep_for_ei(rps, timeout);
948         GEM_BUG_ON(i915_request_completed(rq));
949
950         igt_spinner_end(spin);
951         i915_request_put(rq);
952
953         if (rps->cur_freq != rps->min_freq) {
954                 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
955                        engine->name, intel_rps_read_actual_frequency(rps));
956                 return -EINVAL;
957         }
958
959         if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
960                 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
961                        engine->name, rps->pm_iir,
962                        intel_uncore_read(uncore, GEN6_RP_PREV_UP),
963                        intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
964                        intel_uncore_read(uncore, GEN6_RP_UP_EI));
965                 return -EINVAL;
966         }
967
968         return 0;
969 }
970
971 static int __rps_down_interrupt(struct intel_rps *rps,
972                                 struct intel_engine_cs *engine)
973 {
974         struct intel_uncore *uncore = engine->uncore;
975         u32 timeout;
976
977         rps_set_check(rps, rps->max_freq);
978
979         if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
980                 pr_err("%s: RPS did not register DOWN interrupt\n",
981                        engine->name);
982                 return -EINVAL;
983         }
984
985         if (rps->last_freq != rps->max_freq) {
986                 pr_err("%s: RPS did not program max frequency\n",
987                        engine->name);
988                 return -EINVAL;
989         }
990
991         timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
992         timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
993         timeout = DIV_ROUND_UP(timeout, 1000);
994
995         sleep_for_ei(rps, timeout);
996
997         if (rps->cur_freq != rps->max_freq) {
998                 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
999                        engine->name,
1000                        intel_rps_read_actual_frequency(rps));
1001                 return -EINVAL;
1002         }
1003
1004         if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1005                 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1006                        engine->name, rps->pm_iir,
1007                        intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1008                        intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1009                        intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1010                        intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1011                        intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1012                        intel_uncore_read(uncore, GEN6_RP_UP_EI));
1013                 return -EINVAL;
1014         }
1015
1016         return 0;
1017 }
1018
1019 int live_rps_interrupt(void *arg)
1020 {
1021         struct intel_gt *gt = arg;
1022         struct intel_rps *rps = &gt->rps;
1023         void (*saved_work)(struct work_struct *wrk);
1024         struct intel_engine_cs *engine;
1025         enum intel_engine_id id;
1026         struct igt_spinner spin;
1027         u32 pm_events;
1028         int err = 0;
1029
1030         /*
1031          * First, let's check whether or not we are receiving interrupts.
1032          */
1033
1034         if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
1035                 return 0;
1036
1037         intel_gt_pm_get(gt);
1038         pm_events = rps->pm_events;
1039         intel_gt_pm_put(gt);
1040         if (!pm_events) {
1041                 pr_err("No RPS PM events registered, but RPS is enabled?\n");
1042                 return -ENODEV;
1043         }
1044
1045         if (igt_spinner_init(&spin, gt))
1046                 return -ENOMEM;
1047
1048         intel_gt_pm_wait_for_idle(gt);
1049         saved_work = rps->work.func;
1050         rps->work.func = dummy_rps_work;
1051
1052         for_each_engine(engine, gt, id) {
1053                 /* Keep the engine busy with a spinner; expect an UP! */
1054                 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1055                         intel_gt_pm_wait_for_idle(engine->gt);
1056                         GEM_BUG_ON(intel_rps_is_active(rps));
1057
1058                         st_engine_heartbeat_disable(engine);
1059
1060                         err = __rps_up_interrupt(rps, engine, &spin);
1061
1062                         st_engine_heartbeat_enable(engine);
1063                         if (err)
1064                                 goto out;
1065
1066                         intel_gt_pm_wait_for_idle(engine->gt);
1067                 }
1068
1069                 /* Keep the engine awake but idle and check for DOWN */
1070                 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1071                         st_engine_heartbeat_disable(engine);
1072                         intel_rc6_disable(&gt->rc6);
1073
1074                         err = __rps_down_interrupt(rps, engine);
1075
1076                         intel_rc6_enable(&gt->rc6);
1077                         st_engine_heartbeat_enable(engine);
1078                         if (err)
1079                                 goto out;
1080                 }
1081         }
1082
1083 out:
1084         if (igt_flush_test(gt->i915))
1085                 err = -EIO;
1086
1087         igt_spinner_fini(&spin);
1088
1089         intel_gt_pm_wait_for_idle(gt);
1090         rps->work.func = saved_work;
1091
1092         return err;
1093 }
1094
1095 static u64 __measure_power(int duration_ms)
1096 {
1097         u64 dE, dt;
1098
1099         dt = ktime_get();
1100         dE = librapl_energy_uJ();
1101         usleep_range(1000 * duration_ms, 2000 * duration_ms);
1102         dE = librapl_energy_uJ() - dE;
1103         dt = ktime_get() - dt;
1104
1105         return div64_u64(1000 * 1000 * dE, dt);
1106 }
1107
1108 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1109 {
1110         u64 x[5];
1111         int i;
1112
1113         *freq = rps_set_check(rps, *freq);
1114         for (i = 0; i < 5; i++)
1115                 x[i] = __measure_power(5);
1116         *freq = (*freq + read_cagf(rps)) / 2;
1117
1118         /* A simple triangle filter for better result stability */
1119         sort(x, 5, sizeof(*x), cmp_u64, NULL);
1120         return div_u64(x[1] + 2 * x[2] + x[3], 4);
1121 }
1122
1123 int live_rps_power(void *arg)
1124 {
1125         struct intel_gt *gt = arg;
1126         struct intel_rps *rps = &gt->rps;
1127         void (*saved_work)(struct work_struct *wrk);
1128         struct intel_engine_cs *engine;
1129         enum intel_engine_id id;
1130         struct igt_spinner spin;
1131         int err = 0;
1132
1133         /*
1134          * Our fundamental assumption is that running at lower frequency
1135          * actually saves power. Let's see if our RAPL measurement support
1136          * that theory.
1137          */
1138
1139         if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1140                 return 0;
1141
1142         if (!librapl_supported(gt->i915))
1143                 return 0;
1144
1145         if (igt_spinner_init(&spin, gt))
1146                 return -ENOMEM;
1147
1148         intel_gt_pm_wait_for_idle(gt);
1149         saved_work = rps->work.func;
1150         rps->work.func = dummy_rps_work;
1151
1152         for_each_engine(engine, gt, id) {
1153                 struct i915_request *rq;
1154                 struct {
1155                         u64 power;
1156                         int freq;
1157                 } min, max;
1158
1159                 if (!intel_engine_can_store_dword(engine))
1160                         continue;
1161
1162                 st_engine_heartbeat_disable(engine);
1163
1164                 rq = igt_spinner_create_request(&spin,
1165                                                 engine->kernel_context,
1166                                                 MI_NOOP);
1167                 if (IS_ERR(rq)) {
1168                         st_engine_heartbeat_enable(engine);
1169                         err = PTR_ERR(rq);
1170                         break;
1171                 }
1172
1173                 i915_request_add(rq);
1174
1175                 if (!igt_wait_for_spinner(&spin, rq)) {
1176                         pr_err("%s: RPS spinner did not start\n",
1177                                engine->name);
1178                         igt_spinner_end(&spin);
1179                         st_engine_heartbeat_enable(engine);
1180                         intel_gt_set_wedged(engine->gt);
1181                         err = -EIO;
1182                         break;
1183                 }
1184
1185                 max.freq = rps->max_freq;
1186                 max.power = measure_power_at(rps, &max.freq);
1187
1188                 min.freq = rps->min_freq;
1189                 min.power = measure_power_at(rps, &min.freq);
1190
1191                 igt_spinner_end(&spin);
1192                 st_engine_heartbeat_enable(engine);
1193
1194                 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1195                         engine->name,
1196                         min.power, intel_gpu_freq(rps, min.freq),
1197                         max.power, intel_gpu_freq(rps, max.freq));
1198
1199                 if (10 * min.freq >= 9 * max.freq) {
1200                         pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1201                                   min.freq, intel_gpu_freq(rps, min.freq),
1202                                   max.freq, intel_gpu_freq(rps, max.freq));
1203                         continue;
1204                 }
1205
1206                 if (11 * min.power > 10 * max.power) {
1207                         pr_err("%s: did not conserve power when setting lower frequency!\n",
1208                                engine->name);
1209                         err = -EINVAL;
1210                         break;
1211                 }
1212
1213                 if (igt_flush_test(gt->i915)) {
1214                         err = -EIO;
1215                         break;
1216                 }
1217         }
1218
1219         igt_spinner_fini(&spin);
1220
1221         intel_gt_pm_wait_for_idle(gt);
1222         rps->work.func = saved_work;
1223
1224         return err;
1225 }
1226
1227 int live_rps_dynamic(void *arg)
1228 {
1229         struct intel_gt *gt = arg;
1230         struct intel_rps *rps = &gt->rps;
1231         struct intel_engine_cs *engine;
1232         enum intel_engine_id id;
1233         struct igt_spinner spin;
1234         int err = 0;
1235
1236         /*
1237          * We've looked at the bascs, and have established that we
1238          * can change the clock frequency and that the HW will generate
1239          * interrupts based on load. Now we check how we integrate those
1240          * moving parts into dynamic reclocking based on load.
1241          */
1242
1243         if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1244                 return 0;
1245
1246         if (igt_spinner_init(&spin, gt))
1247                 return -ENOMEM;
1248
1249         if (intel_rps_has_interrupts(rps))
1250                 pr_info("RPS has interrupt support\n");
1251         if (intel_rps_uses_timer(rps))
1252                 pr_info("RPS has timer support\n");
1253
1254         for_each_engine(engine, gt, id) {
1255                 struct i915_request *rq;
1256                 struct {
1257                         ktime_t dt;
1258                         u8 freq;
1259                 } min, max;
1260
1261                 if (!intel_engine_can_store_dword(engine))
1262                         continue;
1263
1264                 intel_gt_pm_wait_for_idle(gt);
1265                 GEM_BUG_ON(intel_rps_is_active(rps));
1266                 rps->cur_freq = rps->min_freq;
1267
1268                 intel_engine_pm_get(engine);
1269                 intel_rc6_disable(&gt->rc6);
1270                 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1271
1272                 rq = igt_spinner_create_request(&spin,
1273                                                 engine->kernel_context,
1274                                                 MI_NOOP);
1275                 if (IS_ERR(rq)) {
1276                         err = PTR_ERR(rq);
1277                         goto err;
1278                 }
1279
1280                 i915_request_add(rq);
1281
1282                 max.dt = ktime_get();
1283                 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1284                 max.dt = ktime_sub(ktime_get(), max.dt);
1285
1286                 igt_spinner_end(&spin);
1287
1288                 min.dt = ktime_get();
1289                 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1290                 min.dt = ktime_sub(ktime_get(), min.dt);
1291
1292                 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1293                         engine->name,
1294                         max.freq, intel_gpu_freq(rps, max.freq),
1295                         ktime_to_ns(max.dt),
1296                         min.freq, intel_gpu_freq(rps, min.freq),
1297                         ktime_to_ns(min.dt));
1298                 if (min.freq >= max.freq) {
1299                         pr_err("%s: dynamic reclocking of spinner failed\n!",
1300                                engine->name);
1301                         err = -EINVAL;
1302                 }
1303
1304 err:
1305                 intel_rc6_enable(&gt->rc6);
1306                 intel_engine_pm_put(engine);
1307
1308                 if (igt_flush_test(gt->i915))
1309                         err = -EIO;
1310                 if (err)
1311                         break;
1312         }
1313
1314         igt_spinner_fini(&spin);
1315
1316         return err;
1317 }