9d9c8e0aa2e704b2e594bf07150854f78356f0bc
[linux-2.6-microblaze.git] / drivers / gpu / drm / i915 / gt / selftest_rps.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
8
9 #include "intel_engine_heartbeat.h"
10 #include "intel_engine_pm.h"
11 #include "intel_gpu_commands.h"
12 #include "intel_gt_pm.h"
13 #include "intel_rc6.h"
14 #include "selftest_rps.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_spinner.h"
17 #include "selftests/librapl.h"
18
/* Try to isolate the impact of cstates from determining frequency response */
20 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
21
22 static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine)
23 {
24         unsigned long old;
25
26         old = fetch_and_zero(&engine->props.heartbeat_interval_ms);
27
28         intel_engine_pm_get(engine);
29         intel_engine_park_heartbeat(engine);
30
31         return old;
32 }
33
/*
 * Undo engine_heartbeat_disable(): drop the pm wakeref taken there and
 * restore the saved heartbeat interval.
 */
static void engine_heartbeat_enable(struct intel_engine_cs *engine,
                                    unsigned long saved)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms = saved;
}
41
/*
 * No-op replacement for rps->work.func: the tests below swap this in so
 * the real RPS worker cannot adjust frequencies behind the test's back
 * while we drive the frequency manually.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
}
45
46 static int cmp_u64(const void *A, const void *B)
47 {
48         const u64 *a = A, *b = B;
49
50         if (a < b)
51                 return -1;
52         else if (a > b)
53                 return 1;
54         else
55                 return 0;
56 }
57
58 static struct i915_vma *
59 create_spin_counter(struct intel_engine_cs *engine,
60                     struct i915_address_space *vm,
61                     bool srm,
62                     u32 **cancel,
63                     u32 **counter)
64 {
65         enum {
66                 COUNT,
67                 INC,
68                 __NGPR__,
69         };
70 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
71         struct drm_i915_gem_object *obj;
72         struct i915_vma *vma;
73         unsigned long end;
74         u32 *base, *cs;
75         int loop, i;
76         int err;
77
78         obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
79         if (IS_ERR(obj))
80                 return ERR_CAST(obj);
81
82         end = obj->base.size / sizeof(u32) - 1;
83
84         vma = i915_vma_instance(obj, vm, NULL);
85         if (IS_ERR(vma)) {
86                 i915_gem_object_put(obj);
87                 return vma;
88         }
89
90         err = i915_vma_pin(vma, 0, 0, PIN_USER);
91         if (err) {
92                 i915_vma_put(vma);
93                 return ERR_PTR(err);
94         }
95
96         base = i915_gem_object_pin_map(obj, I915_MAP_WC);
97         if (IS_ERR(base)) {
98                 i915_gem_object_put(obj);
99                 return ERR_CAST(base);
100         }
101         cs = base;
102
103         *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
104         for (i = 0; i < __NGPR__; i++) {
105                 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
106                 *cs++ = 0;
107                 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
108                 *cs++ = 0;
109         }
110
111         *cs++ = MI_LOAD_REGISTER_IMM(1);
112         *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
113         *cs++ = 1;
114
115         loop = cs - base;
116
117         /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
118         for (i = 0; i < 1024; i++) {
119                 *cs++ = MI_MATH(4);
120                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
121                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
122                 *cs++ = MI_MATH_ADD;
123                 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
124
125                 if (srm) {
126                         *cs++ = MI_STORE_REGISTER_MEM_GEN8;
127                         *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
128                         *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
129                         *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
130                 }
131         }
132
133         *cs++ = MI_BATCH_BUFFER_START_GEN8;
134         *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
135         *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
136         GEM_BUG_ON(cs - base > end);
137
138         i915_gem_object_flush_map(obj);
139
140         *cancel = base + loop;
141         *counter = srm ? memset32(base + end, 0, 1) : NULL;
142         return vma;
143 }
144
145 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
146 {
147         u8 history[64], i;
148         unsigned long end;
149         int sleep;
150
151         i = 0;
152         memset(history, freq, sizeof(history));
153         sleep = 20;
154
155         /* The PCU does not change instantly, but drifts towards the goal? */
156         end = jiffies + msecs_to_jiffies(timeout_ms);
157         do {
158                 u8 act;
159
160                 act = read_cagf(rps);
161                 if (time_after(jiffies, end))
162                         return act;
163
164                 /* Target acquired */
165                 if (act == freq)
166                         return act;
167
168                 /* Any change within the last N samples? */
169                 if (!memchr_inv(history, act, sizeof(history)))
170                         return act;
171
172                 history[i] = act;
173                 i = (i + 1) % ARRAY_SIZE(history);
174
175                 usleep_range(sleep, 2 * sleep);
176                 sleep *= 2;
177                 if (sleep > timeout_ms * 20)
178                         sleep = timeout_ms * 20;
179         } while (1);
180 }
181
/*
 * Request frequency @freq under the rps lock and then wait (up to 50ms)
 * for the actual frequency to settle. Returns the frequency reached,
 * which may be lower than requested if the PCU throttles us.
 */
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!rps->active);
	intel_rps_set(rps, freq);
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}
192
193 static void show_pstate_limits(struct intel_rps *rps)
194 {
195         struct drm_i915_private *i915 = rps_to_i915(rps);
196
197         if (IS_BROXTON(i915)) {
198                 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
199                         i915_mmio_reg_offset(BXT_RP_STATE_CAP),
200                         intel_uncore_read(rps_to_uncore(rps),
201                                           BXT_RP_STATE_CAP));
202         } else if (IS_GEN(i915, 9)) {
203                 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
204                         i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
205                         intel_uncore_read(rps_to_uncore(rps),
206                                           GEN9_RP_STATE_LIMITS));
207         }
208 }
209
/*
 * live_rps_control - verify that requesting a frequency via intel_rps_set()
 * results in the GPU actually running at (close to) that frequency.
 *
 * For each engine: start a spinner to keep the GPU busy, then sweep the
 * frequency from min upwards until the PCU refuses to go higher (the
 * throttle "limit"), checking each request is honoured. Also measures how
 * long the ramp to limit/min takes. Returns 0 on success or -errno.
 */
int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!rps->enabled || rps->max_freq <= rps->min_freq)
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* Park the GT, then neuter the RPS worker so only we change freq */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		unsigned long saved_heartbeat;
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		saved_heartbeat = engine_heartbeat_disable(engine);

		/* Keep the engine busy so RPS stays active while we sweep */
		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine, saved_heartbeat);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine, saved_heartbeat);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		/* Walk upwards until the PCU stops following our requests */
		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine, saved_heartbeat);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		/* Time the ramp up to the throttle limit and back to min */
		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		engine_heartbeat_enable(engine, saved_heartbeat);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	/* Restore the real RPS worker */
	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
340
341 static void show_pcu_config(struct intel_rps *rps)
342 {
343         struct drm_i915_private *i915 = rps_to_i915(rps);
344         unsigned int max_gpu_freq, min_gpu_freq;
345         intel_wakeref_t wakeref;
346         int gpu_freq;
347
348         if (!HAS_LLC(i915))
349                 return;
350
351         min_gpu_freq = rps->min_freq;
352         max_gpu_freq = rps->max_freq;
353         if (INTEL_GEN(i915) >= 9) {
354                 /* Convert GT frequency to 50 HZ units */
355                 min_gpu_freq /= GEN9_FREQ_SCALER;
356                 max_gpu_freq /= GEN9_FREQ_SCALER;
357         }
358
359         wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
360
361         pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
362         for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
363                 int ia_freq = gpu_freq;
364
365                 sandybridge_pcode_read(i915,
366                                        GEN6_PCODE_READ_MIN_FREQ_TABLE,
367                                        &ia_freq, NULL);
368
369                 pr_info("%5d  %5d  %5d\n",
370                         gpu_freq * 50,
371                         ((ia_freq >> 0) & 0xff) * 100,
372                         ((ia_freq >> 8) & 0xff) * 100);
373         }
374
375         intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
376 }
377
378 static u64 __measure_frequency(u32 *cntr, int duration_ms)
379 {
380         u64 dc, dt;
381
382         dt = ktime_get();
383         dc = READ_ONCE(*cntr);
384         usleep_range(1000 * duration_ms, 2000 * duration_ms);
385         dc = READ_ONCE(*cntr) - dc;
386         dt = ktime_get() - dt;
387
388         return div64_u64(1000 * 1000 * dc, dt);
389 }
390
391 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
392 {
393         u64 x[5];
394         int i;
395
396         *freq = rps_set_check(rps, *freq);
397         for (i = 0; i < 5; i++)
398                 x[i] = __measure_frequency(cntr, 2);
399         *freq = (*freq + read_cagf(rps)) / 2;
400
401         /* A simple triangle filter for better result stability */
402         sort(x, 5, sizeof(*x), cmp_u64, NULL);
403         return div_u64(x[1] + 2 * x[2] + x[3], 4);
404 }
405
406 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
407                                   int duration_ms)
408 {
409         u64 dc, dt;
410
411         dt = ktime_get();
412         dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
413         usleep_range(1000 * duration_ms, 2000 * duration_ms);
414         dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
415         dt = ktime_get() - dt;
416
417         return div64_u64(1000 * 1000 * dc, dt);
418 }
419
420 static u64 measure_cs_frequency_at(struct intel_rps *rps,
421                                    struct intel_engine_cs *engine,
422                                    int *freq)
423 {
424         u64 x[5];
425         int i;
426
427         *freq = rps_set_check(rps, *freq);
428         for (i = 0; i < 5; i++)
429                 x[i] = __measure_cs_frequency(engine, 2);
430         *freq = (*freq + read_cagf(rps)) / 2;
431
432         /* A simple triangle filter for better result stability */
433         sort(x, 5, sizeof(*x), cmp_u64, NULL);
434         return div_u64(x[1] + 2 * x[2] + x[3], 4);
435 }
436
437 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
438 {
439         return f_d * x > f_n * y && f_n * x < f_d * y;
440 }
441
/*
 * live_rps_frequency_cs - verify the CS clock scales with the requested
 * RPS frequency, sampling the spin-counter via mmio register reads.
 *
 * For each engine, submit the spinning counter batch, measure the count
 * rate at min and at max frequency, and require the rates to scale with
 * frequency to within a 2:3 tolerance. On mismatch, walk the full
 * frequency range to log a table of rates for diagnosis.
 */
int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!rps->enabled || rps->max_freq <= rps->min_freq)
		return 0;

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
		return 0;

	/* Keep CPU cstates from perturbing the measurement (see CPU_LATENCY) */
	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		unsigned long saved_heartbeat;
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		saved_heartbeat = engine_heartbeat_disable(engine);

		/* srm=false: we sample CS_GPR(0) over mmio, not memory */
		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			engine_heartbeat_enable(engine, saved_heartbeat);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_vma_unlock(vma);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		/*
		 * NOTE(review): on timeout err is left at 0, so this engine
		 * is silently skipped rather than failing — confirm intended.
		 */
		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			/* Log a rate per attainable frequency for diagnosis */
			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINVAL;
		}

err_vma:
		/* Terminate the spinner loop, then release the batch */
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_put(vma);

		engine_heartbeat_enable(engine, saved_heartbeat);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}
583
/*
 * live_rps_frequency_srm - as live_rps_frequency_cs(), but sample the
 * spin counter from memory via the batch's SRM writes instead of mmio.
 *
 * The memory path tolerates less deviation (1:2) since SRM writes keep
 * pace with the CS clock.
 */
int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!rps->enabled || rps->max_freq <= rps->min_freq)
		return 0;

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
		return 0;

	/* Keep CPU cstates from perturbing the measurement (see CPU_LATENCY) */
	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		unsigned long saved_heartbeat;
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		saved_heartbeat = engine_heartbeat_disable(engine);

		/* srm=true: the batch stores the counter to *cntr for the CPU */
		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			engine_heartbeat_enable(engine, saved_heartbeat);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_vma_unlock(vma);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		/*
		 * NOTE(review): on timeout err is left at 0, so this engine
		 * is silently skipped rather than failing — confirm intended.
		 */
		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			/* Log a rate per attainable frequency for diagnosis */
			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINVAL;
		}

err_vma:
		/* Terminate the spinner loop, then release the batch */
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_put(vma);

		engine_heartbeat_enable(engine, saved_heartbeat);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}
724
/*
 * Sleep across at least one full RPS evaluation interval (@timeout_us)
 * with a freshly-cleared interrupt state, so that any UP/DOWN event
 * accumulated in rps->pm_iir afterwards belongs to the window we
 * control.
 */
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}
738
/*
 * Check that a busy GPU held at min frequency raises an UP-threshold
 * interrupt within one evaluation interval: start a spinner, pin the
 * frequency to min, sleep across an EI with a clean pm_iir, then verify
 * the UP event was latched without the frequency having moved.
 */
static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!rps->active) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	/*
	 * NOTE(review): the next two error paths drop the request without
	 * igt_spinner_end(), unlike the path above — confirm the caller
	 * cleans up the still-spinning batch.
	 */
	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	/* Convert the hardware EI into microseconds for the sleep below */
	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = GT_PM_INTERVAL_TO_US(engine->i915, timeout);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}
815
/*
 * Check that an idle GPU held at max frequency raises a DOWN-threshold
 * (or DOWN-timeout) interrupt within one evaluation interval: pin the
 * frequency to max with no work queued, sleep across an EI with a clean
 * pm_iir, then verify a DOWN event was latched without the frequency
 * having moved.
 */
static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	/* Convert the hardware EI into microseconds for the sleep below */
	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = GT_PM_INTERVAL_TO_US(engine->i915, timeout);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}
862
/*
 * Check that the hardware delivers RPS up/down interrupts: drive each
 * engine busy with a spinner and expect an UP event, then hold it awake
 * but idle and expect a DOWN event. The rps worker is replaced with a
 * no-op for the duration so pm_iir can be inspected without the
 * frequency being reprogrammed underneath the checks.
 */
int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!rps->enabled || rps->max_freq <= rps->min_freq)
		return 0;

	/* pm_events is only valid while the GT is awake; sample it then. */
	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* Quiesce, then divert the rps worker so pm_iir stays untouched. */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			unsigned long saved_heartbeat;

			/* Start each pass from a fully parked GT. */
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(rps->active);

			/* No heartbeats: the spinner must run undisturbed. */
			saved_heartbeat = engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			engine_heartbeat_enable(engine, saved_heartbeat);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			unsigned long saved_heartbeat;

			saved_heartbeat = engine_heartbeat_disable(engine);
			/* rc6 would park the GT; hold it off so we stay idle-but-awake. */
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			engine_heartbeat_enable(engine, saved_heartbeat);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	/* Park before restoring the real rps worker. */
	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
942
943 static u64 __measure_power(int duration_ms)
944 {
945         u64 dE, dt;
946
947         dt = ktime_get();
948         dE = librapl_energy_uJ();
949         usleep_range(1000 * duration_ms, 2000 * duration_ms);
950         dE = librapl_energy_uJ() - dE;
951         dt = ktime_get() - dt;
952
953         return div64_u64(1000 * 1000 * dE, dt);
954 }
955
956 static u64 measure_power_at(struct intel_rps *rps, int *freq)
957 {
958         u64 x[5];
959         int i;
960
961         *freq = rps_set_check(rps, *freq);
962         for (i = 0; i < 5; i++)
963                 x[i] = __measure_power(5);
964         *freq = (*freq + read_cagf(rps)) / 2;
965
966         /* A simple triangle filter for better result stability */
967         sort(x, 5, sizeof(*x), cmp_u64, NULL);
968         return div_u64(x[1] + 2 * x[2] + x[3], 4);
969 }
970
971 int live_rps_power(void *arg)
972 {
973         struct intel_gt *gt = arg;
974         struct intel_rps *rps = &gt->rps;
975         void (*saved_work)(struct work_struct *wrk);
976         struct intel_engine_cs *engine;
977         enum intel_engine_id id;
978         struct igt_spinner spin;
979         int err = 0;
980
981         /*
982          * Our fundamental assumption is that running at lower frequency
983          * actually saves power. Let's see if our RAPL measurement support
984          * that theory.
985          */
986
987         if (!rps->enabled || rps->max_freq <= rps->min_freq)
988                 return 0;
989
990         if (!librapl_energy_uJ())
991                 return 0;
992
993         if (igt_spinner_init(&spin, gt))
994                 return -ENOMEM;
995
996         intel_gt_pm_wait_for_idle(gt);
997         saved_work = rps->work.func;
998         rps->work.func = dummy_rps_work;
999
1000         for_each_engine(engine, gt, id) {
1001                 unsigned long saved_heartbeat;
1002                 struct i915_request *rq;
1003                 struct {
1004                         u64 power;
1005                         int freq;
1006                 } min, max;
1007
1008                 if (!intel_engine_can_store_dword(engine))
1009                         continue;
1010
1011                 saved_heartbeat = engine_heartbeat_disable(engine);
1012
1013                 rq = igt_spinner_create_request(&spin,
1014                                                 engine->kernel_context,
1015                                                 MI_NOOP);
1016                 if (IS_ERR(rq)) {
1017                         engine_heartbeat_enable(engine, saved_heartbeat);
1018                         err = PTR_ERR(rq);
1019                         break;
1020                 }
1021
1022                 i915_request_add(rq);
1023
1024                 if (!igt_wait_for_spinner(&spin, rq)) {
1025                         pr_err("%s: RPS spinner did not start\n",
1026                                engine->name);
1027                         igt_spinner_end(&spin);
1028                         engine_heartbeat_enable(engine, saved_heartbeat);
1029                         intel_gt_set_wedged(engine->gt);
1030                         err = -EIO;
1031                         break;
1032                 }
1033
1034                 max.freq = rps->max_freq;
1035                 max.power = measure_power_at(rps, &max.freq);
1036
1037                 min.freq = rps->min_freq;
1038                 min.power = measure_power_at(rps, &min.freq);
1039
1040                 igt_spinner_end(&spin);
1041                 engine_heartbeat_enable(engine, saved_heartbeat);
1042
1043                 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1044                         engine->name,
1045                         min.power, intel_gpu_freq(rps, min.freq),
1046                         max.power, intel_gpu_freq(rps, max.freq));
1047
1048                 if (10 * min.freq >= 9 * max.freq) {
1049                         pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1050                                   min.freq, intel_gpu_freq(rps, min.freq),
1051                                   max.freq, intel_gpu_freq(rps, max.freq));
1052                         continue;
1053                 }
1054
1055                 if (11 * min.power > 10 * max.power) {
1056                         pr_err("%s: did not conserve power when setting lower frequency!\n",
1057                                engine->name);
1058                         err = -EINVAL;
1059                         break;
1060                 }
1061
1062                 if (igt_flush_test(gt->i915)) {
1063                         err = -EIO;
1064                         break;
1065                 }
1066         }
1067
1068         igt_spinner_fini(&spin);
1069
1070         intel_gt_pm_wait_for_idle(gt);
1071         rps->work.func = saved_work;
1072
1073         return err;
1074 }
1075
1076 int live_rps_dynamic(void *arg)
1077 {
1078         struct intel_gt *gt = arg;
1079         struct intel_rps *rps = &gt->rps;
1080         struct intel_engine_cs *engine;
1081         enum intel_engine_id id;
1082         struct igt_spinner spin;
1083         int err = 0;
1084
1085         /*
1086          * We've looked at the bascs, and have established that we
1087          * can change the clock frequency and that the HW will generate
1088          * interrupts based on load. Now we check how we integrate those
1089          * moving parts into dynamic reclocking based on load.
1090          */
1091
1092         if (!rps->enabled || rps->max_freq <= rps->min_freq)
1093                 return 0;
1094
1095         if (igt_spinner_init(&spin, gt))
1096                 return -ENOMEM;
1097
1098         for_each_engine(engine, gt, id) {
1099                 struct i915_request *rq;
1100                 struct {
1101                         ktime_t dt;
1102                         u8 freq;
1103                 } min, max;
1104
1105                 if (!intel_engine_can_store_dword(engine))
1106                         continue;
1107
1108                 intel_gt_pm_wait_for_idle(gt);
1109                 GEM_BUG_ON(rps->active);
1110                 rps->cur_freq = rps->min_freq;
1111
1112                 intel_engine_pm_get(engine);
1113                 intel_rc6_disable(&gt->rc6);
1114                 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1115
1116                 rq = igt_spinner_create_request(&spin,
1117                                                 engine->kernel_context,
1118                                                 MI_NOOP);
1119                 if (IS_ERR(rq)) {
1120                         err = PTR_ERR(rq);
1121                         goto err;
1122                 }
1123
1124                 i915_request_add(rq);
1125
1126                 max.dt = ktime_get();
1127                 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1128                 max.dt = ktime_sub(ktime_get(), max.dt);
1129
1130                 igt_spinner_end(&spin);
1131
1132                 min.dt = ktime_get();
1133                 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1134                 min.dt = ktime_sub(ktime_get(), min.dt);
1135
1136                 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1137                         engine->name,
1138                         max.freq, intel_gpu_freq(rps, max.freq),
1139                         ktime_to_ns(max.dt),
1140                         min.freq, intel_gpu_freq(rps, min.freq),
1141                         ktime_to_ns(min.dt));
1142                 if (min.freq >= max.freq) {
1143                         pr_err("%s: dynamic reclocking of spinner failed\n!",
1144                                engine->name);
1145                         err = -EINVAL;
1146                 }
1147
1148 err:
1149                 intel_rc6_enable(&gt->rc6);
1150                 intel_engine_pm_put(engine);
1151
1152                 if (igt_flush_test(gt->i915))
1153                         err = -EIO;
1154                 if (err)
1155                         break;
1156         }
1157
1158         igt_spinner_fini(&spin);
1159
1160         return err;
1161 }