1 // SPDX-License-Identifier: MIT
3 * Copyright © 2020 Intel Corporation
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
9 #include "intel_engine_heartbeat.h"
10 #include "intel_engine_pm.h"
11 #include "intel_gpu_commands.h"
12 #include "intel_gt_clock_utils.h"
13 #include "intel_gt_pm.h"
14 #include "intel_rc6.h"
15 #include "selftest_engine_heartbeat.h"
16 #include "selftest_rps.h"
17 #include "selftests/igt_flush_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/librapl.h"
21 /* Try to isolate the impact of cstates from determining frequency response */
22 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
/*
 * No-op replacement for rps->work.func: the selftests park the real RPS
 * worker (see the "rps->work.func = dummy_rps_work" call sites below) so
 * background frequency management cannot perturb the measurements.
 * (Body elided in this listing.)
 */
24 static void dummy_rps_work(struct work_struct *wrk)
/*
 * sort() comparator for u64 samples, used by the triangle-filter callers
 * below. Presumably orders ascending so x[1..3] are the middle values —
 * the comparison itself is elided in this listing; TODO confirm.
 */
28 static int cmp_u64(const void *A, const void *B)
30 const u64 *a = A, *b = B;
/*
 * sort() comparator for u32 samples (used for the EI cycle counts in
 * live_rps_clock_interval). Presumably ascending, mirroring cmp_u64 —
 * the comparison itself is elided in this listing; TODO confirm.
 */
40 static int cmp_u32(const void *A, const void *B)
42 const u32 *a = A, *b = B;
/*
 * create_spin_counter - build a self-looping batch buffer that continuously
 * increments a CS_GPR register via MI_MATH, acting as a counter that ticks
 * at the command-streamer clock rate.
 *
 * The MI_MATH add is unrolled 1024 times so the MI_BB_START at the end of
 * the loop does not skew the measured rate. If @srm, the running COUNT is
 * also spilled to the last dword of the buffer (returned via @counter) via
 * MI_STORE_REGISTER_MEM so the CPU can sample it. *@cancel points at the
 * loop entry so the caller can terminate the batch by overwriting it with
 * MI_BATCH_BUFFER_END (see the callers below).
 *
 * NOTE(review): this listing is elided; comments describe only the visible
 * statements — error-unwind and several emits are not shown.
 */
52 static struct i915_vma *
53 create_spin_counter(struct intel_engine_cs *engine,
54 struct i915_address_space *vm,
64 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
65 struct drm_i915_gem_object *obj;
72 obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
/* Last dword of the 64KiB object is reserved for the SRM'ed counter */
76 end = obj->base.size / sizeof(u32) - 1;
78 vma = i915_vma_instance(obj, vm, NULL);
80 i915_gem_object_put(obj);
84 err = i915_vma_pin(vma, 0, 0, PIN_USER);
90 base = i915_gem_object_pin_map(obj, I915_MAP_WC);
92 i915_gem_object_put(obj);
93 return ERR_CAST(base);
/* Initialise every GPR (two dwords each); values elided — presumably 0 */
97 *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
98 for (i = 0; i < __NGPR__; i++) {
99 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
101 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
/* Load the per-iteration increment into CS_GPR(INC); value elided */
105 *cs++ = MI_LOAD_REGISTER_IMM(1);
106 *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
111 /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
112 for (i = 0; i < 1024; i++) {
114 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
115 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
117 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
/* srm path: spill COUNT to the final dword so the CPU can read it */
120 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
121 *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
122 *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
123 *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
/* Jump back to the loop entry; runs until *cancel is rewritten */
127 *cs++ = MI_BATCH_BUFFER_START_GEN8;
128 *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
129 *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
130 GEM_BUG_ON(cs - base > end);
132 i915_gem_object_flush_map(obj);
134 *cancel = base + loop;
/* Only the srm variant exposes the memory-backed counter */
135 *counter = srm ? memset32(base + end, 0, 1) : NULL;
/*
 * wait_for_freq - poll the actual GPU frequency (CAGF) until it reaches
 * @freq, stops changing across the recent sample history, or @timeout_ms
 * expires; presumably returns the last frequency observed (return elided
 * in this listing — TODO confirm). The poll interval grows each pass,
 * capped at timeout_ms * 20 microseconds.
 */
139 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
/* Seed the history with the target so "no change" implies convergence */
146 memset(history, freq, sizeof(history));
149 /* The PCU does not change instantly, but drifts towards the goal? */
150 end = jiffies + msecs_to_jiffies(timeout_ms);
154 act = read_cagf(rps);
155 if (time_after(jiffies, end))
158 /* Target acquired */
162 /* Any change within the last N samples? */
163 if (!memchr_inv(history, act, sizeof(history)))
167 i = (i + 1) % ARRAY_SIZE(history);
169 usleep_range(sleep, 2 * sleep);
/* Cap the backoff so we still sample a few times per timeout */
171 if (sleep > timeout_ms * 20)
172 sleep = timeout_ms * 20;
/*
 * rps_set_check - request @freq under rps->lock (RPS must be active,
 * and the request must stick in rps->last_freq), then wait up to 50ms
 * for the hardware to settle; returns the frequency actually achieved
 * as reported by wait_for_freq().
 */
176 static u8 rps_set_check(struct intel_rps *rps, u8 freq)
178 mutex_lock(&rps->lock);
179 GEM_BUG_ON(!intel_rps_is_active(rps));
180 intel_rps_set(rps, freq);
181 GEM_BUG_ON(rps->last_freq != freq);
182 mutex_unlock(&rps->lock);
184 return wait_for_freq(rps, freq, 50);
/*
 * show_pstate_limits - debug dump of the platform P-state limit registers
 * (Broxton's RP_STATE_CAP, gen9's RP_STATE_LIMITS), printed when a test
 * fails to reach a requested frequency so throttling can be diagnosed.
 */
187 static void show_pstate_limits(struct intel_rps *rps)
189 struct drm_i915_private *i915 = rps_to_i915(rps);
191 if (IS_BROXTON(i915)) {
192 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
193 i915_mmio_reg_offset(BXT_RP_STATE_CAP),
194 intel_uncore_read(rps_to_uncore(rps),
196 } else if (IS_GEN(i915, 9)) {
197 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
198 i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
199 intel_uncore_read(rps_to_uncore(rps),
200 GEN9_RP_STATE_LIMITS));
/*
 * live_rps_clock_interval - verify the GT clock frequency used to convert
 * RPS evaluation-interval (EI) counters to/from walltime.
 *
 * With RPS disabled and a spinner keeping one engine busy, the UP EI is
 * programmed to "infinity" and GEN6_RP_CUR_UP_EI is sampled against
 * ktime. After converting through intel_gt_pm_interval_to_ns() /
 * intel_gt_ns_to_pm_interval(), counted cycles and walltime must agree
 * (the checks below accept roughly a 0.8x..1.25x band). Only one engine
 * is tested — the clock is GT-global ("once is enough").
 *
 * NOTE(review): listing elided; error paths and declarations not shown.
 */
204 int live_rps_clock_interval(void *arg)
206 struct intel_gt *gt = arg;
207 struct intel_rps *rps = >->rps;
208 void (*saved_work)(struct work_struct *wrk);
209 struct intel_engine_cs *engine;
210 enum intel_engine_id id;
211 struct igt_spinner spin;
214 if (!intel_rps_is_enabled(rps))
217 if (igt_spinner_init(&spin, gt))
/* Park RPS's worker so it cannot reprogram the hardware mid-test */
220 intel_gt_pm_wait_for_idle(gt);
221 saved_work = rps->work.func;
222 rps->work.func = dummy_rps_work;
225 intel_rps_disable(>->rps);
227 intel_gt_check_clock_frequency(gt);
229 for_each_engine(engine, gt, id) {
230 struct i915_request *rq;
234 if (!intel_engine_can_store_dword(engine))
237 st_engine_heartbeat_disable(engine);
239 rq = igt_spinner_create_request(&spin,
240 engine->kernel_context,
243 st_engine_heartbeat_enable(engine);
248 i915_request_add(rq);
250 if (!igt_wait_for_spinner(&spin, rq)) {
251 pr_err("%s: RPS spinner did not start\n",
253 igt_spinner_end(&spin);
254 st_engine_heartbeat_enable(engine);
255 intel_gt_set_wedged(engine->gt);
260 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
262 intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
264 /* Set the evaluation interval to infinity! */
265 intel_uncore_write_fw(gt->uncore,
266 GEN6_RP_UP_EI, 0xffffffff);
267 intel_uncore_write_fw(gt->uncore,
268 GEN6_RP_UP_THRESHOLD, 0xffffffff);
270 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
271 GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
273 if (wait_for(intel_uncore_read_fw(gt->uncore,
276 /* Just skip the test; assume lack of HW support */
277 pr_notice("%s: rps evaluation interval not ticking\n",
/* Take 5 paired (walltime, EI-cycle) samples for the median filter */
285 for (i = 0; i < 5; i++) {
288 dt_[i] = ktime_get();
289 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
293 dt_[i] = ktime_sub(ktime_get(), dt_[i]);
294 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
299 /* Use the median of both cycle/dt; close enough */
300 sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
301 cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
302 sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
303 dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
306 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
307 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
309 igt_spinner_end(&spin);
310 st_engine_heartbeat_enable(engine);
313 u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
315 intel_gt_ns_to_pm_interval(gt, dt);
317 pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
318 engine->name, cycles, time, dt, expected,
319 gt->clock_frequency / 1000);
/* Converted cycle time must be within ~0.8x..1.25x of walltime */
321 if (10 * time < 8 * dt ||
322 8 * time > 10 * dt) {
323 pr_err("%s: rps clock time does not match walltime!\n",
328 if (10 * expected < 8 * cycles ||
329 8 * expected > 10 * cycles) {
330 pr_err("%s: walltime does not match rps clock ticks!\n",
336 if (igt_flush_test(gt->i915))
339 break; /* once is enough */
342 intel_rps_enable(>->rps);
345 igt_spinner_fini(&spin);
/* Restore the real RPS worker before returning */
347 intel_gt_pm_wait_for_idle(gt);
348 rps->work.func = saved_work;
350 if (err == -ENODEV) /* skipped, don't report a fail */
/*
 * live_rps_control - verify that requested RPS frequencies are honoured:
 * with a spinner keeping each engine busy, walk from min_freq upwards
 * until the PCU refuses to go higher (the throttle "limit"), check min
 * can be restored, and time the min<->limit transitions. Failing to
 * reach min at all, or being throttled all the way to min, is an error.
 *
 * NOTE(review): listing elided; error paths and declarations not shown.
 */
356 int live_rps_control(void *arg)
358 struct intel_gt *gt = arg;
359 struct intel_rps *rps = >->rps;
360 void (*saved_work)(struct work_struct *wrk);
361 struct intel_engine_cs *engine;
362 enum intel_engine_id id;
363 struct igt_spinner spin;
367 * Check that the actual frequency matches our requested frequency,
368 * to verify our control mechanism. We have to be careful that the
369 * PCU may throttle the GPU in which case the actual frequency used
370 * will be lower than requested.
373 if (!intel_rps_is_enabled(rps))
376 if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
379 if (igt_spinner_init(&spin, gt))
/* Park the RPS worker so only the test changes frequencies */
382 intel_gt_pm_wait_for_idle(gt);
383 saved_work = rps->work.func;
384 rps->work.func = dummy_rps_work;
387 for_each_engine(engine, gt, id) {
388 struct i915_request *rq;
389 ktime_t min_dt, max_dt;
393 if (!intel_engine_can_store_dword(engine))
396 st_engine_heartbeat_disable(engine);
398 rq = igt_spinner_create_request(&spin,
399 engine->kernel_context,
406 i915_request_add(rq);
408 if (!igt_wait_for_spinner(&spin, rq)) {
409 pr_err("%s: RPS spinner did not start\n",
411 igt_spinner_end(&spin);
412 st_engine_heartbeat_enable(engine);
413 intel_gt_set_wedged(engine->gt);
418 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
419 pr_err("%s: could not set minimum frequency [%x], only %x!\n",
420 engine->name, rps->min_freq, read_cagf(rps));
421 igt_spinner_end(&spin);
422 st_engine_heartbeat_enable(engine);
423 show_pstate_limits(rps);
/* Climb until the PCU stops granting higher frequencies */
428 for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
429 if (rps_set_check(rps, f) < f)
433 limit = rps_set_check(rps, f);
435 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
436 pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
437 engine->name, rps->min_freq, read_cagf(rps));
438 igt_spinner_end(&spin);
439 st_engine_heartbeat_enable(engine);
440 show_pstate_limits(rps);
/* Time the response of min->limit and limit->min transitions */
445 max_dt = ktime_get();
446 max = rps_set_check(rps, limit);
447 max_dt = ktime_sub(ktime_get(), max_dt);
449 min_dt = ktime_get();
450 min = rps_set_check(rps, rps->min_freq);
451 min_dt = ktime_sub(ktime_get(), min_dt);
453 igt_spinner_end(&spin);
454 st_engine_heartbeat_enable(engine);
456 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
458 rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
459 rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
460 limit, intel_gpu_freq(rps, limit),
461 min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
463 if (limit == rps->min_freq) {
464 pr_err("%s: GPU throttled to minimum!\n",
466 show_pstate_limits(rps);
471 if (igt_flush_test(gt->i915)) {
478 igt_spinner_fini(&spin);
/* Restore the real RPS worker before returning */
480 intel_gt_pm_wait_for_idle(gt);
481 rps->work.func = saved_work;
/*
 * show_pcu_config - debug dump of the PCU's min-frequency table: for each
 * GPU frequency in [min, max], read the paired effective CPU (IA) and
 * ring frequencies via the GEN6_PCODE_READ_MIN_FREQ_TABLE mailbox and
 * print them. Holds a runtime-pm wakeref across the pcode reads.
 */
486 static void show_pcu_config(struct intel_rps *rps)
488 struct drm_i915_private *i915 = rps_to_i915(rps);
489 unsigned int max_gpu_freq, min_gpu_freq;
490 intel_wakeref_t wakeref;
496 min_gpu_freq = rps->min_freq;
497 max_gpu_freq = rps->max_freq;
498 if (INTEL_GEN(i915) >= 9) {
499 /* Convert GT frequency to 50 HZ units */
500 min_gpu_freq /= GEN9_FREQ_SCALER;
501 max_gpu_freq /= GEN9_FREQ_SCALER;
504 wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
506 pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
507 for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
508 int ia_freq = gpu_freq;
510 sandybridge_pcode_read(i915,
511 GEN6_PCODE_READ_MIN_FREQ_TABLE,
/* Low byte = eCPU, next byte = eRing, both in 100MHz units */
514 pr_info("%5d %5d %5d\n",
516 ((ia_freq >> 0) & 0xff) * 100,
517 ((ia_freq >> 8) & 0xff) * 100);
520 intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
/*
 * __measure_frequency - sample the memory-backed spin counter over
 * ~duration_ms and return its rate scaled by 1e6 / dt; with dt in ns
 * this yields counts-per-millisecond, reported as KHz by the callers.
 */
523 static u64 __measure_frequency(u32 *cntr, int duration_ms)
528 dc = READ_ONCE(*cntr);
529 usleep_range(1000 * duration_ms, 2000 * duration_ms);
530 dc = READ_ONCE(*cntr) - dc;
531 dt = ktime_get() - dt;
533 return div64_u64(1000 * 1000 * dc, dt);
/*
 * measure_frequency_at - set *freq (updated to the average of the request
 * and the actual CAGF afterwards), take 5 counter-rate samples and return
 * a weighted median: sort then (x[1] + 2*x[2] + x[3]) / 4.
 */
536 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
541 *freq = rps_set_check(rps, *freq);
542 for (i = 0; i < 5; i++)
543 x[i] = __measure_frequency(cntr, 2);
544 *freq = (*freq + read_cagf(rps)) / 2;
546 /* A simple triangle filter for better result stability */
547 sort(x, 5, sizeof(*x), cmp_u64, NULL);
548 return div_u64(x[1] + 2 * x[2] + x[3], 4);
/*
 * __measure_cs_frequency - as __measure_frequency, but read the spin
 * counter directly from the CS_GPR(0) register instead of memory.
 */
551 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
557 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
558 usleep_range(1000 * duration_ms, 2000 * duration_ms);
559 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
560 dt = ktime_get() - dt;
562 return div64_u64(1000 * 1000 * dc, dt);
/*
 * measure_cs_frequency_at - register-read twin of measure_frequency_at():
 * set *freq, take 5 CS_GPR-rate samples, return the triangle-filtered
 * median; *freq is updated to the average of request and actual CAGF.
 */
565 static u64 measure_cs_frequency_at(struct intel_rps *rps,
566 struct intel_engine_cs *engine,
572 *freq = rps_set_check(rps, *freq);
573 for (i = 0; i < 5; i++)
574 x[i] = __measure_cs_frequency(engine, 2);
575 *freq = (*freq + read_cagf(rps)) / 2;
577 /* A simple triangle filter for better result stability */
578 sort(x, 5, sizeof(*x), cmp_u64, NULL);
579 return div_u64(x[1] + 2 * x[2] + x[3], 4);
/*
 * scaled_within - true iff the ratio x/y lies strictly inside the band
 * (f_n/f_d, f_d/f_n), i.e. x and y agree to within the given tolerance.
 */
582 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
584 return f_d * x > f_n * y && f_n * x < f_d * y;
/*
 * live_rps_frequency_cs - verify the CS clock scales with the requested
 * RPS frequency, reading the spin counter from CS_GPR(0) (srm=false in
 * create_spin_counter). Compares counter rates at min vs max frequency
 * via scaled_within(); on mismatch, walks every frequency step and dumps
 * the PCU config. Gen8+ only; optionally pins cpu_latency QoS to keep
 * C-states from perturbing the measurement (see CPU_LATENCY).
 *
 * NOTE(review): listing elided; error paths and declarations not shown.
 */
587 int live_rps_frequency_cs(void *arg)
589 void (*saved_work)(struct work_struct *wrk);
590 struct intel_gt *gt = arg;
591 struct intel_rps *rps = >->rps;
592 struct intel_engine_cs *engine;
593 struct pm_qos_request qos;
594 enum intel_engine_id id;
598 * The premise is that the GPU does change frequency at our behest.
599 * Let's check there is a correspondence between the requested
600 * frequency, the actual frequency, and the observed clock rate.
603 if (!intel_rps_is_enabled(rps))
606 if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
609 if (CPU_LATENCY >= 0)
610 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
/* Park the RPS worker so only the test changes frequencies */
612 intel_gt_pm_wait_for_idle(gt);
613 saved_work = rps->work.func;
614 rps->work.func = dummy_rps_work;
616 for_each_engine(engine, gt, id) {
617 struct i915_request *rq;
618 struct i915_vma *vma;
625 st_engine_heartbeat_disable(engine);
627 vma = create_spin_counter(engine,
628 engine->kernel_context->vm, false,
632 st_engine_heartbeat_enable(engine);
636 rq = intel_engine_create_kernel_request(engine);
643 err = i915_request_await_object(rq, vma->obj, false);
645 err = i915_vma_move_to_active(vma, rq, 0);
647 err = rq->engine->emit_bb_start(rq,
650 i915_vma_unlock(vma);
651 i915_request_add(rq);
/* Wait for the batch's counter to start ticking before sampling */
655 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
657 pr_err("%s: timed loop did not start\n",
662 min.freq = rps->min_freq;
663 min.count = measure_cs_frequency_at(rps, engine, &min.freq);
665 max.freq = rps->max_freq;
666 max.count = measure_cs_frequency_at(rps, engine, &max.freq);
668 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
670 min.count, intel_gpu_freq(rps, min.freq),
671 max.count, intel_gpu_freq(rps, max.freq),
672 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
673 max.freq * min.count));
/* Cross-multiplied: count/freq ratio must match at min and max */
675 if (!scaled_within(max.freq * min.count,
676 min.freq * max.count,
680 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
682 max.freq * min.count,
683 min.freq * max.count);
684 show_pcu_config(rps);
/* On failure, sweep each step to show where scaling breaks down */
686 for (f = min.freq + 1; f <= rps->max_freq; f++) {
690 count = measure_cs_frequency_at(rps, engine, &act);
694 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
696 act, intel_gpu_freq(rps, act), count,
697 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
700 f = act; /* may skip ahead [pcu granularity] */
/* Terminate the infinite batch by rewriting its loop jump */
707 *cancel = MI_BATCH_BUFFER_END;
708 i915_gem_object_flush_map(vma->obj);
709 i915_gem_object_unpin_map(vma->obj);
713 st_engine_heartbeat_enable(engine);
714 if (igt_flush_test(gt->i915))
720 intel_gt_pm_wait_for_idle(gt);
721 rps->work.func = saved_work;
723 if (CPU_LATENCY >= 0)
724 cpu_latency_qos_remove_request(&qos);
/*
 * live_rps_frequency_srm - twin of live_rps_frequency_cs, but samples the
 * spin counter from memory (srm=true in create_spin_counter, read via
 * *cntr) instead of the CS_GPR register, exercising the SRM write path.
 * Same pass criterion: counter rate must scale with requested frequency.
 *
 * NOTE(review): listing elided; error paths and declarations not shown.
 */
729 int live_rps_frequency_srm(void *arg)
731 void (*saved_work)(struct work_struct *wrk);
732 struct intel_gt *gt = arg;
733 struct intel_rps *rps = >->rps;
734 struct intel_engine_cs *engine;
735 struct pm_qos_request qos;
736 enum intel_engine_id id;
740 * The premise is that the GPU does change frequency at our behest.
741 * Let's check there is a correspondence between the requested
742 * frequency, the actual frequency, and the observed clock rate.
745 if (!intel_rps_is_enabled(rps))
748 if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
751 if (CPU_LATENCY >= 0)
752 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
754 intel_gt_pm_wait_for_idle(gt);
755 saved_work = rps->work.func;
756 rps->work.func = dummy_rps_work;
758 for_each_engine(engine, gt, id) {
759 struct i915_request *rq;
760 struct i915_vma *vma;
767 st_engine_heartbeat_disable(engine);
769 vma = create_spin_counter(engine,
770 engine->kernel_context->vm, true,
774 st_engine_heartbeat_enable(engine);
778 rq = intel_engine_create_kernel_request(engine);
785 err = i915_request_await_object(rq, vma->obj, false);
787 err = i915_vma_move_to_active(vma, rq, 0);
789 err = rq->engine->emit_bb_start(rq,
792 i915_vma_unlock(vma);
793 i915_request_add(rq);
/* Wait for the memory-backed counter to start ticking */
797 if (wait_for(READ_ONCE(*cntr), 10)) {
798 pr_err("%s: timed loop did not start\n",
803 min.freq = rps->min_freq;
804 min.count = measure_frequency_at(rps, cntr, &min.freq);
806 max.freq = rps->max_freq;
807 max.count = measure_frequency_at(rps, cntr, &max.freq);
809 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
811 min.count, intel_gpu_freq(rps, min.freq),
812 max.count, intel_gpu_freq(rps, max.freq),
813 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
814 max.freq * min.count));
/* Cross-multiplied: count/freq ratio must match at min and max */
816 if (!scaled_within(max.freq * min.count,
817 min.freq * max.count,
821 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
823 max.freq * min.count,
824 min.freq * max.count);
825 show_pcu_config(rps);
/* On failure, sweep each step to show where scaling breaks down */
827 for (f = min.freq + 1; f <= rps->max_freq; f++) {
831 count = measure_frequency_at(rps, cntr, &act);
835 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
837 act, intel_gpu_freq(rps, act), count,
838 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
841 f = act; /* may skip ahead [pcu granularity] */
/* Terminate the infinite batch by rewriting its loop jump */
848 *cancel = MI_BATCH_BUFFER_END;
849 i915_gem_object_flush_map(vma->obj);
850 i915_gem_object_unpin_map(vma->obj);
854 st_engine_heartbeat_enable(engine);
855 if (igt_flush_test(gt->i915))
861 intel_gt_pm_wait_for_idle(gt);
862 rps->work.func = saved_work;
864 if (CPU_LATENCY >= 0)
865 cpu_latency_qos_remove_request(&qos);
/*
 * sleep_for_ei - sleep long enough for at least one full evaluation
 * interval to elapse with clean interrupt state: first let any partial
 * EI drain, then reset pm_iir by cycling interrupts off/on, and finally
 * wait 2-3 EIs so the hardware has a chance to latch a fresh event.
 */
870 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
872 /* Flush any previous EI */
873 usleep_range(timeout_us, 2 * timeout_us);
875 /* Reset the interrupt status */
876 rps_disable_interrupts(rps);
877 GEM_BUG_ON(rps->pm_iir);
878 rps_enable_interrupts(rps);
880 /* And then wait for the timeout, for real this time */
881 usleep_range(2 * timeout_us, 3 * timeout_us);
/*
 * __rps_up_interrupt - at min frequency with a spinner saturating the
 * engine, wait one UP evaluation interval (converted from GEN6_RP_UP_EI
 * ticks to microseconds) and check that an UP-threshold interrupt was
 * latched in rps->pm_iir, without the worker having changed cur_freq
 * (the worker is stubbed out by the caller).
 *
 * NOTE(review): listing elided; return statements and some error-path
 * cleanup are not shown.
 */
884 static int __rps_up_interrupt(struct intel_rps *rps,
885 struct intel_engine_cs *engine,
886 struct igt_spinner *spin)
888 struct intel_uncore *uncore = engine->uncore;
889 struct i915_request *rq;
892 if (!intel_engine_can_store_dword(engine))
895 rps_set_check(rps, rps->min_freq);
897 rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
901 i915_request_get(rq);
902 i915_request_add(rq);
904 if (!igt_wait_for_spinner(spin, rq)) {
905 pr_err("%s: RPS spinner did not start\n",
907 i915_request_put(rq);
908 intel_gt_set_wedged(engine->gt);
912 if (!intel_rps_is_active(rps)) {
913 pr_err("%s: RPS not enabled on starting spinner\n",
915 igt_spinner_end(spin);
916 i915_request_put(rq);
920 if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
921 pr_err("%s: RPS did not register UP interrupt\n",
923 i915_request_put(rq);
927 if (rps->last_freq != rps->min_freq) {
928 pr_err("%s: RPS did not program min frequency\n",
930 i915_request_put(rq);
/* Convert one UP EI from GT-clock ticks to us for sleep_for_ei() */
934 timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
935 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
936 timeout = DIV_ROUND_UP(timeout, 1000);
938 sleep_for_ei(rps, timeout);
939 GEM_BUG_ON(i915_request_completed(rq));
941 igt_spinner_end(spin);
942 i915_request_put(rq);
/* With the worker stubbed, nothing should have reclocked us */
944 if (rps->cur_freq != rps->min_freq) {
945 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
946 engine->name, intel_rps_read_actual_frequency(rps));
950 if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
951 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
952 engine->name, rps->pm_iir,
953 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
954 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
955 intel_uncore_read(uncore, GEN6_RP_UP_EI));
/*
 * __rps_down_interrupt - mirror of __rps_up_interrupt: at max frequency
 * with the engine awake but idle (caller disables rc6), wait one DOWN
 * evaluation interval and check that a DOWN-threshold or DOWN-timeout
 * interrupt was latched in rps->pm_iir without cur_freq changing.
 *
 * NOTE(review): listing elided; return statements are not shown.
 */
962 static int __rps_down_interrupt(struct intel_rps *rps,
963 struct intel_engine_cs *engine)
965 struct intel_uncore *uncore = engine->uncore;
968 rps_set_check(rps, rps->max_freq);
970 if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
971 pr_err("%s: RPS did not register DOWN interrupt\n",
976 if (rps->last_freq != rps->max_freq) {
977 pr_err("%s: RPS did not program max frequency\n",
/* Convert one DOWN EI from GT-clock ticks to us for sleep_for_ei() */
982 timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
983 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
984 timeout = DIV_ROUND_UP(timeout, 1000);
986 sleep_for_ei(rps, timeout);
988 if (rps->cur_freq != rps->max_freq) {
989 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
991 intel_rps_read_actual_frequency(rps));
995 if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
996 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
997 engine->name, rps->pm_iir,
998 intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
999 intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1000 intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1001 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1002 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1003 intel_uncore_read(uncore, GEN6_RP_UP_EI));
/*
 * live_rps_interrupt - check that the hardware generates UP interrupts
 * while an engine is saturated at min frequency, and DOWN interrupts
 * while awake-but-idle at max frequency (rc6 disabled for the latter),
 * on every engine that advertises the corresponding pm_events bits.
 *
 * NOTE(review): listing elided; error paths and declarations not shown.
 */
1010 int live_rps_interrupt(void *arg)
1012 struct intel_gt *gt = arg;
1013 struct intel_rps *rps = >->rps;
1014 void (*saved_work)(struct work_struct *wrk);
1015 struct intel_engine_cs *engine;
1016 enum intel_engine_id id;
1017 struct igt_spinner spin;
1022 * First, let's check whether or not we are receiving interrupts.
1025 if (!intel_rps_has_interrupts(rps))
/* Sample pm_events while the GT is powered so RPS is configured */
1028 intel_gt_pm_get(gt);
1029 pm_events = rps->pm_events;
1030 intel_gt_pm_put(gt);
1032 pr_err("No RPS PM events registered, but RPS is enabled?\n");
1036 if (igt_spinner_init(&spin, gt))
/* Park the RPS worker so interrupts are only *recorded*, not acted on */
1039 intel_gt_pm_wait_for_idle(gt);
1040 saved_work = rps->work.func;
1041 rps->work.func = dummy_rps_work;
1043 for_each_engine(engine, gt, id) {
1044 /* Keep the engine busy with a spinner; expect an UP! */
1045 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1046 intel_gt_pm_wait_for_idle(engine->gt);
1047 GEM_BUG_ON(intel_rps_is_active(rps));
1049 st_engine_heartbeat_disable(engine);
1051 err = __rps_up_interrupt(rps, engine, &spin);
1053 st_engine_heartbeat_enable(engine);
1057 intel_gt_pm_wait_for_idle(engine->gt);
1060 /* Keep the engine awake but idle and check for DOWN */
1061 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1062 st_engine_heartbeat_disable(engine);
1063 intel_rc6_disable(>->rc6);
1065 err = __rps_down_interrupt(rps, engine);
1067 intel_rc6_enable(>->rc6);
1068 st_engine_heartbeat_enable(engine);
1075 if (igt_flush_test(gt->i915))
1078 igt_spinner_fini(&spin);
/* Restore the real RPS worker before returning */
1080 intel_gt_pm_wait_for_idle(gt);
1081 rps->work.func = saved_work;
/*
 * __measure_power - sample RAPL package energy (uJ) over ~duration_ms and
 * return energy/time scaled by 1e6; with dt in ns this yields uW, which
 * the caller reports as mW.
 */
1086 static u64 __measure_power(int duration_ms)
1091 dE = librapl_energy_uJ();
1092 usleep_range(1000 * duration_ms, 2000 * duration_ms);
1093 dE = librapl_energy_uJ() - dE;
1094 dt = ktime_get() - dt;
1096 return div64_u64(1000 * 1000 * dE, dt);
/*
 * measure_power_at - set *freq (updated to the average of the request and
 * the actual CAGF afterwards), take 5 power samples of ~5ms each, and
 * return the triangle-filtered median, as in measure_frequency_at().
 */
1099 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1104 *freq = rps_set_check(rps, *freq);
1105 for (i = 0; i < 5; i++)
1106 x[i] = __measure_power(5);
1107 *freq = (*freq + read_cagf(rps)) / 2;
1109 /* A simple triangle filter for better result stability */
1110 sort(x, 5, sizeof(*x), cmp_u64, NULL);
1111 return div_u64(x[1] + 2 * x[2] + x[3], 4);
/*
 * live_rps_power - verify that running at a lower frequency draws less
 * power: with a spinner busy on each engine, measure RAPL power at max
 * and at min frequency. Skips unless the frequencies actually differ by
 * more than ~10%; fails if min.power exceeds ~110% of max.power.
 * Requires RAPL support (librapl_energy_uJ() non-zero).
 *
 * NOTE(review): listing elided; error paths and declarations not shown.
 */
1114 int live_rps_power(void *arg)
1116 struct intel_gt *gt = arg;
1117 struct intel_rps *rps = >->rps;
1118 void (*saved_work)(struct work_struct *wrk);
1119 struct intel_engine_cs *engine;
1120 enum intel_engine_id id;
1121 struct igt_spinner spin;
1125 * Our fundamental assumption is that running at lower frequency
1126 * actually saves power. Let's see if our RAPL measurement support
1130 if (!intel_rps_is_enabled(rps))
1133 if (!librapl_energy_uJ())
1136 if (igt_spinner_init(&spin, gt))
/* Park the RPS worker so only the test changes frequencies */
1139 intel_gt_pm_wait_for_idle(gt);
1140 saved_work = rps->work.func;
1141 rps->work.func = dummy_rps_work;
1143 for_each_engine(engine, gt, id) {
1144 struct i915_request *rq;
1150 if (!intel_engine_can_store_dword(engine))
1153 st_engine_heartbeat_disable(engine);
1155 rq = igt_spinner_create_request(&spin,
1156 engine->kernel_context,
1159 st_engine_heartbeat_enable(engine);
1164 i915_request_add(rq);
1166 if (!igt_wait_for_spinner(&spin, rq)) {
1167 pr_err("%s: RPS spinner did not start\n",
1169 igt_spinner_end(&spin);
1170 st_engine_heartbeat_enable(engine);
1171 intel_gt_set_wedged(engine->gt);
1176 max.freq = rps->max_freq;
1177 max.power = measure_power_at(rps, &max.freq);
1179 min.freq = rps->min_freq;
1180 min.power = measure_power_at(rps, &min.freq);
1182 igt_spinner_end(&spin);
1183 st_engine_heartbeat_enable(engine);
1185 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1187 min.power, intel_gpu_freq(rps, min.freq),
1188 max.power, intel_gpu_freq(rps, max.freq));
/* Frequencies within ~10% of each other: result is meaningless */
1190 if (10 * min.freq >= 9 * max.freq) {
1191 pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1192 min.freq, intel_gpu_freq(rps, min.freq),
1193 max.freq, intel_gpu_freq(rps, max.freq));
/* Allow ~10% measurement slack before declaring a power regression */
1197 if (11 * min.power > 10 * max.power) {
1198 pr_err("%s: did not conserve power when setting lower frequency!\n",
1204 if (igt_flush_test(gt->i915)) {
1210 igt_spinner_fini(&spin);
/* Restore the real RPS worker before returning */
1212 intel_gt_pm_wait_for_idle(gt);
1213 rps->work.func = saved_work;
/*
 * live_rps_dynamic - end-to-end check of automatic reclocking: with the
 * real RPS worker active (unlike the other tests, rps->work is NOT
 * stubbed here), a spinner should drive the frequency up towards max,
 * and ending it should let the frequency decay back to min. Fails if
 * the busy frequency did not exceed the idle frequency; also reports
 * how long each transition took. rc6 is held off around the measurement.
 *
 * NOTE(review): listing elided and the function continues beyond this
 * view; error paths and the tail are not shown.
 */
1218 int live_rps_dynamic(void *arg)
1220 struct intel_gt *gt = arg;
1221 struct intel_rps *rps = >->rps;
1222 struct intel_engine_cs *engine;
1223 enum intel_engine_id id;
1224 struct igt_spinner spin;
1228 * We've looked at the basics, and have established that we
1229 * can change the clock frequency and that the HW will generate
1230 * interrupts based on load. Now we check how we integrate those
1231 * moving parts into dynamic reclocking based on load.
1234 if (!intel_rps_is_enabled(rps))
1237 if (igt_spinner_init(&spin, gt))
1240 if (intel_rps_has_interrupts(rps))
1241 pr_info("RPS has interrupt support\n");
1242 if (intel_rps_uses_timer(rps))
1243 pr_info("RPS has timer support\n");
1245 for_each_engine(engine, gt, id) {
1246 struct i915_request *rq;
1252 if (!intel_engine_can_store_dword(engine))
/* Start from a known parked state at the minimum frequency */
1255 intel_gt_pm_wait_for_idle(gt);
1256 GEM_BUG_ON(intel_rps_is_active(rps));
1257 rps->cur_freq = rps->min_freq;
1259 intel_engine_pm_get(engine);
1260 intel_rc6_disable(>->rc6);
1261 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1263 rq = igt_spinner_create_request(&spin,
1264 engine->kernel_context,
1271 i915_request_add(rq);
/* Busy: expect autonomous upclock towards max within 500ms */
1273 max.dt = ktime_get();
1274 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1275 max.dt = ktime_sub(ktime_get(), max.dt);
1277 igt_spinner_end(&spin);
/* Idle: expect decay back towards min within 2s */
1279 min.dt = ktime_get();
1280 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1281 min.dt = ktime_sub(ktime_get(), min.dt);
1283 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1285 max.freq, intel_gpu_freq(rps, max.freq),
1286 ktime_to_ns(max.dt),
1287 min.freq, intel_gpu_freq(rps, min.freq),
1288 ktime_to_ns(min.dt));
1289 if (min.freq >= max.freq) {
1290 pr_err("%s: dynamic reclocking of spinner failed\n!",
1296 intel_rc6_enable(>->rc6);
1297 intel_engine_pm_put(engine);
1299 if (igt_flush_test(gt->i915))
1305 igt_spinner_fini(&spin);