1 // SPDX-License-Identifier: MIT
3 * Copyright © 2020 Intel Corporation
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
9 #include "intel_engine_heartbeat.h"
10 #include "intel_engine_pm.h"
11 #include "intel_gpu_commands.h"
12 #include "intel_gt_clock_utils.h"
13 #include "intel_gt_pm.h"
14 #include "intel_rc6.h"
15 #include "selftest_rps.h"
16 #include "selftests/igt_flush_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/librapl.h"
20 /* Try to isolate the impact of cstates from determining frequency response */
21 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
/*
 * Park the heartbeat on @engine for the duration of a measurement.
 *
 * Zeroes the heartbeat interval so no background heartbeat requests are
 * emitted while we measure frequencies, takes an engine-pm wakeref and
 * parks any pending heartbeat work. Returns the saved interval (in ms)
 * which must be handed back to engine_heartbeat_enable().
 */
23 static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine)
/* Atomically read-and-clear the interval; 0 disables new pulses */
27 old = fetch_and_zero(&engine->props.heartbeat_interval_ms);
29 intel_engine_pm_get(engine);
30 intel_engine_park_heartbeat(engine);
/*
 * Undo engine_heartbeat_disable(): drop the engine-pm wakeref and restore
 * the heartbeat interval that was saved when the heartbeat was parked.
 */
35 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
38 intel_engine_pm_put(engine);
/* Re-arm the heartbeat with the interval saved by the disable call */
40 engine->props.heartbeat_interval_ms = saved;
/*
 * No-op substitute for rps->work.func: installed by the tests so that the
 * normal RPS worker cannot reclock the GPU behind the selftest's back.
 */
43 static void dummy_rps_work(struct work_struct *wrk)
/*
 * sort() comparator for u64 samples; used to order measurements before
 * applying the median/triangle filters below.
 */
47 static int cmp_u64(const void *A, const void *B)
49 const u64 *a = A, *b = B;
/*
 * sort() comparator for u32 samples; the 32bit counterpart of cmp_u64()
 * for sorting cycle counters read from hardware registers.
 */
59 static int cmp_u32(const void *A, const void *B)
61 const u32 *a = A, *b = B;
/*
 * Build a self-contained batch that spins forever on @engine, incrementing
 * a CS general-purpose register (GPR) every iteration. With @srm, each pass
 * also stores the running count into the last dword of the buffer so the
 * CPU can sample it. Outputs: *cancel points at the loop entry (write
 * MI_BATCH_BUFFER_END there to terminate the spinner), *counter at the
 * memory slot holding the count (NULL when !srm).
 */
71 static struct i915_vma *
72 create_spin_counter(struct intel_engine_cs *engine,
73 struct i915_address_space *vm,
83 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
84 struct drm_i915_gem_object *obj;
/* 64KiB backing store: batch instructions + final dword for the count */
91 obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
/* Index of the last dword, reserved as the SRM destination */
95 end = obj->base.size / sizeof(u32) - 1;
97 vma = i915_vma_instance(obj, vm, NULL);
99 i915_gem_object_put(obj);
103 err = i915_vma_pin(vma, 0, 0, PIN_USER);
/* Write-combined CPU map so we can emit commands and later poke *cancel */
109 base = i915_gem_object_pin_map(obj, I915_MAP_WC);
111 i915_gem_object_put(obj);
112 return ERR_CAST(base);
/* Zero all GPRs (each is 64b, hence two 32b writes per register) */
116 *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
117 for (i = 0; i < __NGPR__; i++) {
118 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
120 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
/* Load the per-iteration increment into the INC register */
124 *cs++ = MI_LOAD_REGISTER_IMM(1);
125 *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
130 /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
131 for (i = 0; i < 1024; i++) {
/* COUNT += INC via the MI_MATH ALU */
133 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
134 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
136 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
/* srm only: store COUNT into the final dword for CPU observation */
139 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
140 *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
141 *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
142 *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
/* Jump back to the top of the unrolled loop: spin until cancelled */
146 *cs++ = MI_BATCH_BUFFER_START_GEN8;
147 *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
148 *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
149 GEM_BUG_ON(cs - base > end);
151 i915_gem_object_flush_map(obj);
/* Hand back the control points for the caller */
153 *cancel = base + loop;
154 *counter = srm ? memset32(base + end, 0, 1) : NULL;
/*
 * Poll the actual GPU frequency (CAGF) until it reaches @freq, gives up
 * after @timeout_ms, or stabilises: if the last ARRAY_SIZE(history)
 * samples are all identical, the PCU has settled short of the target.
 * Returns the last frequency actually observed.
 */
158 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
/* Seed history with the target so an instant hit terminates at once */
165 memset(history, freq, sizeof(history));
168 /* The PCU does not change instantly, but drifts towards the goal? */
169 end = jiffies + msecs_to_jiffies(timeout_ms);
173 act = read_cagf(rps);
174 if (time_after(jiffies, end))
177 /* Target acquired */
181 /* Any change within the last N samples? */
182 if (!memchr_inv(history, act, sizeof(history)))
/* Record this sample into the circular history buffer */
186 i = (i + 1) % ARRAY_SIZE(history);
188 usleep_range(sleep, 2 * sleep);
/* Cap the (presumably growing) poll interval — TODO confirm backoff in elided lines */
190 if (sleep > timeout_ms * 20)
191 sleep = timeout_ms * 20;
/*
 * Request @freq under the rps lock, assert that the request was
 * programmed, then wait up to 50ms for the hardware to deliver it.
 * Returns the frequency actually achieved (which the PCU may clamp).
 */
195 static u8 rps_set_check(struct intel_rps *rps, u8 freq)
197 mutex_lock(&rps->lock);
198 GEM_BUG_ON(!intel_rps_is_active(rps));
199 intel_rps_set(rps, freq);
/* The request must stick; only the delivered frequency may differ */
200 GEM_BUG_ON(rps->last_freq != freq);
201 mutex_unlock(&rps->lock);
203 return wait_for_freq(rps, freq, 50);
/*
 * Debug dump of the platform's P-state limit registers, printed when a
 * frequency request could not be honoured so the log shows what the
 * PCU/BIOS allows: RP_STATE_CAP on Broxton, RP_STATE_LIMITS on gen9.
 */
206 static void show_pstate_limits(struct intel_rps *rps)
208 struct drm_i915_private *i915 = rps_to_i915(rps);
210 if (IS_BROXTON(i915)) {
211 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
212 i915_mmio_reg_offset(BXT_RP_STATE_CAP),
213 intel_uncore_read(rps_to_uncore(rps),
215 } else if (IS_GEN(i915, 9)) {
216 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
217 i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
218 intel_uncore_read(rps_to_uncore(rps),
219 GEN9_RP_STATE_LIMITS));
/*
 * live_rps_clock_interval - verify our GT-clock <-> ns conversions
 *
 * With RPS disabled and a spinner keeping one engine busy, program the
 * RP_UP evaluation interval to "infinity" so GEN6_RP_CUR_UP_EI free-runs,
 * then compare its tick rate against walltime. Both directions of the
 * conversion (intel_gt_pm_interval_to_ns / intel_gt_ns_to_pm_interval)
 * must agree with the measurement within ~25%.
 */
223 int live_rps_clock_interval(void *arg)
225 struct intel_gt *gt = arg;
/* NOTE(review): "&gt->rps" appears mis-encoded as ">->rps" — restore '&' */
226 struct intel_rps *rps = >->rps;
227 void (*saved_work)(struct work_struct *wrk);
228 struct intel_engine_cs *engine;
229 enum intel_engine_id id;
230 struct igt_spinner spin;
233 if (!intel_rps_is_enabled(rps))
236 if (igt_spinner_init(&spin, gt))
/* Quiesce the GT then stub out the RPS worker for the duration */
239 intel_gt_pm_wait_for_idle(gt);
240 saved_work = rps->work.func;
241 rps->work.func = dummy_rps_work;
/* Drive the RP registers manually; normal RPS must not interfere */
244 intel_rps_disable(>->rps);
246 intel_gt_check_clock_frequency(gt);
248 for_each_engine(engine, gt, id) {
249 unsigned long saved_heartbeat;
250 struct i915_request *rq;
254 if (!intel_engine_can_store_dword(engine))
257 saved_heartbeat = engine_heartbeat_disable(engine);
/* Keep the engine (and thus the EI counter's clock) busy */
259 rq = igt_spinner_create_request(&spin,
260 engine->kernel_context,
263 engine_heartbeat_enable(engine, saved_heartbeat);
268 i915_request_add(rq);
270 if (!igt_wait_for_spinner(&spin, rq)) {
271 pr_err("%s: RPS spinner did not start\n",
273 igt_spinner_end(&spin);
274 engine_heartbeat_enable(engine, saved_heartbeat);
/* A spinner that never runs implies a broken GT; wedge it */
275 intel_gt_set_wedged(engine->gt);
280 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
282 intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
284 /* Set the evaluation interval to infinity! */
285 intel_uncore_write_fw(gt->uncore,
286 GEN6_RP_UP_EI, 0xffffffff);
287 intel_uncore_write_fw(gt->uncore,
288 GEN6_RP_UP_THRESHOLD, 0xffffffff);
290 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
291 GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
293 if (wait_for(intel_uncore_read_fw(gt->uncore,
296 /* Just skip the test; assume lack of HW support */
297 pr_notice("%s: rps evaluation interval not ticking\n",
/* Take 5 paired (walltime, EI-tick) samples for robustness */
305 for (i = 0; i < 5; i++) {
308 dt_[i] = ktime_get();
309 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
313 dt_[i] = ktime_sub(ktime_get(), dt_[i]);
314 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
319 /* Use the median of both cycle/dt; close enough */
320 sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
321 cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
322 sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
323 dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
/* Restore RP control and release forcewake */
326 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
327 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
329 igt_spinner_end(&spin);
330 engine_heartbeat_enable(engine, saved_heartbeat);
/* Convert measured ticks to ns and ns back to ticks for comparison */
333 u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
335 intel_gt_ns_to_pm_interval(gt, dt);
337 pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
338 engine->name, cycles, time, dt, expected,
339 gt->clock_frequency / 1000);
/* Accept within a 8:10 .. 10:8 ratio (i.e. +/-25%) both ways */
341 if (10 * time < 8 * dt ||
342 8 * time > 10 * dt) {
343 pr_err("%s: rps clock time does not match walltime!\n",
348 if (10 * expected < 8 * cycles ||
349 8 * expected > 10 * cycles) {
350 pr_err("%s: walltime does not match rps clock ticks!\n",
356 if (igt_flush_test(gt->i915))
359 break; /* once is enough */
362 intel_rps_enable(>->rps);
365 igt_spinner_fini(&spin);
/* Restore the real RPS worker once the GT has idled again */
367 intel_gt_pm_wait_for_idle(gt);
368 rps->work.func = saved_work;
370 if (err == -ENODEV) /* skipped, don't report a fail */
/*
 * live_rps_control - verify that requested and delivered frequencies agree
 *
 * While a spinner keeps an engine busy, walk the frequency range from min
 * towards max noting where the PCU stops honouring requests (the throttle
 * limit), and time how quickly min<->max transitions complete. Fails if
 * even the minimum frequency cannot be (re)programmed or the GPU is
 * throttled all the way down to the minimum.
 */
376 int live_rps_control(void *arg)
378 struct intel_gt *gt = arg;
/* NOTE(review): "&gt->rps" appears mis-encoded as ">->rps" — restore '&' */
379 struct intel_rps *rps = >->rps;
380 void (*saved_work)(struct work_struct *wrk);
381 struct intel_engine_cs *engine;
382 enum intel_engine_id id;
383 struct igt_spinner spin;
387 * Check that the actual frequency matches our requested frequency,
388 * to verify our control mechanism. We have to be careful that the
389 * PCU may throttle the GPU in which case the actual frequency used
390 * will be lower than requested.
393 if (!intel_rps_is_enabled(rps))
396 if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
399 if (igt_spinner_init(&spin, gt))
/* Quiesce the GT and stub out the RPS worker for manual control */
402 intel_gt_pm_wait_for_idle(gt);
403 saved_work = rps->work.func;
404 rps->work.func = dummy_rps_work;
407 for_each_engine(engine, gt, id) {
408 unsigned long saved_heartbeat;
409 struct i915_request *rq;
410 ktime_t min_dt, max_dt;
414 if (!intel_engine_can_store_dword(engine))
417 saved_heartbeat = engine_heartbeat_disable(engine);
419 rq = igt_spinner_create_request(&spin,
420 engine->kernel_context,
427 i915_request_add(rq);
429 if (!igt_wait_for_spinner(&spin, rq)) {
430 pr_err("%s: RPS spinner did not start\n",
432 igt_spinner_end(&spin);
433 engine_heartbeat_enable(engine, saved_heartbeat);
434 intel_gt_set_wedged(engine->gt);
/* Baseline: the minimum frequency must always be attainable */
439 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
440 pr_err("%s: could not set minimum frequency [%x], only %x!\n",
441 engine->name, rps->min_freq, read_cagf(rps));
442 igt_spinner_end(&spin);
443 engine_heartbeat_enable(engine, saved_heartbeat);
444 show_pstate_limits(rps);
/* Climb until the PCU stops delivering what we ask for */
449 for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
450 if (rps_set_check(rps, f) < f)
/* Record the highest frequency the PCU will actually grant */
454 limit = rps_set_check(rps, f);
456 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
457 pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
458 engine->name, rps->min_freq, read_cagf(rps));
459 igt_spinner_end(&spin);
460 engine_heartbeat_enable(engine, saved_heartbeat);
461 show_pstate_limits(rps);
/* Time the ramp up to the limit and back down to minimum */
466 max_dt = ktime_get();
467 max = rps_set_check(rps, limit);
468 max_dt = ktime_sub(ktime_get(), max_dt);
470 min_dt = ktime_get();
471 min = rps_set_check(rps, rps->min_freq);
472 min_dt = ktime_sub(ktime_get(), min_dt);
474 igt_spinner_end(&spin);
475 engine_heartbeat_enable(engine, saved_heartbeat);
477 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
479 rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
480 rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
481 limit, intel_gpu_freq(rps, limit),
482 min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
/* If we never got above the floor, frequency control is broken */
484 if (limit == rps->min_freq) {
485 pr_err("%s: GPU throttled to minimum!\n",
487 show_pstate_limits(rps);
492 if (igt_flush_test(gt->i915)) {
499 igt_spinner_fini(&spin);
/* Restore the real RPS worker once the GT has idled again */
501 intel_gt_pm_wait_for_idle(gt);
502 rps->work.func = saved_work;
/*
 * Debug dump of the PCU's min-frequency table: for each GPU frequency in
 * [min, max] print the effective CPU and ring frequencies the PCU pairs
 * with it. Used to explain scaling failures in the frequency selftests.
 */
507 static void show_pcu_config(struct intel_rps *rps)
509 struct drm_i915_private *i915 = rps_to_i915(rps);
510 unsigned int max_gpu_freq, min_gpu_freq;
511 intel_wakeref_t wakeref;
517 min_gpu_freq = rps->min_freq;
518 max_gpu_freq = rps->max_freq;
519 if (INTEL_GEN(i915) >= 9) {
520 /* Convert GT frequency to 50 HZ units */
521 min_gpu_freq /= GEN9_FREQ_SCALER;
522 max_gpu_freq /= GEN9_FREQ_SCALER;
/* Pcode access requires the device to be awake */
525 wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
527 pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
528 for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
529 int ia_freq = gpu_freq;
531 sandybridge_pcode_read(i915,
532 GEN6_PCODE_READ_MIN_FREQ_TABLE,
/* ia_freq packs eCPU in bits 0-7 and eRing in bits 8-15, x100MHz units */
535 pr_info("%5d %5d %5d\n",
537 ((ia_freq >> 0) & 0xff) * 100,
538 ((ia_freq >> 8) & 0xff) * 100);
541 intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
/*
 * Sample the memory-backed spinner counter over roughly @duration_ms and
 * return its rate in increments per millisecond (i.e. kHz, matching the
 * "%lluKHz" reports in the callers).
 */
544 static u64 __measure_frequency(u32 *cntr, int duration_ms)
/* NOTE(review): dt is presumably initialised with ktime_get() in an elided line */
549 dc = READ_ONCE(*cntr);
550 usleep_range(1000 * duration_ms, 2000 * duration_ms);
551 dc = READ_ONCE(*cntr) - dc;
552 dt = ktime_get() - dt;
/* dc counts over dt ns: scale by 1e6 to get counts per millisecond */
554 return div64_u64(1000 * 1000 * dc, dt);
/*
 * Pin the GPU at *freq, take 5 short counter-rate samples, and return a
 * filtered rate. *freq is updated to the average of the requested and the
 * actually observed (CAGF) frequency.
 */
557 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
562 *freq = rps_set_check(rps, *freq);
563 for (i = 0; i < 5; i++)
564 x[i] = __measure_frequency(cntr, 2);
565 *freq = (*freq + read_cagf(rps)) / 2;
567 /* A simple triangle filter for better result stability */
568 sort(x, 5, sizeof(*x), cmp_u64, NULL);
569 return div_u64(x[1] + 2 * x[2] + x[3], 4);
/*
 * Like __measure_frequency(), but read the spinner count directly from the
 * engine's CS_GPR(0) register rather than from memory. Returns increments
 * per millisecond (kHz).
 */
572 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
/* NOTE(review): dt is presumably initialised with ktime_get() in an elided line */
578 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
579 usleep_range(1000 * duration_ms, 2000 * duration_ms);
580 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
581 dt = ktime_get() - dt;
/* dc counts over dt ns: scale by 1e6 to get counts per millisecond */
583 return div64_u64(1000 * 1000 * dc, dt);
/*
 * CS-register counterpart of measure_frequency_at(): pin the GPU at *freq,
 * take 5 short samples of the CS_GPR counter rate and return the filtered
 * value. *freq is updated to the average of requested and observed (CAGF).
 */
586 static u64 measure_cs_frequency_at(struct intel_rps *rps,
587 struct intel_engine_cs *engine,
593 *freq = rps_set_check(rps, *freq);
594 for (i = 0; i < 5; i++)
595 x[i] = __measure_cs_frequency(engine, 2);
596 *freq = (*freq + read_cagf(rps)) / 2;
598 /* A simple triangle filter for better result stability */
599 sort(x, 5, sizeof(*x), cmp_u64, NULL);
600 return div_u64(x[1] + 2 * x[2] + x[3], 4);
603 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
605 return f_d * x > f_n * y && f_n * x < f_d * y;
/*
 * live_rps_frequency_cs - CS clock must scale with the requested frequency
 *
 * Run the GPR-incrementing spinner batch and compare the CS counter rate
 * at min vs max frequency; the counts must scale in proportion to the
 * frequencies. On failure, sweep the whole range and dump the per-step
 * rates plus the PCU config for diagnosis.
 */
608 int live_rps_frequency_cs(void *arg)
610 void (*saved_work)(struct work_struct *wrk);
611 struct intel_gt *gt = arg;
/* NOTE(review): "&gt->rps" appears mis-encoded as ">->rps" — restore '&' */
612 struct intel_rps *rps = >->rps;
613 struct intel_engine_cs *engine;
614 struct pm_qos_request qos;
615 enum intel_engine_id id;
619 * The premise is that the GPU does change frequency at our behest.
620 * Let's check there is a correspondence between the requested
621 * frequency, the actual frequency, and the observed clock rate.
624 if (!intel_rps_is_enabled(rps))
627 if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
/* Pin cpu cstates so CPU-side timing jitter does not skew the samples */
630 if (CPU_LATENCY >= 0)
631 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
633 intel_gt_pm_wait_for_idle(gt);
634 saved_work = rps->work.func;
635 rps->work.func = dummy_rps_work;
637 for_each_engine(engine, gt, id) {
638 unsigned long saved_heartbeat;
639 struct i915_request *rq;
640 struct i915_vma *vma;
647 saved_heartbeat = engine_heartbeat_disable(engine);
/* srm=false: we will read the count from CS_GPR(0) directly */
649 vma = create_spin_counter(engine,
650 engine->kernel_context->vm, false,
654 engine_heartbeat_enable(engine, saved_heartbeat);
658 rq = intel_engine_create_kernel_request(engine);
665 err = i915_request_await_object(rq, vma->obj, false);
667 err = i915_vma_move_to_active(vma, rq, 0);
669 err = rq->engine->emit_bb_start(rq,
672 i915_vma_unlock(vma);
673 i915_request_add(rq);
/* Wait for the batch to start ticking before sampling */
677 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
679 pr_err("%s: timed loop did not start\n",
/* Sample the CS counter rate at both ends of the frequency range */
684 min.freq = rps->min_freq;
685 min.count = measure_cs_frequency_at(rps, engine, &min.freq);
687 max.freq = rps->max_freq;
688 max.count = measure_cs_frequency_at(rps, engine, &max.freq);
690 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
692 min.count, intel_gpu_freq(rps, min.freq),
693 max.count, intel_gpu_freq(rps, max.freq),
694 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
695 max.freq * min.count));
/* Cross-multiplied counts must match within the given tolerance */
697 if (!scaled_within(max.freq * min.count,
698 min.freq * max.count,
702 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
704 max.freq * min.count,
705 min.freq * max.count);
706 show_pcu_config(rps);
/* Diagnostic sweep: report the rate at every attainable frequency */
708 for (f = min.freq + 1; f <= rps->max_freq; f++) {
712 count = measure_cs_frequency_at(rps, engine, &act);
716 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
718 act, intel_gpu_freq(rps, act), count,
719 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
722 f = act; /* may skip ahead [pcu granularity] */
/* Terminate the infinite spinner by patching in a batch-end */
729 *cancel = MI_BATCH_BUFFER_END;
730 i915_gem_object_flush_map(vma->obj);
731 i915_gem_object_unpin_map(vma->obj);
735 engine_heartbeat_enable(engine, saved_heartbeat);
736 if (igt_flush_test(gt->i915))
/* Restore the real RPS worker and release the cstate pin */
742 intel_gt_pm_wait_for_idle(gt);
743 rps->work.func = saved_work;
745 if (CPU_LATENCY >= 0)
746 cpu_latency_qos_remove_request(&qos);
/*
 * live_rps_frequency_srm - as live_rps_frequency_cs, but sample via memory
 *
 * Identical premise to the CS variant, except the spinner batch stores its
 * count to memory (srm=true) and the CPU reads that memory location rather
 * than the CS_GPR register, exercising the SRM path.
 */
751 int live_rps_frequency_srm(void *arg)
753 void (*saved_work)(struct work_struct *wrk);
754 struct intel_gt *gt = arg;
/* NOTE(review): "&gt->rps" appears mis-encoded as ">->rps" — restore '&' */
755 struct intel_rps *rps = >->rps;
756 struct intel_engine_cs *engine;
757 struct pm_qos_request qos;
758 enum intel_engine_id id;
762 * The premise is that the GPU does change frequency at our behest.
763 * Let's check there is a correspondence between the requested
764 * frequency, the actual frequency, and the observed clock rate.
767 if (!intel_rps_is_enabled(rps))
770 if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
/* Pin cpu cstates so CPU-side timing jitter does not skew the samples */
773 if (CPU_LATENCY >= 0)
774 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
776 intel_gt_pm_wait_for_idle(gt);
777 saved_work = rps->work.func;
778 rps->work.func = dummy_rps_work;
780 for_each_engine(engine, gt, id) {
781 unsigned long saved_heartbeat;
782 struct i915_request *rq;
783 struct i915_vma *vma;
790 saved_heartbeat = engine_heartbeat_disable(engine);
/* srm=true: the batch stores the count to memory for CPU sampling */
792 vma = create_spin_counter(engine,
793 engine->kernel_context->vm, true,
797 engine_heartbeat_enable(engine, saved_heartbeat);
801 rq = intel_engine_create_kernel_request(engine);
808 err = i915_request_await_object(rq, vma->obj, false);
810 err = i915_vma_move_to_active(vma, rq, 0);
812 err = rq->engine->emit_bb_start(rq,
815 i915_vma_unlock(vma);
816 i915_request_add(rq);
/* Wait for the batch to start ticking before sampling */
820 if (wait_for(READ_ONCE(*cntr), 10)) {
821 pr_err("%s: timed loop did not start\n",
/* Sample the memory counter rate at both ends of the frequency range */
826 min.freq = rps->min_freq;
827 min.count = measure_frequency_at(rps, cntr, &min.freq);
829 max.freq = rps->max_freq;
830 max.count = measure_frequency_at(rps, cntr, &max.freq);
832 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
834 min.count, intel_gpu_freq(rps, min.freq),
835 max.count, intel_gpu_freq(rps, max.freq),
836 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
837 max.freq * min.count));
/* Cross-multiplied counts must match within the given tolerance */
839 if (!scaled_within(max.freq * min.count,
840 min.freq * max.count,
844 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
846 max.freq * min.count,
847 min.freq * max.count);
848 show_pcu_config(rps);
/* Diagnostic sweep: report the rate at every attainable frequency */
850 for (f = min.freq + 1; f <= rps->max_freq; f++) {
854 count = measure_frequency_at(rps, cntr, &act);
858 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
860 act, intel_gpu_freq(rps, act), count,
861 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
864 f = act; /* may skip ahead [pcu granularity] */
/* Terminate the infinite spinner by patching in a batch-end */
871 *cancel = MI_BATCH_BUFFER_END;
872 i915_gem_object_flush_map(vma->obj);
873 i915_gem_object_unpin_map(vma->obj);
877 engine_heartbeat_enable(engine, saved_heartbeat);
878 if (igt_flush_test(gt->i915))
/* Restore the real RPS worker and release the cstate pin */
884 intel_gt_pm_wait_for_idle(gt);
885 rps->work.func = saved_work;
887 if (CPU_LATENCY >= 0)
888 cpu_latency_qos_remove_request(&qos);
/*
 * Sleep for at least one full RPS evaluation interval with a clean
 * interrupt slate: flush any interval already in flight, clear pm_iir by
 * cycling the interrupts, then wait out a complete interval so any pm_iir
 * bits seen afterwards were raised during this window.
 */
893 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
895 /* Flush any previous EI */
896 usleep_range(timeout_us, 2 * timeout_us);
898 /* Reset the interrupt status */
899 rps_disable_interrupts(rps);
900 GEM_BUG_ON(rps->pm_iir);
901 rps_enable_interrupts(rps);
903 /* And then wait for the timeout, for real this time */
904 usleep_range(2 * timeout_us, 3 * timeout_us);
/*
 * At minimum frequency with a spinner saturating @engine, wait one UP
 * evaluation interval and verify the hardware raised the UP-threshold
 * interrupt — while the stubbed worker left the frequency untouched.
 */
907 static int __rps_up_interrupt(struct intel_rps *rps,
908 struct intel_engine_cs *engine,
909 struct igt_spinner *spin)
911 struct intel_uncore *uncore = engine->uncore;
912 struct i915_request *rq;
915 if (!intel_engine_can_store_dword(engine))
/* Start from the floor so busyness must trigger an UP event */
918 rps_set_check(rps, rps->min_freq);
920 rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
924 i915_request_get(rq);
925 i915_request_add(rq);
927 if (!igt_wait_for_spinner(spin, rq)) {
928 pr_err("%s: RPS spinner did not start\n",
930 i915_request_put(rq);
931 intel_gt_set_wedged(engine->gt);
/* Sanity checks: RPS active, UP event armed, min frequency programmed */
935 if (!intel_rps_is_active(rps)) {
936 pr_err("%s: RPS not enabled on starting spinner\n",
938 igt_spinner_end(spin);
939 i915_request_put(rq);
943 if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
944 pr_err("%s: RPS did not register UP interrupt\n",
946 i915_request_put(rq);
950 if (rps->last_freq != rps->min_freq) {
951 pr_err("%s: RPS did not program min frequency\n",
953 i915_request_put(rq);
/* Convert the hw UP interval from pm units to us for the sleep */
957 timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
958 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
959 timeout = DIV_ROUND_UP(timeout, 1000);
961 sleep_for_ei(rps, timeout);
962 GEM_BUG_ON(i915_request_completed(rq));
964 igt_spinner_end(spin);
965 i915_request_put(rq);
/* The stubbed worker must not have reclocked behind our back */
967 if (rps->cur_freq != rps->min_freq) {
968 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
969 engine->name, intel_rps_read_actual_frequency(rps));
/* The busy spinner must have tripped the UP-threshold interrupt */
973 if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
974 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
975 engine->name, rps->pm_iir,
976 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
977 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
978 intel_uncore_read(uncore, GEN6_RP_UP_EI));
/*
 * At maximum frequency with @engine awake but idle, wait one DOWN
 * evaluation interval and verify the hardware raised a DOWN-threshold
 * (or DOWN-timeout) interrupt without the frequency changing.
 */
985 static int __rps_down_interrupt(struct intel_rps *rps,
986 struct intel_engine_cs *engine)
988 struct intel_uncore *uncore = engine->uncore;
/* Start from the ceiling so idleness must trigger a DOWN event */
991 rps_set_check(rps, rps->max_freq);
993 if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
994 pr_err("%s: RPS did not register DOWN interrupt\n",
999 if (rps->last_freq != rps->max_freq) {
1000 pr_err("%s: RPS did not program max frequency\n",
/* Convert the hw DOWN interval from pm units to us for the sleep */
1005 timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
1006 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
1007 timeout = DIV_ROUND_UP(timeout, 1000);
1009 sleep_for_ei(rps, timeout);
/* The stubbed worker must not have reclocked behind our back */
1011 if (rps->cur_freq != rps->max_freq) {
1012 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
1014 intel_rps_read_actual_frequency(rps));
/* Idleness must have tripped a DOWN threshold or timeout interrupt */
1018 if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1019 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1020 engine->name, rps->pm_iir,
1021 intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1022 intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1023 intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1024 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1025 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1026 intel_uncore_read(uncore, GEN6_RP_UP_EI));
/*
 * live_rps_interrupt - check that the HW generates RPS load interrupts
 *
 * For every engine: drive it busy at min frequency and expect an UP
 * threshold interrupt; hold it awake-but-idle at max frequency (rc6
 * disabled so it cannot sleep) and expect a DOWN interrupt. The RPS
 * worker is stubbed so the interrupts are observed, not acted upon.
 */
1033 int live_rps_interrupt(void *arg)
1035 struct intel_gt *gt = arg;
/* NOTE(review): "&gt->rps" appears mis-encoded as ">->rps" — restore '&' */
1036 struct intel_rps *rps = >->rps;
1037 void (*saved_work)(struct work_struct *wrk);
1038 struct intel_engine_cs *engine;
1039 enum intel_engine_id id;
1040 struct igt_spinner spin;
1045 * First, let's check whether or not we are receiving interrupts.
1048 if (!intel_rps_has_interrupts(rps))
/* Snapshot the armed events while the GT is awake */
1051 intel_gt_pm_get(gt);
1052 pm_events = rps->pm_events;
1053 intel_gt_pm_put(gt);
1055 pr_err("No RPS PM events registered, but RPS is enabled?\n");
1059 if (igt_spinner_init(&spin, gt))
/* Quiesce the GT and stub out the RPS worker for the duration */
1062 intel_gt_pm_wait_for_idle(gt);
1063 saved_work = rps->work.func;
1064 rps->work.func = dummy_rps_work;
1066 for_each_engine(engine, gt, id) {
1067 /* Keep the engine busy with a spinner; expect an UP! */
1068 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1069 unsigned long saved_heartbeat;
1071 intel_gt_pm_wait_for_idle(engine->gt);
1072 GEM_BUG_ON(intel_rps_is_active(rps));
1074 saved_heartbeat = engine_heartbeat_disable(engine);
1076 err = __rps_up_interrupt(rps, engine, &spin);
1078 engine_heartbeat_enable(engine, saved_heartbeat);
1082 intel_gt_pm_wait_for_idle(engine->gt);
1085 /* Keep the engine awake but idle and check for DOWN */
1086 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1087 unsigned long saved_heartbeat;
1089 saved_heartbeat = engine_heartbeat_disable(engine);
/* Prevent rc6 so the idle engine stays awake for the DOWN EI */
1090 intel_rc6_disable(>->rc6);
1092 err = __rps_down_interrupt(rps, engine);
1094 intel_rc6_enable(>->rc6);
1095 engine_heartbeat_enable(engine, saved_heartbeat);
1102 if (igt_flush_test(gt->i915))
1105 igt_spinner_fini(&spin);
/* Restore the real RPS worker once the GT has idled again */
1107 intel_gt_pm_wait_for_idle(gt);
1108 rps->work.func = saved_work;
/*
 * Sample package energy (RAPL, in uJ) over roughly @duration_ms and return
 * the average power draw in mW (uJ over ns, scaled by 1e6).
 */
1113 static u64 __measure_power(int duration_ms)
/* NOTE(review): dt is presumably initialised with ktime_get() in an elided line */
1118 dE = librapl_energy_uJ();
1119 usleep_range(1000 * duration_ms, 2000 * duration_ms);
1120 dE = librapl_energy_uJ() - dE;
1121 dt = ktime_get() - dt;
/* dE uJ over dt ns: scale by 1e6 to yield milliwatts */
1123 return div64_u64(1000 * 1000 * dE, dt);
/*
 * Pin the GPU at *freq, take 5 power samples and return a filtered power
 * reading in mW. *freq is updated to the average of the requested and the
 * actually observed (CAGF) frequency.
 */
1126 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1131 *freq = rps_set_check(rps, *freq);
1132 for (i = 0; i < 5; i++)
1133 x[i] = __measure_power(5);
1134 *freq = (*freq + read_cagf(rps)) / 2;
1136 /* A simple triangle filter for better result stability */
1137 sort(x, 5, sizeof(*x), cmp_u64, NULL);
1138 return div_u64(x[1] + 2 * x[2] + x[3], 4);
/*
 * live_rps_power - lower frequency must draw measurably less power
 *
 * With a spinner loading each engine, measure RAPL power at max and min
 * frequency. Skips when RAPL is unavailable; complains if the two
 * frequencies could not be separated, and fails if min-frequency power is
 * not at least ~10% below max-frequency power.
 */
1141 int live_rps_power(void *arg)
1143 struct intel_gt *gt = arg;
/* NOTE(review): "&gt->rps" appears mis-encoded as ">->rps" — restore '&' */
1144 struct intel_rps *rps = >->rps;
1145 void (*saved_work)(struct work_struct *wrk);
1146 struct intel_engine_cs *engine;
1147 enum intel_engine_id id;
1148 struct igt_spinner spin;
1152 * Our fundamental assumption is that running at lower frequency
1153 * actually saves power. Let's see if our RAPL measurement support
1157 if (!intel_rps_is_enabled(rps))
/* No RAPL counters -> nothing to measure, skip */
1160 if (!librapl_energy_uJ())
1163 if (igt_spinner_init(&spin, gt))
/* Quiesce the GT and stub out the RPS worker for manual control */
1166 intel_gt_pm_wait_for_idle(gt);
1167 saved_work = rps->work.func;
1168 rps->work.func = dummy_rps_work;
1170 for_each_engine(engine, gt, id) {
1171 unsigned long saved_heartbeat;
1172 struct i915_request *rq;
1178 if (!intel_engine_can_store_dword(engine))
1181 saved_heartbeat = engine_heartbeat_disable(engine);
1183 rq = igt_spinner_create_request(&spin,
1184 engine->kernel_context,
1187 engine_heartbeat_enable(engine, saved_heartbeat);
1192 i915_request_add(rq);
1194 if (!igt_wait_for_spinner(&spin, rq)) {
1195 pr_err("%s: RPS spinner did not start\n",
1197 igt_spinner_end(&spin);
1198 engine_heartbeat_enable(engine, saved_heartbeat);
1199 intel_gt_set_wedged(engine->gt);
/* Measure power at both extremes of the frequency range */
1204 max.freq = rps->max_freq;
1205 max.power = measure_power_at(rps, &max.freq);
1207 min.freq = rps->min_freq;
1208 min.power = measure_power_at(rps, &min.freq);
1210 igt_spinner_end(&spin);
1211 engine_heartbeat_enable(engine, saved_heartbeat);
1213 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1215 min.power, intel_gpu_freq(rps, min.freq),
1216 max.power, intel_gpu_freq(rps, max.freq),
/* Frequencies within 10% of each other: no meaningful comparison */
1218 if (10 * min.freq >= 9 * max.freq) {
1219 pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1220 min.freq, intel_gpu_freq(rps, min.freq),
1221 max.freq, intel_gpu_freq(rps, max.freq));
/* Expect at least ~10% power saving at the lower frequency */
1225 if (11 * min.power > 10 * max.power) {
1226 pr_err("%s: did not conserve power when setting lower frequency!\n",
1232 if (igt_flush_test(gt->i915)) {
1238 igt_spinner_fini(&spin);
/* Restore the real RPS worker once the GT has idled again */
1240 intel_gt_pm_wait_for_idle(gt);
1241 rps->work.func = saved_work;
/*
 * live_rps_dynamic - end-to-end autotuning: load must raise the clocks
 *
 * Unlike the earlier tests, the real RPS worker stays installed. Starting
 * from the minimum frequency, submit a spinner and time how long dynamic
 * reclocking takes to reach max; end the spinner and time the drop back
 * to min. Fails if the busy frequency never exceeds the idle one.
 */
1246 int live_rps_dynamic(void *arg)
1248 struct intel_gt *gt = arg;
/* NOTE(review): "&gt->rps" appears mis-encoded as ">->rps" — restore '&' */
1249 struct intel_rps *rps = >->rps;
1250 struct intel_engine_cs *engine;
1251 enum intel_engine_id id;
1252 struct igt_spinner spin;
1256 * We've looked at the basics, and have established that we
1257 * can change the clock frequency and that the HW will generate
1258 * interrupts based on load. Now we check how we integrate those
1259 * moving parts into dynamic reclocking based on load.
1262 if (!intel_rps_is_enabled(rps))
1265 if (igt_spinner_init(&spin, gt))
1268 for_each_engine(engine, gt, id) {
1269 struct i915_request *rq;
1275 if (!intel_engine_can_store_dword(engine))
/* Start each engine's pass from a known parked, min-frequency state */
1278 intel_gt_pm_wait_for_idle(gt);
1279 GEM_BUG_ON(intel_rps_is_active(rps));
1280 rps->cur_freq = rps->min_freq;
/* Keep the engine awake; rc6 off so only RPS affects the clocks */
1282 intel_engine_pm_get(engine);
1283 intel_rc6_disable(>->rc6);
1284 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1286 rq = igt_spinner_create_request(&spin,
1287 engine->kernel_context,
1294 i915_request_add(rq);
/* Busy: time how long dynamic reclocking takes to reach max */
1296 max.dt = ktime_get();
1297 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1298 max.dt = ktime_sub(ktime_get(), max.dt);
1300 igt_spinner_end(&spin);
/* Idle: time how long it takes to decay back down to min */
1302 min.dt = ktime_get();
1303 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1304 min.dt = ktime_sub(ktime_get(), min.dt);
1306 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1308 max.freq, intel_gpu_freq(rps, max.freq),
1309 ktime_to_ns(max.dt),
1310 min.freq, intel_gpu_freq(rps, min.freq),
1311 ktime_to_ns(min.dt));
1312 if (min.freq >= max.freq) {
/* NOTE(review): "failed\n!" looks transposed — "failed!\n" intended */
1313 pr_err("%s: dynamic reclocking of spinner failed\n!",
1319 intel_rc6_enable(>->rc6);
1320 intel_engine_pm_put(engine);
1322 if (igt_flush_test(gt->i915))
1328 igt_spinner_fini(&spin);