// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
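
/*
 * With CPU_LATENCY >= 0, the frequency tests below add a cpu_latency_qos
 * request for that value (0 keeps the CPUs out of deep C-states) so that
 * CPU wakeup latency does not distort the usleep-based sampling windows;
 * set it to -1 to skip the pm_qos request entirely.
 */
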
static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine)
	old = fetch_and_zero(&engine->props.heartbeat_interval_ms);

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);

static void engine_heartbeat_enable(struct intel_engine_cs *engine,
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms = saved;

static void dummy_rps_work(struct work_struct *wrk)

static int cmp_u64(const void *A, const void *B)
	const u64 *a = A, *b = B;
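
/*
 * create_spin_counter() builds a self-looping batch: the CS ALU repeatedly
 * adds the INC GPR into the COUNT GPR via MI_MATH, optionally storing the
 * running count to memory with MI_STORE_REGISTER_MEM when srm is set, and
 * then jumps back to the start of the loop with MI_BATCH_BUFFER_START.
 * The batch only terminates once the caller overwrites *cancel with
 * MI_BATCH_BUFFER_END, so the counter advances at a rate proportional to
 * the CS clock for as long as the measurement runs.
 */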
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
		i915_gem_object_put(obj);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
		i915_gem_object_put(obj);
		return ERR_CAST(base);

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
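
/*
 * wait_for_freq() polls read_cagf() until the actual frequency either hits
 * the requested value or stops changing: the PCU ramps towards the target
 * rather than switching instantly, so we keep a short history of samples
 * and give up once they are all identical (or the timeout expires),
 * returning whatever frequency the hardware settled on.
 */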
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
	memset(history, freq, sizeof(history));

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
		act = read_cagf(rps);
		if (time_after(jiffies, end))

		/* Target acquired */

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))

		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
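
/*
 * rps_set_check() requests a new frequency under rps->lock and then gives
 * the PCU up to 50ms to actually deliver it, returning the frequency that
 * was ultimately observed via CAGF so callers can spot throttling.
 */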
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!rps->active);
	intel_rps_set(rps, freq);
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);

static void show_pstate_limits(struct intel_rps *rps)
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
	} else if (IS_GEN(i915, 9)) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
int live_rps_control(void *arg)
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!rps->enabled || rps->max_freq <= rps->min_freq)

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */

	if (igt_spinner_init(&spin, gt))

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		unsigned long saved_heartbeat;
		struct i915_request *rq;
		ktime_t min_dt, max_dt;

		if (!intel_engine_can_store_dword(engine))

		saved_heartbeat = engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine, saved_heartbeat);
			intel_gt_set_wedged(engine->gt);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine, saved_heartbeat);
			show_pstate_limits(rps);

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine, saved_heartbeat);
			show_pstate_limits(rps);

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		engine_heartbeat_enable(engine, saved_heartbeat);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			show_pstate_limits(rps);

	if (igt_flush_test(gt->i915)) {

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;
static void show_pcu_config(struct intel_rps *rps)
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (INTEL_GEN(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		sandybridge_pcode_read(i915,
				       GEN6_PCODE_READ_MIN_FREQ_TABLE,

		pr_info("%5d %5d %5d\n",
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
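
/*
 * __measure_frequency() samples the spinner's counter over a sleep of
 * duration_ms and scales the delta by 1e6/dt(ns), i.e. it reports counter
 * increments per millisecond, which is what the callers print as KHz.
 * For example, ~1,200,000 increments observed over a 2ms window works out
 * to 600,000 per ms, logged as 600000KHz.
 * measure_frequency_at() takes five such samples at a given RPS frequency
 * and folds them with a 1:2:1 weighting of the middle three sorted values
 * (the "triangle filter"), discarding the outliers.
 */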
static u64 __measure_frequency(u32 *cntr, int duration_ms)
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
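
/*
 * scaled_within(x, y, f_n, f_d) checks that the ratio x/y lies strictly
 * between f_n/f_d and f_d/f_n. For instance, with f_n/f_d = 9/10 it would
 * accept values of x and y that agree to within roughly 10%; the frequency
 * tests use it to compare the measured counter rate against the rate
 * predicted by pure frequency scaling.
 */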
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
	return f_d * x > f_n * y && f_n * x < f_d * y;
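
/*
 * live_rps_frequency_cs and live_rps_frequency_srm run the same experiment
 * with two different observers: the _cs variant reads the COUNT GPR back
 * directly over MMIO, while the _srm variant has the batch store the count
 * to memory (create_spin_counter with srm = true) and reads it from there,
 * so a failure in one but not the other points at the readback path rather
 * than at the frequency scaling itself.
 */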
int live_rps_frequency_cs(void *arg)
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!rps->enabled || rps->max_freq <= rps->min_freq)

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		unsigned long saved_heartbeat;
		struct i915_request *rq;
		struct i915_vma *vma;

		saved_heartbeat = engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
			engine_heartbeat_enable(engine, saved_heartbeat);

		rq = intel_engine_create_kernel_request(engine);

		err = i915_request_await_object(rq, vma->obj, false);
			err = i915_vma_move_to_active(vma, rq, 0);
			err = rq->engine->emit_bb_start(rq,
		i915_vma_unlock(vma);
		i915_request_add(rq);

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			pr_err("%s: timed loop did not start\n",

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				count = measure_cs_frequency_at(rps, engine, &act);

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,

				f = act; /* may skip ahead [pcu granularity] */

		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_unpin_map(vma->obj);

		engine_heartbeat_enable(engine, saved_heartbeat);
		if (igt_flush_test(gt->i915))

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);
int live_rps_frequency_srm(void *arg)
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!rps->enabled || rps->max_freq <= rps->min_freq)

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		unsigned long saved_heartbeat;
		struct i915_request *rq;
		struct i915_vma *vma;

		saved_heartbeat = engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
			engine_heartbeat_enable(engine, saved_heartbeat);

		rq = intel_engine_create_kernel_request(engine);

		err = i915_request_await_object(rq, vma->obj, false);
			err = i915_vma_move_to_active(vma, rq, 0);
			err = rq->engine->emit_bb_start(rq,
		i915_vma_unlock(vma);
		i915_request_add(rq);

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				count = measure_frequency_at(rps, cntr, &act);

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,

				f = act; /* may skip ahead [pcu granularity] */

		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_unpin_map(vma->obj);

		engine_heartbeat_enable(engine, saved_heartbeat);
		if (igt_flush_test(gt->i915))

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);
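
/*
 * sleep_for_ei() arranges for exactly one fresh evaluation interval (EI) to
 * be observed: sleep through any EI already in flight, clear the pending
 * interrupt status by cycling the RPS interrupts, and then sleep for a
 * couple more intervals so the hardware has a complete window in which to
 * raise the UP/DOWN threshold interrupt we are about to check for.
 */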
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
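
/*
 * __rps_up_interrupt() pins the GPU at the minimum frequency, keeps the
 * engine saturated with a spinner, and sleeps for one UP EI: because the
 * RPS worker has been replaced with dummy_rps_work, the interrupt should
 * be latched in rps->pm_iir while the frequency itself stays untouched.
 * __rps_down_interrupt() is the mirror image, parking at the maximum
 * frequency with the engine idle and expecting a DOWN threshold/timeout
 * interrupt instead.
 */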
static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;

	if (!intel_engine_can_store_dword(engine))

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);

		pr_err("%s: RPS not enabled on starting spinner\n",
		igt_spinner_end(spin);
		i915_request_put(rq);

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		i915_request_put(rq);

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		i915_request_put(rq);

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = GT_PM_INTERVAL_TO_US(engine->i915, timeout);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
	struct intel_uncore *uncore = engine->uncore;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = GT_PM_INTERVAL_TO_US(engine->i915, timeout);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       intel_rps_read_actual_frequency(rps));

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
int live_rps_interrupt(void *arg)
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!rps->enabled || rps->max_freq <= rps->min_freq)

	pm_events = rps->pm_events;
		pr_err("No RPS PM events registered, but RPS is enabled?\n");

	if (igt_spinner_init(&spin, gt))

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			unsigned long saved_heartbeat;

			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(rps->active);

			saved_heartbeat = engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			engine_heartbeat_enable(engine, saved_heartbeat);

			intel_gt_pm_wait_for_idle(engine->gt);

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			unsigned long saved_heartbeat;

			saved_heartbeat = engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			engine_heartbeat_enable(engine, saved_heartbeat);

	if (igt_flush_test(gt->i915))

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;
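
/*
 * __measure_power() works the same way as __measure_frequency() but samples
 * the RAPL energy counter instead: dE is in microjoules and dt in
 * nanoseconds, so 1e6 * dE / dt yields microjoules per millisecond, i.e.
 * milliwatts, matching the "%llumW" reported by live_rps_power(). For
 * example, 5000uJ consumed over a 5ms window comes out as 1000mW.
 */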
static u64 __measure_power(int duration_ms)
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);

static u64 measure_power_at(struct intel_rps *rps, int *freq)
	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
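
/*
 * live_rps_power() spins each engine at the maximum and then the minimum
 * frequency, measuring RAPL power at each point. If the two frequencies
 * ended up within ~10% of each other the result is inconclusive and only a
 * notice is logged; otherwise the minimum-frequency run must draw roughly
 * 10% less power (11 * min.power > 10 * max.power fails the test), since
 * the whole point of downclocking is to save energy.
 */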
int live_rps_power(void *arg)
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurements support
	 * our conjecture.
	 */

	if (!rps->enabled || rps->max_freq <= rps->min_freq)

	if (!librapl_energy_uJ())

	if (igt_spinner_init(&spin, gt))

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		unsigned long saved_heartbeat;
		struct i915_request *rq;

		if (!intel_engine_can_store_dword(engine))

		saved_heartbeat = engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
			engine_heartbeat_enable(engine, saved_heartbeat);

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine, saved_heartbeat);
			intel_gt_set_wedged(engine->gt);

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		engine_heartbeat_enable(engine, saved_heartbeat);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",

	if (igt_flush_test(gt->i915)) {

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;
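
/*
 * live_rps_dynamic() times the closed loop end to end: starting parked at
 * the minimum frequency with RC6 disabled, it submits a spinner and
 * measures how long the RPS worker takes to reclock up towards max, then
 * ends the spinner and measures how long it takes to drop back down to min.
 */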
int live_rps_dynamic(void *arg)
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!rps->enabled || rps->max_freq <= rps->min_freq)

	if (igt_spinner_init(&spin, gt))

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;

		if (!intel_engine_can_store_dword(engine))

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(rps->active);
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",

		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))

	igt_spinner_fini(&spin);