Merge branches 'clk-range', 'clk-uniphier', 'clk-apple' and 'clk-qcom' into clk-next

[linux-2.6-microblaze.git] / drivers / gpu / drm / i915 / gt / intel_gt.c
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c

index f98f0fb..35d0fcd 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -29,6 +29,8 @@ void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
  {
         spin_lock_init(&gt->irq_lock);
  
+       mutex_init(&gt->tlb_invalidate_lock);
+
         INIT_LIST_HEAD(&gt->closed_vma);
         spin_lock_init(&gt->closed_lock);
  
@@ -912,3 +914,109 @@ void intel_gt_info_print(const struct intel_gt_info *info,
  
         intel_sseu_dump(&info->sseu, p);
  }
+
+struct reg_and_bit {
+       i915_reg_t reg;
+       u32 bit;
+};
+
+static struct reg_and_bit
+get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
+               const i915_reg_t *regs, const unsigned int num)
+{
+       const unsigned int class = engine->class;
+       struct reg_and_bit rb = { };
+
+       if (drm_WARN_ON_ONCE(&engine->i915->drm,
+                            class >= num || !regs[class].reg))
+               return rb;
+
+       rb.reg = regs[class];
+       if (gen8 && class == VIDEO_DECODE_CLASS)
+               rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+       else
+               rb.bit = engine->instance;
+
+       rb.bit = BIT(rb.bit);
+
+       return rb;
+}
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+{
+       static const i915_reg_t gen8_regs[] = {
+               [RENDER_CLASS]                  = GEN8_RTCR,
+               [VIDEO_DECODE_CLASS]            = GEN8_M1TCR, /* , GEN8_M2TCR */
+               [VIDEO_ENHANCEMENT_CLASS]       = GEN8_VTCR,
+               [COPY_ENGINE_CLASS]             = GEN8_BTCR,
+       };
+       static const i915_reg_t gen12_regs[] = {
+               [RENDER_CLASS]                  = GEN12_GFX_TLB_INV_CR,
+               [VIDEO_DECODE_CLASS]            = GEN12_VD_TLB_INV_CR,
+               [VIDEO_ENHANCEMENT_CLASS]       = GEN12_VE_TLB_INV_CR,
+               [COPY_ENGINE_CLASS]             = GEN12_BLT_TLB_INV_CR,
+       };
+       struct drm_i915_private *i915 = gt->i915;
+       struct intel_uncore *uncore = gt->uncore;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       const i915_reg_t *regs;
+       unsigned int num = 0;
+
+       if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+               return;
+
+       if (GRAPHICS_VER(i915) == 12) {
+               regs = gen12_regs;
+               num = ARRAY_SIZE(gen12_regs);
+       } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+               regs = gen8_regs;
+               num = ARRAY_SIZE(gen8_regs);
+       } else if (GRAPHICS_VER(i915) < 8) {
+               return;
+       }
+
+       if (drm_WARN_ONCE(&i915->drm, !num,
+                         "Platform does not implement TLB invalidation!"))
+               return;
+
+       GEM_TRACE("\n");
+
+       assert_rpm_wakelock_held(&i915->runtime_pm);
+
+       mutex_lock(&gt->tlb_invalidate_lock);
+       intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+       for_each_engine(engine, gt, id) {
+               /*
+                * HW architecture suggest typical invalidation time at 40us,
+                * with pessimistic cases up to 100us and a recommendation to
+                * cap at 1ms. We go a bit higher just in case.
+                */
+               const unsigned int timeout_us = 100;
+               const unsigned int timeout_ms = 4;
+               struct reg_and_bit rb;
+
+               rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+               if (!i915_mmio_reg_offset(rb.reg))
+                       continue;
+
+               intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+               if (__intel_wait_for_register_fw(uncore,
+                                                rb.reg, rb.bit, 0,
+                                                timeout_us, timeout_ms,
+                                                NULL))
+                       drm_err_ratelimited(&gt->i915->drm,
+                                           "%s TLB invalidation did not complete in %ums!\n",
+                                           engine->name, timeout_ms);
+       }
+
+       /*
+        * Use delayed put since a) we mostly expect a flurry of TLB
+        * invalidations so it is good to avoid paying the forcewake cost and
+        * b) it works around a bug in Icelake which cannot cope with too rapid
+        * transitions.
+        */
+       intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+       mutex_unlock(&gt->tlb_invalidate_lock);
+}