drm/i915: Workaround async flip + VT-d corruption on HSW/BDW
author Ville Syrjälä <ville.syrjala@linux.intel.com>
Sat, 20 Feb 2021 10:33:03 +0000 (12:33 +0200)
committer Jani Nikula <jani.nikula@intel.com>
Wed, 17 Mar 2021 10:51:29 +0000 (12:51 +0200)
On HSW/BDW with VT-d active the first tile row scanned out
after the first async flip of the frame often ends up corrupted.

Whether the corruption happens or not depends on the scanline
on which the async flip happens, but the behaviour seems very
consistent. I.e. the same set of scanlines (which are most scanlines)
always show the corruption. And another set of scanlines (far less
of them) never shows the corruption.

I discovered that disabling the fetch-stride stretching
feature cures the corruption. This is some kind of TLB-related
prefetch thing AFAIK. We already disable it on SNB primary
planes due to a documented workaround. The hardware folks
indicated that disabling this should be fine, so let's go
with that.

And while we're here, let's document the relevant bits on all
pre-skl platforms.

Fixes: 2a636e240c77 ("drm/i915: Implement async flip for ivb/hsw")
Fixes: cda195f13abd ("drm/i915: Implement async flips for bdw")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210220103303.3448-1-ville.syrjala@linux.intel.com
Reviewed-by: Karthik B S <karthik.b.s@intel.com>
(cherry picked from commit b7a7053ab2ec558b8ae4e55f62ea8f1f58e14f5c)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_pm.c

index 7146cd0..aaf1f00 100644 (file)
@@ -3316,7 +3316,18 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 
 #define ILK_DISPLAY_CHICKEN1   _MMIO(0x42000)
 #define   ILK_FBCQ_DIS         (1 << 22)
-#define          ILK_PABSTRETCH_DIS    (1 << 21)
+#define   ILK_PABSTRETCH_DIS   REG_BIT(21)
+#define   ILK_SABSTRETCH_DIS   REG_BIT(20)
+#define   IVB_PRI_STRETCH_MAX_MASK     REG_GENMASK(21, 20)
+#define   IVB_PRI_STRETCH_MAX_X8       REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 0)
+#define   IVB_PRI_STRETCH_MAX_X4       REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 1)
+#define   IVB_PRI_STRETCH_MAX_X2       REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 2)
+#define   IVB_PRI_STRETCH_MAX_X1       REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 3)
+#define   IVB_SPR_STRETCH_MAX_MASK     REG_GENMASK(19, 18)
+#define   IVB_SPR_STRETCH_MAX_X8       REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 0)
+#define   IVB_SPR_STRETCH_MAX_X4       REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 1)
+#define   IVB_SPR_STRETCH_MAX_X2       REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 2)
+#define   IVB_SPR_STRETCH_MAX_X1       REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 3)
 
 
 /*
@@ -8039,6 +8050,16 @@ enum {
 
 #define _CHICKEN_PIPESL_1_A    0x420b0
 #define _CHICKEN_PIPESL_1_B    0x420b4
+#define  HSW_PRI_STRETCH_MAX_MASK      REG_GENMASK(28, 27)
+#define  HSW_PRI_STRETCH_MAX_X8                REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 0)
+#define  HSW_PRI_STRETCH_MAX_X4                REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 1)
+#define  HSW_PRI_STRETCH_MAX_X2                REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 2)
+#define  HSW_PRI_STRETCH_MAX_X1                REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 3)
+#define  HSW_SPR_STRETCH_MAX_MASK      REG_GENMASK(26, 25)
+#define  HSW_SPR_STRETCH_MAX_X8                REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 0)
+#define  HSW_SPR_STRETCH_MAX_X4                REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 1)
+#define  HSW_SPR_STRETCH_MAX_X2                REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 2)
+#define  HSW_SPR_STRETCH_MAX_X1                REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 3)
 #define  HSW_FBCQ_DIS                  (1 << 22)
 #define  BDW_DPRS_MASK_VBLANK_SRD      (1 << 0)
 #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B)
index 0c3e63f..97b57ac 100644 (file)
@@ -7245,11 +7245,16 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
        intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1,
                   intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
 
-       /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
        for_each_pipe(dev_priv, pipe) {
+               /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
                intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe),
                           intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe)) |
                           BDW_DPRS_MASK_VBLANK_SRD);
+
+               /* Undocumented but fixes async flip + VT-d corruption */
+               if (intel_vtd_active())
+                       intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe),
+                                        HSW_PRI_STRETCH_MAX_MASK, HSW_PRI_STRETCH_MAX_X1);
        }
 
        /* WaVSRefCountFullforceMissDisable:bdw */
@@ -7285,11 +7290,20 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
 
 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
 {
+       enum pipe pipe;
+
        /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */
        intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A),
                   intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A)) |
                   HSW_FBCQ_DIS);
 
+       for_each_pipe(dev_priv, pipe) {
+               /* Undocumented but fixes async flip + VT-d corruption */
+               if (intel_vtd_active())
+                       intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe),
+                                        HSW_PRI_STRETCH_MAX_MASK, HSW_PRI_STRETCH_MAX_X1);
+       }
+
        /* This is required by WaCatErrorRejectionIssue:hsw */
        intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
                   intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |