drm/i915: Kill intel_flush_primary_plane()
[linux-2.6-microblaze.git] / drivers / gpu / drm / i915 / intel_pm.c
index fa4ccb3..148ebf5 100644 (file)
@@ -67,7 +67,7 @@ static void skl_init_clock_gating(struct drm_device *dev)
 
        gen9_init_clock_gating(dev);
 
-       if (INTEL_REVID(dev) == SKL_REVID_A0) {
+       if (INTEL_REVID(dev) <= SKL_REVID_B0) {
                /*
                 * WaDisableSDEUnitClockGating:skl
                 * WaSetGAPSunitClckGateDisable:skl
@@ -75,6 +75,10 @@ static void skl_init_clock_gating(struct drm_device *dev)
                I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                           GEN8_GAPSUNIT_CLOCK_GATE_DISABLE |
                           GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+               /* WaDisableVFUnitClockGating:skl */
+               I915_WRITE(GEN6_UCGCTL2, I915_READ(GEN6_UCGCTL2) |
+                          GEN6_VFUNIT_CLOCK_GATE_DISABLE);
        }
 
        if (INTEL_REVID(dev) <= SKL_REVID_D0) {
@@ -84,8 +88,7 @@ static void skl_init_clock_gating(struct drm_device *dev)
 
                /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
                I915_WRITE(FF_SLICE_CS_CHICKEN2,
-                          I915_READ(FF_SLICE_CS_CHICKEN2) |
-                          GEN9_TSG_BARRIER_ACK_DISABLE);
+                          _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
        }
 
        if (INTEL_REVID(dev) <= SKL_REVID_E0)
@@ -94,6 +97,26 @@ static void skl_init_clock_gating(struct drm_device *dev)
                           GEN8_LQSC_RO_PERF_DIS);
 }
 
+static void bxt_init_clock_gating(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       gen9_init_clock_gating(dev);
+
+       /*
+        * FIXME:
+        * GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only.
+        * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
+        */
+       /* WaDisableSDEUnitClockGating:bxt */
+       I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE |
+                  GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
+
+       /* FIXME: apply on A0 only */
+       I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
+}
+
 static void i915_pineview_get_mem_freq(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1792,7 +1815,7 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
        linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
                                     mode->crtc_clock);
        ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
-                                        intel_ddi_get_cdclk_freq(dev_priv));
+                                        dev_priv->display.get_display_clock_speed(dev_priv->dev));
 
        return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
               PIPE_WM_LINETIME_TIME(linetime);
@@ -1923,7 +1946,7 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
 int ilk_wm_max_level(const struct drm_device *dev)
 {
        /* how many WM levels are we expecting */
-       if (IS_GEN9(dev))
+       if (INTEL_INFO(dev)->gen >= 9)
                return 7;
        else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
                return 4;
@@ -2538,6 +2561,7 @@ static bool ilk_disable_lp_wm(struct drm_device *dev)
  */
 
 #define SKL_DDB_SIZE           896     /* in blocks */
+#define BXT_DDB_SIZE           512
 
 static void
 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
@@ -2556,7 +2580,10 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
                return;
        }
 
-       ddb_size = SKL_DDB_SIZE;
+       if (IS_BROXTON(dev))
+               ddb_size = BXT_DDB_SIZE;
+       else
+               ddb_size = SKL_DDB_SIZE;
 
        ddb_size -= 4; /* 4 blocks for bypass path allocation */
 
@@ -2612,8 +2639,18 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 }
 
 static unsigned int
-skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p)
+skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
 {
+
+       /* for planar format */
+       if (p->y_bytes_per_pixel) {
+               if (y)  /* y-plane data rate */
+                       return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
+               else    /* uv-plane data rate */
+                       return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
+       }
+
+       /* for packed formats */
        return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
 }
 
@@ -2636,7 +2673,10 @@ skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
                if (!p->enabled)
                        continue;
 
-               total_data_rate += skl_plane_relative_data_rate(p);
+               total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
+               if (p->y_bytes_per_pixel) {
+                       total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
+               }
        }
 
        return total_data_rate;
@@ -2655,6 +2695,7 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
        struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
        uint16_t alloc_size, start, cursor_blocks;
        uint16_t minimum[I915_MAX_PLANES];
+       uint16_t y_minimum[I915_MAX_PLANES];
        unsigned int total_data_rate;
        int plane;
 
@@ -2683,6 +2724,8 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
 
                minimum[plane] = 8;
                alloc_size -= minimum[plane];
+               y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
+               alloc_size -= y_minimum[plane];
        }
 
        /*
@@ -2696,16 +2739,17 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
        start = alloc->start;
        for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
                const struct intel_plane_wm_parameters *p;
-               unsigned int data_rate;
-               uint16_t plane_blocks;
+               unsigned int data_rate, y_data_rate;
+               uint16_t plane_blocks, y_plane_blocks = 0;
 
                p = &params->plane[plane];
                if (!p->enabled)
                        continue;
 
-               data_rate = skl_plane_relative_data_rate(p);
+               data_rate = skl_plane_relative_data_rate(p, 0);
 
                /*
+                * allocation for (packed formats) or (uv-plane part of planar format):
                 * promote the expression to 64 bits to avoid overflowing, the
                 * result is < available as data_rate / total_data_rate < 1
                 */
@@ -2717,6 +2761,22 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
                ddb->plane[pipe][plane].end = start + plane_blocks;
 
                start += plane_blocks;
+
+               /*
+                * allocation for y_plane part of planar format:
+                */
+               if (p->y_bytes_per_pixel) {
+                       y_data_rate = skl_plane_relative_data_rate(p, 1);
+                       y_plane_blocks = y_minimum[plane];
+                       y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
+                                               total_data_rate);
+
+                       ddb->y_plane[pipe][plane].start = start;
+                       ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
+
+                       start += y_plane_blocks;
+               }
+
        }
 
 }
@@ -2829,13 +2889,18 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
                p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
 
                fb = crtc->primary->state->fb;
+               /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
                if (fb) {
                        p->plane[0].enabled = true;
-                       p->plane[0].bytes_per_pixel = fb->bits_per_pixel / 8;
+                       p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+                               drm_format_plane_cpp(fb->pixel_format, 1) : fb->bits_per_pixel / 8;
+                       p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+                               drm_format_plane_cpp(fb->pixel_format, 0) : 0;
                        p->plane[0].tiling = fb->modifier[0];
                } else {
                        p->plane[0].enabled = false;
                        p->plane[0].bytes_per_pixel = 0;
+                       p->plane[0].y_bytes_per_pixel = 0;
                        p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
                }
                p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
@@ -2843,6 +2908,7 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
                p->plane[0].rotation = crtc->primary->state->rotation;
 
                fb = crtc->cursor->state->fb;
+               p->cursor.y_bytes_per_pixel = 0;
                if (fb) {
                        p->cursor.enabled = true;
                        p->cursor.bytes_per_pixel = fb->bits_per_pixel / 8;
@@ -2878,22 +2944,25 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
        uint32_t plane_bytes_per_line, plane_blocks_per_line;
        uint32_t res_blocks, res_lines;
        uint32_t selected_result;
+       uint8_t bytes_per_pixel;
 
        if (latency == 0 || !p->active || !p_params->enabled)
                return false;
 
+       bytes_per_pixel = p_params->y_bytes_per_pixel ?
+               p_params->y_bytes_per_pixel :
+               p_params->bytes_per_pixel;
        method1 = skl_wm_method1(p->pixel_rate,
-                                p_params->bytes_per_pixel,
+                                bytes_per_pixel,
                                 latency);
        method2 = skl_wm_method2(p->pixel_rate,
                                 p->pipe_htotal,
                                 p_params->horiz_pixels,
-                                p_params->bytes_per_pixel,
+                                bytes_per_pixel,
                                 p_params->tiling,
                                 latency);
 
-       plane_bytes_per_line = p_params->horiz_pixels *
-                                       p_params->bytes_per_pixel;
+       plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
        plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
 
        if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
@@ -3110,10 +3179,14 @@ static void skl_write_wm_values(struct drm_i915_private *dev_priv,
                                   new->plane_trans[pipe][i]);
                I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
 
-               for (i = 0; i < intel_num_planes(crtc); i++)
+               for (i = 0; i < intel_num_planes(crtc); i++) {
                        skl_ddb_entry_write(dev_priv,
                                            PLANE_BUF_CFG(pipe, i),
                                            &new->ddb.plane[pipe][i]);
+                       skl_ddb_entry_write(dev_priv,
+                                           PLANE_NV12_BUF_CFG(pipe, i),
+                                           &new->ddb.y_plane[pipe][i]);
+               }
 
                skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
                                    &new->ddb.cursor[pipe]);
@@ -3178,7 +3251,7 @@ static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
 {
        struct drm_device *dev = dev_priv->dev;
        struct skl_ddb_allocation *cur_ddb, *new_ddb;
-       bool reallocated[I915_MAX_PIPES] = {false, false, false};
+       bool reallocated[I915_MAX_PIPES] = {};
        struct intel_crtc *crtc;
        enum pipe pipe;
 
@@ -3271,6 +3344,7 @@ static bool skl_update_pipe_wm(struct drm_crtc *crtc,
                return false;
 
        intel_crtc->wm.skl_active = *pipe_wm;
+
        return true;
 }
 
@@ -3364,8 +3438,16 @@ skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
        intel_plane->wm.scaled = scaled;
        intel_plane->wm.horiz_pixels = sprite_width;
        intel_plane->wm.vert_pixels = sprite_height;
-       intel_plane->wm.bytes_per_pixel = pixel_size;
        intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
+
+       /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
+       intel_plane->wm.bytes_per_pixel =
+               (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+               drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
+       intel_plane->wm.y_bytes_per_pixel =
+               (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+               drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
+
        /*
         * Framebuffer can be NULL on plane disable, but it does not
         * matter for watermarks if we assume no tiling in that case.
@@ -3930,6 +4012,8 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
                    GEN6_RP_DOWN_IDLE_AVG);
 
        dev_priv->rps.power = new_power;
+       dev_priv->rps.up_threshold = threshold_up;
+       dev_priv->rps.down_threshold = threshold_down;
        dev_priv->rps.last_adj = 0;
 }
 
@@ -4001,8 +4085,11 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
                      "Odd GPU freq value\n"))
                val &= ~1;
 
-       if (val != dev_priv->rps.cur_freq)
+       if (val != dev_priv->rps.cur_freq) {
                vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+               if (!IS_CHERRYVIEW(dev_priv))
+                       gen6_set_rps_thresholds(dev_priv, val);
+       }
 
        I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
@@ -4010,50 +4097,25 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
        trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
 }
 
-/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
+/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
  *
  * * If Gfx is Idle, then
- * 1. Mask Turbo interrupts
- * 2. Bring up Gfx clock
- * 3. Change the freq to Rpn and wait till P-Unit updates freq
- * 4. Clear the Force GFX CLK ON bit so that Gfx can down
- * 5. Unmask Turbo interrupts
+ * 1. Forcewake Media well.
+ * 2. Request idle freq.
+ * 3. Release Forcewake of Media well.
 */
 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 {
-       struct drm_device *dev = dev_priv->dev;
        u32 val = dev_priv->rps.idle_freq;
 
-       /* CHV and latest VLV don't need to force the gfx clock */
-       if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
-               valleyview_set_rps(dev_priv->dev, val);
-               return;
-       }
-
-       /*
-        * When we are idle.  Drop to min voltage state.
-        */
-
        if (dev_priv->rps.cur_freq <= val)
                return;
 
-       /* Mask turbo interrupt so that they will not come in between */
-       I915_WRITE(GEN6_PMINTRMSK,
-                  gen6_sanitize_rps_pm_mask(dev_priv, ~0));
-
-       vlv_force_gfx_clock(dev_priv, true);
-
-       dev_priv->rps.cur_freq = val;
-
-       vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
-
-       if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
-                               & GENFREQSTATUS) == 0, 100))
-               DRM_ERROR("timed out waiting for Punit\n");
-
-       vlv_force_gfx_clock(dev_priv, false);
-
-       I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+       /* Wake up the media well, as that takes a lot less
+        * power than the Render well. */
+       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
+       valleyview_set_rps(dev_priv->dev, val);
+       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
 }
 
 void gen6_rps_busy(struct drm_i915_private *dev_priv)
@@ -4082,21 +4144,47 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
                I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
        }
        mutex_unlock(&dev_priv->rps.hw_lock);
+
+       spin_lock(&dev_priv->rps.client_lock);
+       while (!list_empty(&dev_priv->rps.clients))
+               list_del_init(dev_priv->rps.clients.next);
+       spin_unlock(&dev_priv->rps.client_lock);
 }
 
-void gen6_rps_boost(struct drm_i915_private *dev_priv)
+void gen6_rps_boost(struct drm_i915_private *dev_priv,
+                   struct intel_rps_client *rps,
+                   unsigned long submitted)
 {
-       u32 val;
+       /* This is intentionally racy! We peek at the state here, then
+        * validate inside the RPS worker.
+        */
+       if (!(dev_priv->mm.busy &&
+             dev_priv->rps.enabled &&
+             dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
+               return;
 
-       mutex_lock(&dev_priv->rps.hw_lock);
-       val = dev_priv->rps.max_freq_softlimit;
-       if (dev_priv->rps.enabled &&
-           dev_priv->mm.busy &&
-           dev_priv->rps.cur_freq < val) {
-               intel_set_rps(dev_priv->dev, val);
-               dev_priv->rps.last_adj = 0;
+       /* Force a RPS boost (and don't count it against the client) if
+        * the GPU is severely congested.
+        */
+       if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
+               rps = NULL;
+
+       spin_lock(&dev_priv->rps.client_lock);
+       if (rps == NULL || list_empty(&rps->link)) {
+               spin_lock_irq(&dev_priv->irq_lock);
+               if (dev_priv->rps.interrupts_enabled) {
+                       dev_priv->rps.client_boost = true;
+                       queue_work(dev_priv->wq, &dev_priv->rps.work);
+               }
+               spin_unlock_irq(&dev_priv->irq_lock);
+
+               if (rps != NULL) {
+                       list_add(&rps->link, &dev_priv->rps.clients);
+                       rps->boosts++;
+               } else
+                       dev_priv->rps.boosts++;
        }
-       mutex_unlock(&dev_priv->rps.hw_lock);
+       spin_unlock(&dev_priv->rps.client_lock);
 }
 
 void intel_set_rps(struct drm_device *dev, u8 val)
@@ -4250,8 +4338,8 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)
        if (dev_priv->rps.min_freq_softlimit == 0) {
                if (IS_HASWELL(dev) || IS_BROADWELL(dev))
                        dev_priv->rps.min_freq_softlimit =
-                               /* max(RPe, 450 MHz) */
-                               max(dev_priv->rps.efficient_freq, (u8) 9);
+                               max_t(int, dev_priv->rps.efficient_freq,
+                                     intel_freq_opcode(dev_priv, 450));
                else
                        dev_priv->rps.min_freq_softlimit =
                                dev_priv->rps.min_freq;
@@ -4325,8 +4413,13 @@ static void gen9_enable_rc6(struct drm_device *dev)
                                   GEN6_RC_CTL_EI_MODE(1) |
                                   rc6_mask);
 
-       /* 3b: Enable Coarse Power Gating only when RC6 is enabled */
-       I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 3 : 0);
+       /*
+        * 3b: Enable Coarse Power Gating only when RC6 is enabled.
+        * WaDisableRenderPowerGating:skl,bxt - Render PG needs to be disabled with RC6.
+        */
+       I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
+                       GEN9_MEDIA_PG_ENABLE : 0);
+
 
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
@@ -4665,24 +4758,6 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
        return rp1;
 }
 
-static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
-       struct drm_device *dev = dev_priv->dev;
-       u32 val, rpn;
-
-       if (dev->pdev->revision >= 0x20) {
-               val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
-               rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
-                      FB_GFX_FREQ_FUSE_MASK);
-       } else { /* For pre-production hardware */
-               val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
-               rpn = ((val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) &
-                      PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK);
-       }
-
-       return rpn;
-}
-
 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
 {
        u32 val, rp1;
@@ -4934,7 +5009,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
                         intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
                         dev_priv->rps.rp1_freq);
 
-       dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
+       /* PUnit validated range is only [RPe, RP0] */
+       dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
        DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
                         intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
                         dev_priv->rps.min_freq);
@@ -4996,8 +5072,8 @@ static void cherryview_enable_rps(struct drm_device *dev)
                I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
        I915_WRITE(GEN6_RC_SLEEP, 0);
 
-       /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */
-       I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
+       /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
+       I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
 
        /* allows RC6 residency counter to work */
        I915_WRITE(VLV_COUNTER_CONTROL,
@@ -5032,6 +5108,12 @@ static void cherryview_enable_rps(struct drm_device *dev)
                   GEN6_RP_UP_BUSY_AVG |
                   GEN6_RP_DOWN_IDLE_AVG);
 
+       /* Setting Fixed Bias */
+       val = VLV_OVERRIDE_EN |
+                 VLV_SOC_TDP_EN |
+                 CHV_BIAS_CPU_50_SOC_50;
+       vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
+
        val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
        /* RPS code assumes GPLL is used */
@@ -5116,6 +5198,12 @@ static void valleyview_enable_rps(struct drm_device *dev)
 
        I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
 
+       /* Setting Fixed Bias */
+       val = VLV_OVERRIDE_EN |
+                 VLV_SOC_TDP_EN |
+                 VLV_BIAS_CPU_125_SOC_875;
+       vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
+
        val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
        /* RPS code assumes GPLL is used */
@@ -5798,13 +5886,15 @@ static void ibx_init_clock_gating(struct drm_device *dev)
 static void g4x_disable_trickle_feed(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       int pipe;
+       enum pipe pipe;
 
        for_each_pipe(dev_priv, pipe) {
                I915_WRITE(DSPCNTR(pipe),
                           I915_READ(DSPCNTR(pipe)) |
                           DISPPLANE_TRICKLE_FEED_DISABLE);
-               intel_flush_primary_plane(dev_priv, pipe);
+
+               I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
+               POSTING_READ(DSPSURF(pipe));
        }
 }
 
@@ -6094,10 +6184,9 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        enum pipe pipe;
+       uint32_t misccpctl;
 
-       I915_WRITE(WM3_LP_ILK, 0);
-       I915_WRITE(WM2_LP_ILK, 0);
-       I915_WRITE(WM1_LP_ILK, 0);
+       ilk_init_lp_watermarks(dev);
 
        /* WaSwitchSolVfFArbitrationPriority:bdw */
        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -6126,6 +6215,22 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
+       /*
+        * WaProgramL3SqcReg1Default:bdw
+        * WaTempDisableDOPClkGating:bdw
+        */
+       misccpctl = I915_READ(GEN7_MISCCPCTL);
+       I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+       I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+       I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+
+       /*
+        * WaGttCachingOffByDefault:bdw
+        * GTT cache may not work with big pages, so if those
+        * are ever enabled GTT cache may need to be disabled.
+        */
+       I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
+
        lpt_init_clock_gating(dev);
 }
 
@@ -6401,6 +6506,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
        /* WaDisableSDEUnitClockGating:chv */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+       /*
+        * GTT cache may not work with big pages, so if those
+        * are ever enabled GTT cache may need to be disabled.
+        */
+       I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
 }
 
 static void g4x_init_clock_gating(struct drm_device *dev)
@@ -6544,7 +6655,12 @@ void intel_init_pm(struct drm_device *dev)
        if (INTEL_INFO(dev)->gen >= 9) {
                skl_setup_wm_latency(dev);
 
-               dev_priv->display.init_clock_gating = skl_init_clock_gating;
+               if (IS_BROXTON(dev))
+                       dev_priv->display.init_clock_gating =
+                               bxt_init_clock_gating;
+               else if (IS_SKYLAKE(dev))
+                       dev_priv->display.init_clock_gating =
+                               skl_init_clock_gating;
                dev_priv->display.update_wm = skl_update_wm;
                dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
        } else if (HAS_PCH_SPLIT(dev)) {
@@ -6762,14 +6878,58 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
                return val / GT_FREQUENCY_MULTIPLIER;
 }
 
+struct request_boost {
+       struct work_struct work;
+       struct drm_i915_gem_request *req;
+};
+
+static void __intel_rps_boost_work(struct work_struct *work)
+{
+       struct request_boost *boost = container_of(work, struct request_boost, work);
+       struct drm_i915_gem_request *req = boost->req;
+
+       if (!i915_gem_request_completed(req, true))
+               gen6_rps_boost(to_i915(req->ring->dev), NULL,
+                              req->emitted_jiffies);
+
+       i915_gem_request_unreference__unlocked(req);
+       kfree(boost);
+}
+
+void intel_queue_rps_boost_for_request(struct drm_device *dev,
+                                      struct drm_i915_gem_request *req)
+{
+       struct request_boost *boost;
+
+       if (req == NULL || INTEL_INFO(dev)->gen < 6)
+               return;
+
+       if (i915_gem_request_completed(req, true))
+               return;
+
+       boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
+       if (boost == NULL)
+               return;
+
+       i915_gem_request_reference(req);
+       boost->req = req;
+
+       INIT_WORK(&boost->work, __intel_rps_boost_work);
+       queue_work(to_i915(dev)->wq, &boost->work);
+}
+
 void intel_pm_setup(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
        mutex_init(&dev_priv->rps.hw_lock);
+       spin_lock_init(&dev_priv->rps.client_lock);
 
        INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
                          intel_gen6_powersave_work);
+       INIT_LIST_HEAD(&dev_priv->rps.clients);
+       INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
+       INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
 
        dev_priv->pm.suspended = false;
 }