drm/i915: Use a sentinel to terminate the dbuf slice arrays
drivers/gpu/drm/i915/intel_pm.c
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/module.h>
#include <linux/pm_runtime.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_plane_helper.h>

#include "display/intel_atomic.h"
#include "display/intel_display_types.h"
#include "display/intel_fbc.h"
#include "display/intel_sprite.h"

#include "gt/intel_llc.h"

#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_trace.h"
#include "intel_pm.h"
#include "intel_sideband.h"
#include "../../../platform/x86/intel_ips.h"

static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
{
        if (HAS_LLC(dev_priv)) {
                /*
                 * WaCompressedResourceDisplayNewHashMode:skl,kbl
                 * Display WA #0390: skl,kbl
                 *
                 * Must match Sampler, Pixel Back End, and Media. See
                 * WaCompressedResourceSamplerPbeMediaNewHashMode.
                 */
                I915_WRITE(CHICKEN_PAR1_1,
                           I915_READ(CHICKEN_PAR1_1) |
                           SKL_DE_COMPRESSED_HASH_MODE);
        }

        /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
        I915_WRITE(CHICKEN_PAR1_1,
                   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);

        /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
        I915_WRITE(GEN8_CHICKEN_DCPR_1,
                   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);

        /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
        /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
        I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
                   DISP_FBC_WM_DIS |
                   DISP_FBC_MEMORY_WAKE);

        /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
        I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
                   ILK_DPFC_DISABLE_DUMMY0);

        if (IS_SKYLAKE(dev_priv)) {
                /* WaDisableDopClockGating */
                I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
                           & ~GEN7_DOP_CLOCK_GATE_ENABLE);
        }
}

static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
{
        gen9_init_clock_gating(dev_priv);

        /* WaDisableSDEUnitClockGating:bxt */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

        /*
         * FIXME:
         * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
         */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

        /*
         * Wa: Backlight PWM may stop in the asserted state, causing backlight
         * to stay fully on.
         */
        I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
                   PWM1_GATING_DIS | PWM2_GATING_DIS);

        /*
         * Lower the display internal timeout.
         * This is needed to avoid any hard hangs when DSI port PLL
         * is off and a MMIO access is attempted by any privileged
         * application, using batch buffers or any other means.
         */
        I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
}

static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
{
        gen9_init_clock_gating(dev_priv);

        /*
         * WaDisablePWMClockGating:glk
         * Backlight PWM may stop in the asserted state, causing backlight
         * to stay fully on.
         */
        I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
                   PWM1_GATING_DIS | PWM2_GATING_DIS);
}

static void pnv_get_mem_freq(struct drm_i915_private *dev_priv)
{
        u32 tmp;

        tmp = I915_READ(CLKCFG);

        switch (tmp & CLKCFG_FSB_MASK) {
        case CLKCFG_FSB_533:
                dev_priv->fsb_freq = 533; /* 133*4 */
                break;
        case CLKCFG_FSB_800:
                dev_priv->fsb_freq = 800; /* 200*4 */
                break;
        case CLKCFG_FSB_667:
                dev_priv->fsb_freq = 667; /* 167*4 */
                break;
        case CLKCFG_FSB_400:
                dev_priv->fsb_freq = 400; /* 100*4 */
                break;
        }

        switch (tmp & CLKCFG_MEM_MASK) {
        case CLKCFG_MEM_533:
                dev_priv->mem_freq = 533;
                break;
        case CLKCFG_MEM_667:
                dev_priv->mem_freq = 667;
                break;
        case CLKCFG_MEM_800:
                dev_priv->mem_freq = 800;
                break;
        }

        /* detect pineview DDR3 setting */
        tmp = I915_READ(CSHRDDR3CTL);
        dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}

static void ilk_get_mem_freq(struct drm_i915_private *dev_priv)
{
        u16 ddrpll, csipll;

        ddrpll = intel_uncore_read16(&dev_priv->uncore, DDRMPLL1);
        csipll = intel_uncore_read16(&dev_priv->uncore, CSIPLL0);

        switch (ddrpll & 0xff) {
        case 0xc:
                dev_priv->mem_freq = 800;
                break;
        case 0x10:
                dev_priv->mem_freq = 1066;
                break;
        case 0x14:
                dev_priv->mem_freq = 1333;
                break;
        case 0x18:
                dev_priv->mem_freq = 1600;
                break;
        default:
                drm_dbg(&dev_priv->drm, "unknown memory frequency 0x%02x\n",
                        ddrpll & 0xff);
                dev_priv->mem_freq = 0;
                break;
        }

        switch (csipll & 0x3ff) {
        case 0x00c:
                dev_priv->fsb_freq = 3200;
                break;
        case 0x00e:
                dev_priv->fsb_freq = 3733;
                break;
        case 0x010:
                dev_priv->fsb_freq = 4266;
                break;
        case 0x012:
                dev_priv->fsb_freq = 4800;
                break;
        case 0x014:
                dev_priv->fsb_freq = 5333;
                break;
        case 0x016:
                dev_priv->fsb_freq = 5866;
                break;
        case 0x018:
                dev_priv->fsb_freq = 6400;
                break;
        default:
                drm_dbg(&dev_priv->drm, "unknown fsb frequency 0x%04x\n",
                        csipll & 0x3ff);
                dev_priv->fsb_freq = 0;
                break;
        }
}

static const struct cxsr_latency cxsr_latency_table[] = {
        {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
        {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
        {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
        {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
        {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

        {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
        {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
        {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
        {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
        {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

        {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
        {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
        {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
        {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
        {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

        {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
        {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
        {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
        {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
        {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

        {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
        {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
        {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
        {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
        {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

        {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
        {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
        {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
        {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
        {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};

static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
                                                         bool is_ddr3,
                                                         int fsb,
                                                         int mem)
{
        const struct cxsr_latency *latency;
        int i;

        if (fsb == 0 || mem == 0)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
                latency = &cxsr_latency_table[i];
                if (is_desktop == latency->is_desktop &&
                    is_ddr3 == latency->is_ddr3 &&
                    fsb == latency->fsb_freq && mem == latency->mem_freq)
                        return latency;
        }

        DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

        return NULL;
}
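
/*
 * Example lookup (values from the table above): a desktop part
 * (is_desktop == 1) with DDR3 at mem_freq 800 and fsb_freq 800 matches
 * {1, 1, 800, 800, 5902, 35902, 6318, 36318}, i.e. display/cursor
 * self-refresh latencies of 5902 and 6318, consumed as latency_ns by
 * pnv_update_wm() below.
 */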

static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        vlv_punit_get(dev_priv);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
        if (enable)
                val &= ~FORCE_DDR_HIGH_FREQ;
        else
                val |= FORCE_DDR_HIGH_FREQ;
        val &= ~FORCE_DDR_LOW_FREQ;
        val |= FORCE_DDR_FREQ_REQ_ACK;
        vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

        if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
                      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
                drm_err(&dev_priv->drm,
                        "timed out waiting for Punit DDR DVFS request\n");

        vlv_punit_put(dev_priv);
}

static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        vlv_punit_get(dev_priv);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
        if (enable)
                val |= DSP_MAXFIFO_PM5_ENABLE;
        else
                val &= ~DSP_MAXFIFO_PM5_ENABLE;
        vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, val);

        vlv_punit_put(dev_priv);
}

#define FW_WM(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
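
/*
 * Usage example: FW_WM(wm, SR) expands to
 * (((wm) << DSPFW_SR_SHIFT) & DSPFW_SR_MASK), positioning a watermark
 * value in the SR field of a DSPFW register.
 */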

static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
        bool was_enabled;
        u32 val;

        if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
                was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
                I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
                POSTING_READ(FW_BLC_SELF_VLV);
        } else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
                was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
                I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_PINEVIEW(dev_priv)) {
                val = I915_READ(DSPFW3);
                was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
                if (enable)
                        val |= PINEVIEW_SELF_REFRESH_EN;
                else
                        val &= ~PINEVIEW_SELF_REFRESH_EN;
                I915_WRITE(DSPFW3, val);
                POSTING_READ(DSPFW3);
        } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
                was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
                val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
                               _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
                I915_WRITE(FW_BLC_SELF, val);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_I915GM(dev_priv)) {
                /*
                 * FIXME can't find a bit like this for 915G, and
                 * yet it does have the related watermark in
                 * FW_BLC_SELF. What's going on?
                 */
                was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
                val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
                               _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
                I915_WRITE(INSTPM, val);
                POSTING_READ(INSTPM);
        } else {
                return false;
        }

        trace_intel_memory_cxsr(dev_priv, was_enabled, enable);

        drm_dbg_kms(&dev_priv->drm, "memory self-refresh is %s (was %s)\n",
                    enableddisabled(enable),
                    enableddisabled(was_enabled));

        return was_enabled;
}

/**
 * intel_set_memory_cxsr - Configure CxSR state
 * @dev_priv: i915 device
 * @enable: Allow vs. disallow CxSR
 *
 * Allow or disallow the system to enter a special CxSR
 * (C-state self refresh) state. What typically happens in CxSR mode
 * is that several display FIFOs may get combined into a single larger
 * FIFO for a particular plane (so called max FIFO mode) to allow the
 * system to defer memory fetches longer, and the memory will enter
 * self refresh.
 *
 * Note that enabling CxSR does not guarantee that the system enters
 * this special mode, nor does it guarantee that the system stays
 * in that mode once entered. So this just allows/disallows the system
 * to autonomously utilize the CxSR mode. Other factors such as core
 * C-states will affect when/if the system actually enters/exits the
 * CxSR mode.
 *
 * Note that on VLV/CHV this actually only controls the max FIFO mode,
 * and the system is free to enter/exit memory self refresh at any time
 * even when the use of CxSR has been disallowed.
 *
 * While the system is actually in the CxSR/max FIFO mode, some plane
 * control registers will not get latched on vblank. Thus in order to
 * guarantee the system will respond to changes in the plane registers
 * we must always disallow CxSR prior to making changes to those registers.
 * Unfortunately the system will re-evaluate the CxSR conditions at
 * frame start which happens after vblank start (which is when the plane
 * registers would get latched), so we can't proceed with the plane update
 * during the same frame where we disallowed CxSR.
 *
 * Certain platforms also have a deeper HPLL SR mode. Fortunately the
 * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
 * the hardware w.r.t. HPLL SR when writing to plane registers.
 * Disallowing just CxSR is sufficient.
 */
bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
        bool ret;

        mutex_lock(&dev_priv->wm.wm_mutex);
        ret = _intel_set_memory_cxsr(dev_priv, enable);
        if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
                dev_priv->wm.vlv.cxsr = enable;
        else if (IS_G4X(dev_priv))
                dev_priv->wm.g4x.cxsr = enable;
        mutex_unlock(&dev_priv->wm.wm_mutex);

        return ret;
}
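
/*
 * Illustrative call pattern (hypothetical caller, following the rules
 * in the kernel-doc above): disallow CxSR before writing plane
 * registers, and wait a frame so the disable actually takes effect:
 *
 *      intel_set_memory_cxsr(dev_priv, false);
 *      intel_wait_for_vblank(dev_priv, crtc->pipe);
 *      ... write plane registers ...
 *      intel_set_memory_cxsr(dev_priv, true);
 */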

/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value.  It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;

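/*
 * VLV_FIFO_START() reassembles a 9-bit FIFO start offset: the low 8
 * bits come from the DSPARB value and bit 8 from the DSPARB2 value,
 * selected by the two shift arguments.
 */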
#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
        ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))

static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
{
        struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
        struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
        struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
        enum pipe pipe = crtc->pipe;
        int sprite0_start, sprite1_start;
        u32 dsparb, dsparb2, dsparb3;

        switch (pipe) {
        case PIPE_A:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
                break;
        case PIPE_B:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
                break;
        case PIPE_C:
                dsparb2 = I915_READ(DSPARB2);
                dsparb3 = I915_READ(DSPARB3);
                sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
                sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
                break;
        default:
                MISSING_CASE(pipe);
                return;
        }

        fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
        fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
        fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
        fifo_state->plane[PLANE_CURSOR] = 63;
}

static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
                              enum i9xx_plane_id i9xx_plane)
{
        u32 dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        if (i9xx_plane == PLANE_B)
                size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

        drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n",
                    dsparb, plane_name(i9xx_plane), size);

        return size;
}

static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
                              enum i9xx_plane_id i9xx_plane)
{
        u32 dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x1ff;
        if (i9xx_plane == PLANE_B)
                size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
        size >>= 1; /* Convert to cachelines */

        drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n",
                    dsparb, plane_name(i9xx_plane), size);

        return size;
}

static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
                              enum i9xx_plane_id i9xx_plane)
{
        u32 dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        size >>= 2; /* Convert to cachelines */

        drm_dbg_kms(&dev_priv->drm, "FIFO size - (0x%08x) %c: %d\n",
                    dsparb, plane_name(i9xx_plane), size);

        return size;
}

/* Pineview has different values for various configs */
static const struct intel_watermark_params pnv_display_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params pnv_display_hplloff_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params pnv_cursor_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params pnv_cursor_hplloff_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i965_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i945_wm_info = {
        .fifo_size = I945_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i915_wm_info = {
        .fifo_size = I915_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i830_a_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i830_bc_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM/2,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i845_wm_info = {
        .fifo_size = I830_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};

/**
 * intel_wm_method1 - Method 1 / "small buffer" watermark formula
 * @pixel_rate: Pipe pixel rate in kHz
 * @cpp: Plane bytes per pixel
 * @latency: Memory wakeup latency in 0.1us units
 *
 * Compute the watermark using the method 1 or "small buffer"
630  * formula. The caller may additonally add extra cachelines
631  * to account for TLB misses and clock crossings.
632  *
633  * This method is concerned with the short term drain rate
634  * of the FIFO, ie. it does not account for blanking periods
635  * which would effectively reduce the average drain rate across
636  * a longer period. The name "small" refers to the fact the
637  * FIFO is relatively small compared to the amount of data
638  * fetched.
639  *
640  * The FIFO level vs. time graph might look something like:
641  *
642  *   |\   |\
643  *   | \  | \
644  * __---__---__ (- plane active, _ blanking)
645  * -> time
646  *
647  * or perhaps like this:
648  *
649  *   |\|\  |\|\
650  * __----__----__ (- plane active, _ blanking)
651  * -> time
652  *
653  * Returns:
654  * The watermark in bytes
655  */
656 static unsigned int intel_wm_method1(unsigned int pixel_rate,
657                                      unsigned int cpp,
658                                      unsigned int latency)
659 {
660         u64 ret;
661
662         ret = mul_u32_u32(pixel_rate, cpp * latency);
663         ret = DIV_ROUND_UP_ULL(ret, 10000);
664
665         return ret;
666 }
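
/*
 * Worked example (illustrative numbers): at a 148500 kHz pixel rate
 * (1080p60), cpp = 4 and a 5 usec wakeup latency (latency = 50 in
 * 0.1us units), the result is 148500 * 4 * 50 / 10000 = 2970 bytes.
 */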

/**
 * intel_wm_method2 - Method 2 / "large buffer" watermark formula
 * @pixel_rate: Pipe pixel rate in kHz
 * @htotal: Pipe horizontal total
 * @width: Plane width in pixels
 * @cpp: Plane bytes per pixel
 * @latency: Memory wakeup latency in 0.1us units
 *
 * Compute the watermark using the method 2 or "large buffer"
677  * formula. The caller may additonally add extra cachelines
678  * to account for TLB misses and clock crossings.
679  *
680  * This method is concerned with the long term drain rate
681  * of the FIFO, ie. it does account for blanking periods
682  * which effectively reduce the average drain rate across
683  * a longer period. The name "large" refers to the fact the
684  * FIFO is relatively large compared to the amount of data
685  * fetched.
686  *
687  * The FIFO level vs. time graph might look something like:
688  *
689  *    |\___       |\___
690  *    |    \___   |    \___
691  *    |        \  |        \
692  * __ --__--__--__--__--__--__ (- plane active, _ blanking)
693  * -> time
694  *
695  * Returns:
696  * The watermark in bytes
697  */
698 static unsigned int intel_wm_method2(unsigned int pixel_rate,
699                                      unsigned int htotal,
700                                      unsigned int width,
701                                      unsigned int cpp,
702                                      unsigned int latency)
703 {
704         unsigned int ret;
705
706         /*
707          * FIXME remove once all users are computing
708          * watermarks in the correct place.
709          */
710         if (WARN_ON_ONCE(htotal == 0))
711                 htotal = 1;
712
713         ret = (latency * pixel_rate) / (htotal * 10000);
714         ret = (ret + 1) * width * cpp;
715
716         return ret;
717 }
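
/*
 * Worked example (illustrative numbers): pixel_rate = 148500 kHz,
 * htotal = 2200, width = 1920, cpp = 4, latency = 50 (5 usec). The
 * latency spans (50 * 148500) / (2200 * 10000) = 0 complete lines,
 * so the result is (0 + 1) * 1920 * 4 = 7680 bytes, i.e. one full
 * line of data.
 */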

/**
 * intel_calculate_wm - calculate watermark level
 * @pixel_rate: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the FIFO buffer
 * @cpp: bytes per pixel
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again).  Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size.  When it reaches the watermark level, it'll start
 * fetching FIFO line sized chunks from memory until the FIFO fills
 * past the watermark point.  If the FIFO drains completely, a FIFO underrun
 * will occur, and a display engine hang could result.
 */
static unsigned int intel_calculate_wm(int pixel_rate,
                                       const struct intel_watermark_params *wm,
                                       int fifo_size, int cpp,
                                       unsigned int latency_ns)
{
        int entries, wm_size;

        /*
         * Note: we need to make sure we don't overflow for various clock &
         * latency values.
         * clocks go from a few thousand to several hundred thousand.
         * latency is usually a few thousand
         */
        entries = intel_wm_method1(pixel_rate, cpp,
                                   latency_ns / 100);
        entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
                wm->guard_size;
        DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);

        wm_size = fifo_size - entries;
        DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);

        /* Don't promote wm_size to unsigned... */
        if (wm_size > wm->max_wm)
                wm_size = wm->max_wm;
        if (wm_size <= 0)
                wm_size = wm->default_wm;

        /*
         * Bspec seems to indicate that the value shouldn't be lower than
         * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
768          * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
769          * Lets go for 8 which is the burst size since certain platforms
770          * already use a hardcoded 8 (which is what the spec says should be
771          * done).
772          */
773         if (wm_size <= 8)
774                 wm_size = 8;
775
776         return wm_size;
777 }
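
/*
 * Worked example (illustrative numbers): pixel_rate = 148500, cpp = 4
 * and latency_ns = 5000 give intel_wm_method1(148500, 4, 50) = 2970
 * bytes. With a 64 byte cacheline that is DIV_ROUND_UP(2970, 64) = 47
 * entries, plus a guard_size of 2 makes 49, so a 96 entry FIFO would
 * be left with a watermark level of 96 - 49 = 47.
 */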

static bool is_disabling(int old, int new, int threshold)
{
        return old >= threshold && new < threshold;
}

static bool is_enabling(int old, int new, int threshold)
{
        return old < threshold && new >= threshold;
}

static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
{
        return dev_priv->wm.max_level + 1;
}

static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
                                   const struct intel_plane_state *plane_state)
{
        struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);

        /* FIXME check the 'enable' instead */
        if (!crtc_state->hw.active)
                return false;

        /*
         * Treat cursor with fb as always visible since cursor updates
         * can happen faster than the vrefresh rate, and the current
         * watermark code doesn't handle that correctly. Cursor updates
         * which set/clear the fb or change the cursor size are going
         * to get throttled by intel_legacy_cursor_update() to work
         * around this problem with the watermark code.
         */
        if (plane->id == PLANE_CURSOR)
                return plane_state->hw.fb != NULL;
        else
                return plane_state->uapi.visible;
}

static bool intel_crtc_active(struct intel_crtc *crtc)
{
        /* Be paranoid as we can arrive here with only partial
         * state retrieved from the hardware during setup.
         *
         * We can ditch the adjusted_mode.crtc_clock check as soon
         * as Haswell has gained clock readout/fastboot support.
         *
         * We can ditch the crtc->primary->state->fb check as soon as we can
         * properly reconstruct framebuffers.
         *
         * FIXME: The intel_crtc->active here should be switched to
         * crtc->state->active once we have proper CRTC states wired up
         * for atomic.
         */
        return crtc->active && crtc->base.primary->state->fb &&
                crtc->config->hw.adjusted_mode.crtc_clock;
}

static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
{
        struct intel_crtc *crtc, *enabled = NULL;

        for_each_intel_crtc(&dev_priv->drm, crtc) {
                if (intel_crtc_active(crtc)) {
                        if (enabled)
                                return NULL;
                        enabled = crtc;
                }
        }

        return enabled;
}

static void pnv_update_wm(struct intel_crtc *unused_crtc)
{
        struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
        struct intel_crtc *crtc;
        const struct cxsr_latency *latency;
        u32 reg;
        unsigned int wm;

        latency = intel_get_cxsr_latency(!IS_MOBILE(dev_priv),
                                         dev_priv->is_ddr3,
                                         dev_priv->fsb_freq,
                                         dev_priv->mem_freq);
        if (!latency) {
                drm_dbg_kms(&dev_priv->drm,
                            "Unknown FSB/MEM found, disable CxSR\n");
                intel_set_memory_cxsr(dev_priv, false);
                return;
        }

        crtc = single_enabled_crtc(dev_priv);
        if (crtc) {
                const struct drm_display_mode *adjusted_mode =
                        &crtc->config->hw.adjusted_mode;
                const struct drm_framebuffer *fb =
                        crtc->base.primary->state->fb;
                int cpp = fb->format->cpp[0];
                int clock = adjusted_mode->crtc_clock;

                /* Display SR */
                wm = intel_calculate_wm(clock, &pnv_display_wm,
                                        pnv_display_wm.fifo_size,
                                        cpp, latency->display_sr);
                reg = I915_READ(DSPFW1);
                reg &= ~DSPFW_SR_MASK;
                reg |= FW_WM(wm, SR);
                I915_WRITE(DSPFW1, reg);
                drm_dbg_kms(&dev_priv->drm, "DSPFW1 register is %x\n", reg);

                /* cursor SR */
                wm = intel_calculate_wm(clock, &pnv_cursor_wm,
                                        pnv_display_wm.fifo_size,
                                        4, latency->cursor_sr);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_CURSOR_SR_MASK;
                reg |= FW_WM(wm, CURSOR_SR);
                I915_WRITE(DSPFW3, reg);

                /* Display HPLL off SR */
                wm = intel_calculate_wm(clock, &pnv_display_hplloff_wm,
                                        pnv_display_hplloff_wm.fifo_size,
                                        cpp, latency->display_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_SR_MASK;
                reg |= FW_WM(wm, HPLL_SR);
                I915_WRITE(DSPFW3, reg);

                /* cursor HPLL off SR */
                wm = intel_calculate_wm(clock, &pnv_cursor_hplloff_wm,
                                        pnv_display_hplloff_wm.fifo_size,
                                        4, latency->cursor_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_CURSOR_MASK;
                reg |= FW_WM(wm, HPLL_CURSOR);
                I915_WRITE(DSPFW3, reg);
                drm_dbg_kms(&dev_priv->drm, "DSPFW3 register is %x\n", reg);

                intel_set_memory_cxsr(dev_priv, true);
        } else {
                intel_set_memory_cxsr(dev_priv, false);
        }
}

/*
 * Documentation says:
 * "If the line size is small, the TLB fetches can get in the way of the
 *  data fetches, causing some lag in the pixel data return which is not
 *  accounted for in the above formulas. The following adjustment only
 *  needs to be applied if eight whole lines fit in the buffer at once.
 *  The WM is adjusted upwards by the difference between the FIFO size
 *  and the size of 8 whole lines. This adjustment is always performed
 *  in the actual pixel depth regardless of whether FBC is enabled or not."
 */
static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
{
        int tlb_miss = fifo_size * 64 - width * cpp * 8;

        return max(0, tlb_miss);
}
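
/*
 * Worked example (illustrative numbers): a 511 cacheline FIFO holds
 * 511 * 64 = 32704 bytes, and eight whole lines of a 640 pixel wide
 * 4 bpp plane are 640 * 4 * 8 = 20480 bytes, so the watermark is
 * bumped by 32704 - 20480 = 12224 bytes. When eight lines don't fit,
 * the difference is negative and clamps to 0 (no adjustment).
 */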

static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
                                const struct g4x_wm_values *wm)
{
        enum pipe pipe;

        for_each_pipe(dev_priv, pipe)
                trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);

        I915_WRITE(DSPFW1,
                   FW_WM(wm->sr.plane, SR) |
                   FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
                   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
                   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
        I915_WRITE(DSPFW2,
                   (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
                   FW_WM(wm->sr.fbc, FBC_SR) |
                   FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
                   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
                   FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
                   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
        I915_WRITE(DSPFW3,
                   (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
                   FW_WM(wm->sr.cursor, CURSOR_SR) |
                   FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
                   FW_WM(wm->hpll.plane, HPLL_SR));

        POSTING_READ(DSPFW1);
}

#define FW_WM_VLV(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)

static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
                                const struct vlv_wm_values *wm)
{
        enum pipe pipe;

        for_each_pipe(dev_priv, pipe) {
                trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);

                I915_WRITE(VLV_DDL(pipe),
                           (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
                           (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
                           (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
                           (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
        }

        /*
         * Zero the (unused) WM1 watermarks, and also clear all the
         * high order bits so that there are no out of bounds values
         * present in the registers during the reprogramming.
         */
        I915_WRITE(DSPHOWM, 0);
        I915_WRITE(DSPHOWM1, 0);
        I915_WRITE(DSPFW4, 0);
        I915_WRITE(DSPFW5, 0);
        I915_WRITE(DSPFW6, 0);

        I915_WRITE(DSPFW1,
                   FW_WM(wm->sr.plane, SR) |
                   FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
                   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
                   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
        I915_WRITE(DSPFW2,
                   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
                   FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
                   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
        I915_WRITE(DSPFW3,
                   FW_WM(wm->sr.cursor, CURSOR_SR));

        if (IS_CHERRYVIEW(dev_priv)) {
                I915_WRITE(DSPFW7_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
                I915_WRITE(DSPFW8_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
                           FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
                I915_WRITE(DSPFW9_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
                           FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
                           FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
                           FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
        } else {
                I915_WRITE(DSPFW7,
                           FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
        }

        POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV

static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
{
        /* all latencies in usec */
        dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
        dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
        dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;

        dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
}

static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
{
        /*
         * DSPCNTR[13] supposedly controls whether the
         * primary plane can use the FIFO space otherwise
         * reserved for the sprite plane. It's not 100% clear
         * what the actual FIFO size is, but it looks like we
         * can happily set both primary and sprite watermarks
         * up to 127 cachelines. So that would seem to mean
         * that either DSPCNTR[13] doesn't do anything, or that
         * the total FIFO is >= 256 cachelines in size. Either
         * way, we don't seem to have to worry about this
         * repartitioning as the maximum watermark value the
         * register can hold for each plane is lower than the
         * minimum FIFO size.
         */
        switch (plane_id) {
        case PLANE_CURSOR:
                return 63;
        case PLANE_PRIMARY:
                return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
        case PLANE_SPRITE0:
                return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
        default:
                MISSING_CASE(plane_id);
                return 0;
        }
}

static int g4x_fbc_fifo_size(int level)
{
        switch (level) {
        case G4X_WM_LEVEL_SR:
                return 7;
        case G4X_WM_LEVEL_HPLL:
                return 15;
        default:
                MISSING_CASE(level);
                return 0;
        }
}

static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state,
                          const struct intel_plane_state *plane_state,
                          int level)
{
        struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
        struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
        const struct drm_display_mode *adjusted_mode =
                &crtc_state->hw.adjusted_mode;
        unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
        unsigned int clock, htotal, cpp, width, wm;

        if (latency == 0)
                return USHRT_MAX;

        if (!intel_wm_plane_visible(crtc_state, plane_state))
                return 0;

        cpp = plane_state->hw.fb->format->cpp[0];

        /*
         * Not 100% sure which way ELK should go here as the
         * spec only says CL/CTG should assume 32bpp and BW
         * doesn't need to. But as these things followed the
         * mobile vs. desktop lines on gen3 as well, let's
         * assume ELK doesn't need this.
         *
         * The spec also fails to list such a restriction for
         * the HPLL watermark, which seems a little strange.
         * Let's use 32bpp for the HPLL watermark as well.
         */
        if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
            level != G4X_WM_LEVEL_NORMAL)
                cpp = max(cpp, 4u);

        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;

        width = drm_rect_width(&plane_state->uapi.dst);

        if (plane->id == PLANE_CURSOR) {
                wm = intel_wm_method2(clock, htotal, width, cpp, latency);
        } else if (plane->id == PLANE_PRIMARY &&
                   level == G4X_WM_LEVEL_NORMAL) {
                wm = intel_wm_method1(clock, cpp, latency);
        } else {
                unsigned int small, large;

                small = intel_wm_method1(clock, cpp, latency);
                large = intel_wm_method2(clock, htotal, width, cpp, latency);

                wm = min(small, large);
        }

        wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
                              width, cpp);

        wm = DIV_ROUND_UP(wm, 64) + 2;

        return min_t(unsigned int, wm, USHRT_MAX);
}
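
/*
 * Worked example (illustrative numbers): a method2 result of 7680
 * bytes plus a zero TLB adjustment converts to
 * DIV_ROUND_UP(7680, 64) + 2 = 122 cachelines.
 */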
1162
1163 static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1164                                  int level, enum plane_id plane_id, u16 value)
1165 {
1166         struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
1167         bool dirty = false;
1168
1169         for (; level < intel_wm_num_levels(dev_priv); level++) {
1170                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1171
1172                 dirty |= raw->plane[plane_id] != value;
1173                 raw->plane[plane_id] = value;
1174         }
1175
1176         return dirty;
1177 }
1178
1179 static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
1180                                int level, u16 value)
1181 {
1182         struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
1183         bool dirty = false;
1184
1185         /* NORMAL level doesn't have an FBC watermark */
1186         level = max(level, G4X_WM_LEVEL_SR);
1187
1188         for (; level < intel_wm_num_levels(dev_priv); level++) {
1189                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1190
1191                 dirty |= raw->fbc != value;
1192                 raw->fbc = value;
1193         }
1194
1195         return dirty;
1196 }
1197
1198 static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *crtc_state,
1199                               const struct intel_plane_state *plane_state,
1200                               u32 pri_val);
1201
1202 static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1203                                      const struct intel_plane_state *plane_state)
1204 {
1205         struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
1206         struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
1207         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1208         enum plane_id plane_id = plane->id;
1209         bool dirty = false;
1210         int level;
1211
1212         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1213                 dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1214                 if (plane_id == PLANE_PRIMARY)
1215                         dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
1216                 goto out;
1217         }
1218
1219         for (level = 0; level < num_levels; level++) {
1220                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1221                 int wm, max_wm;
1222
1223                 wm = g4x_compute_wm(crtc_state, plane_state, level);
1224                 max_wm = g4x_plane_fifo_size(plane_id, level);
1225
1226                 if (wm > max_wm)
1227                         break;
1228
1229                 dirty |= raw->plane[plane_id] != wm;
1230                 raw->plane[plane_id] = wm;
1231
1232                 if (plane_id != PLANE_PRIMARY ||
1233                     level == G4X_WM_LEVEL_NORMAL)
1234                         continue;
1235
1236                 wm = ilk_compute_fbc_wm(crtc_state, plane_state,
1237                                         raw->plane[plane_id]);
1238                 max_wm = g4x_fbc_fifo_size(level);
1239
1240                 /*
1241                  * FBC wm is not mandatory as we
1242                  * can always just disable its use.
1243                  */
1244                 if (wm > max_wm)
1245                         wm = USHRT_MAX;
1246
1247                 dirty |= raw->fbc != wm;
1248                 raw->fbc = wm;
1249         }
1250
1251         /* mark watermarks as invalid */
1252         dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1253
1254         if (plane_id == PLANE_PRIMARY)
1255                 dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
1256
1257  out:
1258         if (dirty) {
1259                 drm_dbg_kms(&dev_priv->drm,
1260                             "%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1261                             plane->base.name,
1262                             crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
1263                             crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
1264                             crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
1265
1266                 if (plane_id == PLANE_PRIMARY)
1267                         drm_dbg_kms(&dev_priv->drm,
1268                                     "FBC watermarks: SR=%d, HPLL=%d\n",
1269                                     crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
1270                                     crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
1271         }
1272
1273         return dirty;
1274 }
1275
1276 static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1277                                       enum plane_id plane_id, int level)
1278 {
1279         const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1280
1281         return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
1282 }
1283
1284 static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
1285                                      int level)
1286 {
1287         struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
1288
1289         if (level > dev_priv->wm.max_level)
1290                 return false;
1291
1292         return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1293                 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1294                 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1295 }
1296
1297 /* mark all levels starting from 'level' as invalid */
1298 static void g4x_invalidate_wms(struct intel_crtc *crtc,
1299                                struct g4x_wm_state *wm_state, int level)
1300 {
1301         if (level <= G4X_WM_LEVEL_NORMAL) {
1302                 enum plane_id plane_id;
1303
1304                 for_each_plane_id_on_crtc(crtc, plane_id)
1305                         wm_state->wm.plane[plane_id] = USHRT_MAX;
1306         }
1307
1308         if (level <= G4X_WM_LEVEL_SR) {
1309                 wm_state->cxsr = false;
1310                 wm_state->sr.cursor = USHRT_MAX;
1311                 wm_state->sr.plane = USHRT_MAX;
1312                 wm_state->sr.fbc = USHRT_MAX;
1313         }
1314
1315         if (level <= G4X_WM_LEVEL_HPLL) {
1316                 wm_state->hpll_en = false;
1317                 wm_state->hpll.cursor = USHRT_MAX;
1318                 wm_state->hpll.plane = USHRT_MAX;
1319                 wm_state->hpll.fbc = USHRT_MAX;
1320         }
1321 }
1322
1323 static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1324 {
1325         struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
1326         struct intel_atomic_state *state =
1327                 to_intel_atomic_state(crtc_state->uapi.state);
1328         struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1329         int num_active_planes = hweight8(crtc_state->active_planes &
1330                                          ~BIT(PLANE_CURSOR));
1331         const struct g4x_pipe_wm *raw;
1332         const struct intel_plane_state *old_plane_state;
1333         const struct intel_plane_state *new_plane_state;
1334         struct intel_plane *plane;
1335         enum plane_id plane_id;
1336         int i, level;
1337         unsigned int dirty = 0;
1338
1339         for_each_oldnew_intel_plane_in_state(state, plane,
1340                                              old_plane_state,
1341                                              new_plane_state, i) {
1342                 if (new_plane_state->hw.crtc != &crtc->base &&
1343                     old_plane_state->hw.crtc != &crtc->base)
1344                         continue;
1345
1346                 if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
1347                         dirty |= BIT(plane->id);
1348         }
1349
1350         if (!dirty)
1351                 return 0;
1352
1353         level = G4X_WM_LEVEL_NORMAL;
1354         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1355                 goto out;
1356
1357         raw = &crtc_state->wm.g4x.raw[level];
1358         for_each_plane_id_on_crtc(crtc, plane_id)
1359                 wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1360
1361         level = G4X_WM_LEVEL_SR;
1362
1363         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1364                 goto out;
1365
1366         raw = &crtc_state->wm.g4x.raw[level];
1367         wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1368         wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1369         wm_state->sr.fbc = raw->fbc;
1370
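        /*
         * Note that PLANE_PRIMARY is 0, so BIT(PLANE_PRIMARY) == 1 and
         * this simply checks for exactly one active non-cursor plane.
         */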
1371         wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);
1372
1373         level = G4X_WM_LEVEL_HPLL;
1374
1375         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1376                 goto out;
1377
1378         raw = &crtc_state->wm.g4x.raw[level];
1379         wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1380         wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1381         wm_state->hpll.fbc = raw->fbc;
1382
1383         wm_state->hpll_en = wm_state->cxsr;
1384
1385         level++;
1386
1387  out:
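        /* at this point 'level' is the first level that failed to validate */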
1388         if (level == G4X_WM_LEVEL_NORMAL)
1389                 return -EINVAL;
1390
1391         /* invalidate the higher levels */
1392         g4x_invalidate_wms(crtc, wm_state, level);
1393
1394         /*
1395          * Determine if the FBC watermark(s) can be used. If
1396          * this isn't the case we prefer to disable the FBC
1397          * watermark(s) rather than disable the SR/HPLL
1398          * level(s) entirely.
1399          */
1400         wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1401
1402         if (level >= G4X_WM_LEVEL_SR &&
1403             wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1404                 wm_state->fbc_en = false;
1405         else if (level >= G4X_WM_LEVEL_HPLL &&
1406                  wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1407                 wm_state->fbc_en = false;
1408
1409         return 0;
1410 }
1411
1412 static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
1413 {
1414         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
1415         struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
1416         const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
1417         struct intel_atomic_state *intel_state =
1418                 to_intel_atomic_state(new_crtc_state->uapi.state);
1419         const struct intel_crtc_state *old_crtc_state =
1420                 intel_atomic_get_old_crtc_state(intel_state, crtc);
1421         const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
1422         enum plane_id plane_id;
1423
1424         if (!new_crtc_state->hw.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) {
1425                 *intermediate = *optimal;
1426
1427                 intermediate->cxsr = false;
1428                 intermediate->hpll_en = false;
1429                 goto out;
1430         }
1431
1432         intermediate->cxsr = optimal->cxsr && active->cxsr &&
1433                 !new_crtc_state->disable_cxsr;
1434         intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
1435                 !new_crtc_state->disable_cxsr;
1436         intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1437
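        /*
         * g4x watermarks are still in "FIFO entries used" form here, so
         * merging the old and new states with max() below yields values
         * that are safe for both plane configurations.
         */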
1438         for_each_plane_id_on_crtc(crtc, plane_id) {
1439                 intermediate->wm.plane[plane_id] =
1440                         max(optimal->wm.plane[plane_id],
1441                             active->wm.plane[plane_id]);
1442
1443                 WARN_ON(intermediate->wm.plane[plane_id] >
1444                         g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1445         }
1446
1447         intermediate->sr.plane = max(optimal->sr.plane,
1448                                      active->sr.plane);
1449         intermediate->sr.cursor = max(optimal->sr.cursor,
1450                                       active->sr.cursor);
1451         intermediate->sr.fbc = max(optimal->sr.fbc,
1452                                    active->sr.fbc);
1453
1454         intermediate->hpll.plane = max(optimal->hpll.plane,
1455                                        active->hpll.plane);
1456         intermediate->hpll.cursor = max(optimal->hpll.cursor,
1457                                         active->hpll.cursor);
1458         intermediate->hpll.fbc = max(optimal->hpll.fbc,
1459                                      active->hpll.fbc);
1460
1461         WARN_ON((intermediate->sr.plane >
1462                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1463                  intermediate->sr.cursor >
1464                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1465                 intermediate->cxsr);
1466         WARN_ON((intermediate->sr.plane >
1467                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1468                  intermediate->sr.cursor >
1469                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1470                 intermediate->hpll_en);
1471
1472         WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR) &&
1473                 intermediate->fbc_en && intermediate->cxsr);
1474         WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL) &&
1475                 intermediate->fbc_en && intermediate->hpll_en);
1476
1477 out:
1478         /*
1479          * If our intermediate WMs are identical to the final WMs, then we can
1480          * omit the post-vblank programming; only update if it's different.
1481          */
1482         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
1483                 new_crtc_state->wm.need_postvbl_update = true;
1484
1485         return 0;
1486 }
1487
1488 static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1489                          struct g4x_wm_values *wm)
1490 {
1491         struct intel_crtc *crtc;
1492         int num_active_pipes = 0;
1493
1494         wm->cxsr = true;
1495         wm->hpll_en = true;
1496         wm->fbc_en = true;
1497
1498         for_each_intel_crtc(&dev_priv->drm, crtc) {
1499                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1500
1501                 if (!crtc->active)
1502                         continue;
1503
1504                 if (!wm_state->cxsr)
1505                         wm->cxsr = false;
1506                 if (!wm_state->hpll_en)
1507                         wm->hpll_en = false;
1508                 if (!wm_state->fbc_en)
1509                         wm->fbc_en = false;
1510
1511                 num_active_pipes++;
1512         }
1513
1514         if (num_active_pipes != 1) {
1515                 wm->cxsr = false;
1516                 wm->hpll_en = false;
1517                 wm->fbc_en = false;
1518         }
1519
1520         for_each_intel_crtc(&dev_priv->drm, crtc) {
1521                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1522                 enum pipe pipe = crtc->pipe;
1523
1524                 wm->pipe[pipe] = wm_state->wm;
1525                 if (crtc->active && wm->cxsr)
1526                         wm->sr = wm_state->sr;
1527                 if (crtc->active && wm->hpll_en)
1528                         wm->hpll = wm_state->hpll;
1529         }
1530 }
1531
1532 static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1533 {
1534         struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1535         struct g4x_wm_values new_wm = {};
1536
1537         g4x_merge_wm(dev_priv, &new_wm);
1538
1539         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1540                 return;
1541
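        /*
         * Ordering note: cxsr is dropped before the new values land and
         * only (re)enabled afterwards, presumably so the hardware never
         * self-refreshes with watermarks that can't support it.
         */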
1542         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1543                 _intel_set_memory_cxsr(dev_priv, false);
1544
1545         g4x_write_wm_values(dev_priv, &new_wm);
1546
1547         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1548                 _intel_set_memory_cxsr(dev_priv, true);
1549
1550         *old_wm = new_wm;
1551 }
1552
1553 static void g4x_initial_watermarks(struct intel_atomic_state *state,
1554                                    struct intel_crtc *crtc)
1555 {
1556         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1557         const struct intel_crtc_state *crtc_state =
1558                 intel_atomic_get_new_crtc_state(state, crtc);
1559
1560         mutex_lock(&dev_priv->wm.wm_mutex);
1561         crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1562         g4x_program_watermarks(dev_priv);
1563         mutex_unlock(&dev_priv->wm.wm_mutex);
1564 }
1565
1566 static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1567                                     struct intel_crtc *crtc)
1568 {
1569         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1570         const struct intel_crtc_state *crtc_state =
1571                 intel_atomic_get_new_crtc_state(state, crtc);
1572
1573         if (!crtc_state->wm.need_postvbl_update)
1574                 return;
1575
1576         mutex_lock(&dev_priv->wm.wm_mutex);
1577         crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1578         g4x_program_watermarks(dev_priv);
1579         mutex_unlock(&dev_priv->wm.wm_mutex);
1580 }
1581
1582 /* latency must be in 0.1us units. */
1583 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
1584                                    unsigned int htotal,
1585                                    unsigned int width,
1586                                    unsigned int cpp,
1587                                    unsigned int latency)
1588 {
1589         unsigned int ret;
1590
1591         ret = intel_wm_method2(pixel_rate, htotal,
1592                                width, cpp, latency);
1593         ret = DIV_ROUND_UP(ret, 64);
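        /*
         * Rough worked example (illustrative numbers only), assuming
         * intel_wm_method2() computes
         * ((latency * pixel_rate) / (htotal * 10000) + 1) * width * cpp
         * as defined earlier in this file: a 1920 pixel wide 4 bpp plane
         * at 148.5 MHz with htotal=2200 and a 3 usec (latency=30) level
         * needs 7680 bytes, i.e. 120 64-byte cachelines after the
         * DIV_ROUND_UP() above.
         */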
1594
1595         return ret;
1596 }
1597
1598 static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
1599 {
1600         /* all latencies in usec */
1601         dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1602
1603         dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1604
1605         if (IS_CHERRYVIEW(dev_priv)) {
1606                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1607                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
1608
1609                 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
1610         }
1611 }
1612
1613 static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1614                                 const struct intel_plane_state *plane_state,
1615                                 int level)
1616 {
1617         struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
1618         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1619         const struct drm_display_mode *adjusted_mode =
1620                 &crtc_state->hw.adjusted_mode;
1621         unsigned int clock, htotal, cpp, width, wm;
1622
1623         if (dev_priv->wm.pri_latency[level] == 0)
1624                 return USHRT_MAX;
1625
1626         if (!intel_wm_plane_visible(crtc_state, plane_state))
1627                 return 0;
1628
1629         cpp = plane_state->hw.fb->format->cpp[0];
1630         clock = adjusted_mode->crtc_clock;
1631         htotal = adjusted_mode->crtc_htotal;
1632         width = crtc_state->pipe_src_w;
1633
1634         if (plane->id == PLANE_CURSOR) {
1635                 /*
1636                  * FIXME the formula gives values that are
1637                  * too big for the cursor FIFO, and hence we
1638                  * would never be able to use cursors. For
1639                  * now just hardcode the watermark.
1640                  */
1641                 wm = 63;
1642         } else {
1643                 wm = vlv_wm_method2(clock, htotal, width, cpp,
1644                                     dev_priv->wm.pri_latency[level] * 10);
1645         }
1646
1647         return min_t(unsigned int, wm, USHRT_MAX);
1648 }
1649
1650 static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1651 {
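        /* i.e. sprite1 enabled while sprite0 is not */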
1652         return (active_planes & (BIT(PLANE_SPRITE0) |
1653                                  BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1654 }
1655
1656 static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1657 {
1658         struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
1659         const struct g4x_pipe_wm *raw =
1660                 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
1661         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
1662         unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1663         int num_active_planes = hweight8(active_planes);
1664         const int fifo_size = 511;
1665         int fifo_extra, fifo_left = fifo_size;
1666         int sprite0_fifo_extra = 0;
1667         unsigned int total_rate;
1668         enum plane_id plane_id;
1669
1670         /*
1671          * When enabling sprite0 after sprite1 has already been enabled
1672          * we tend to get an underrun unless sprite0 already has some
1673          * FIFO space allocated. Hence we always allocate at least one
1674          * cacheline for sprite0 whenever sprite1 is enabled.
1675          *
1676          * All other plane enable sequences appear immune to this problem.
1677          */
1678         if (vlv_need_sprite0_fifo_workaround(active_planes))
1679                 sprite0_fifo_extra = 1;
1680
1681         total_rate = raw->plane[PLANE_PRIMARY] +
1682                 raw->plane[PLANE_SPRITE0] +
1683                 raw->plane[PLANE_SPRITE1] +
1684                 sprite0_fifo_extra;
1685
1686         if (total_rate > fifo_size)
1687                 return -EINVAL;
1688
1689         if (total_rate == 0)
1690                 total_rate = 1;
1691
1692         for_each_plane_id_on_crtc(crtc, plane_id) {
1693                 unsigned int rate;
1694
1695                 if ((active_planes & BIT(plane_id)) == 0) {
1696                         fifo_state->plane[plane_id] = 0;
1697                         continue;
1698                 }
1699
1700                 rate = raw->plane[plane_id];
1701                 fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1702                 fifo_left -= fifo_state->plane[plane_id];
1703         }
1704
1705         fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1706         fifo_left -= sprite0_fifo_extra;
1707
1708         fifo_state->plane[PLANE_CURSOR] = 63;
1709
1710         fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
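        /*
         * Illustrative example: primary rate 120 and sprite0 rate 60
         * (sprite1 off) give total_rate = 180, so the loop above hands
         * out 511 * 120 / 180 = 340 and 511 * 60 / 180 = 170 entries,
         * leaving fifo_left = 1 to be spread below.
         */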
1711
1712         /* spread the remainder evenly */
1713         for_each_plane_id_on_crtc(crtc, plane_id) {
1714                 int plane_extra;
1715
1716                 if (fifo_left == 0)
1717                         break;
1718
1719                 if ((active_planes & BIT(plane_id)) == 0)
1720                         continue;
1721
1722                 plane_extra = min(fifo_extra, fifo_left);
1723                 fifo_state->plane[plane_id] += plane_extra;
1724                 fifo_left -= plane_extra;
1725         }
1726
1727         WARN_ON(active_planes != 0 && fifo_left != 0);
1728
1729         /* give it all to the first plane if none are active */
1730         if (active_planes == 0) {
1731                 WARN_ON(fifo_left != fifo_size);
1732                 fifo_state->plane[PLANE_PRIMARY] = fifo_left;
1733         }
1734
1735         return 0;
1736 }
1737
1738 /* mark all levels starting from 'level' as invalid */
1739 static void vlv_invalidate_wms(struct intel_crtc *crtc,
1740                                struct vlv_wm_state *wm_state, int level)
1741 {
1742         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1743
1744         for (; level < intel_wm_num_levels(dev_priv); level++) {
1745                 enum plane_id plane_id;
1746
1747                 for_each_plane_id_on_crtc(crtc, plane_id)
1748                         wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1749
1750                 wm_state->sr[level].cursor = USHRT_MAX;
1751                 wm_state->sr[level].plane = USHRT_MAX;
1752         }
1753 }
1754
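/*
 * Convert a watermark from the "FIFO entries used" form computed above
 * into the inverted (fifo_size - wm) form used when programming the
 * registers; values that exceed the plane's FIFO allocation are
 * flagged with USHRT_MAX.
 */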
1755 static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1756 {
1757         if (wm > fifo_size)
1758                 return USHRT_MAX;
1759         else
1760                 return fifo_size - wm;
1761 }
1762
1763 /*
1764  * Starting from 'level' set all higher
1765  * levels to 'value' in the "raw" watermarks.
1766  */
1767 static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1768                                  int level, enum plane_id plane_id, u16 value)
1769 {
1770         struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
1771         int num_levels = intel_wm_num_levels(dev_priv);
1772         bool dirty = false;
1773
1774         for (; level < num_levels; level++) {
1775                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1776
1777                 dirty |= raw->plane[plane_id] != value;
1778                 raw->plane[plane_id] = value;
1779         }
1780
1781         return dirty;
1782 }
1783
1784 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1785                                      const struct intel_plane_state *plane_state)
1786 {
1787         struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
1788         struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
1789         enum plane_id plane_id = plane->id;
1790         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1791         int level;
1792         bool dirty = false;
1793
1794         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1795                 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1796                 goto out;
1797         }
1798
1799         for (level = 0; level < num_levels; level++) {
1800                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1801                 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1802                 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
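                /* 63 matches the fixed cursor FIFO, 511 the shared plane FIFO */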
1803
1804                 if (wm > max_wm)
1805                         break;
1806
1807                 dirty |= raw->plane[plane_id] != wm;
1808                 raw->plane[plane_id] = wm;
1809         }
1810
1811         /* mark all higher levels as invalid */
1812         dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1813
1814 out:
1815         if (dirty)
1816                 drm_dbg_kms(&dev_priv->drm,
1817                             "%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1818                             plane->base.name,
1819                             crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1820                             crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1821                             crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1822
1823         return dirty;
1824 }
1825
1826 static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1827                                       enum plane_id plane_id, int level)
1828 {
1829         const struct g4x_pipe_wm *raw =
1830                 &crtc_state->wm.vlv.raw[level];
1831         const struct vlv_fifo_state *fifo_state =
1832                 &crtc_state->wm.vlv.fifo_state;
1833
1834         return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1835 }
1836
1837 static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
1838 {
1839         return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1840                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1841                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1842                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1843 }
1844
1845 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1846 {
1847         struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
1848         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1849         struct intel_atomic_state *state =
1850                 to_intel_atomic_state(crtc_state->uapi.state);
1851         struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1852         const struct vlv_fifo_state *fifo_state =
1853                 &crtc_state->wm.vlv.fifo_state;
1854         int num_active_planes = hweight8(crtc_state->active_planes &
1855                                          ~BIT(PLANE_CURSOR));
1856         bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->uapi);
1857         const struct intel_plane_state *old_plane_state;
1858         const struct intel_plane_state *new_plane_state;
1859         struct intel_plane *plane;
1860         enum plane_id plane_id;
1861         int level, ret, i;
1862         unsigned int dirty = 0;
1863
1864         for_each_oldnew_intel_plane_in_state(state, plane,
1865                                              old_plane_state,
1866                                              new_plane_state, i) {
1867                 if (new_plane_state->hw.crtc != &crtc->base &&
1868                     old_plane_state->hw.crtc != &crtc->base)
1869                         continue;
1870
1871                 if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
1872                         dirty |= BIT(plane->id);
1873         }
1874
1875         /*
1876          * DSPARB registers may have been reset due to the
1877          * power well being turned off. Make sure we restore
1878          * them to a consistent state even if no primary/sprite
1879          * planes are initially active.
1880          */
1881         if (needs_modeset)
1882                 crtc_state->fifo_changed = true;
1883
1884         if (!dirty)
1885                 return 0;
1886
1887         /* cursor changes don't warrant a FIFO recompute */
1888         if (dirty & ~BIT(PLANE_CURSOR)) {
1889                 const struct intel_crtc_state *old_crtc_state =
1890                         intel_atomic_get_old_crtc_state(state, crtc);
1891                 const struct vlv_fifo_state *old_fifo_state =
1892                         &old_crtc_state->wm.vlv.fifo_state;
1893
1894                 ret = vlv_compute_fifo(crtc_state);
1895                 if (ret)
1896                         return ret;
1897
1898                 if (needs_modeset ||
1899                     memcmp(old_fifo_state, fifo_state,
1900                            sizeof(*fifo_state)) != 0)
1901                         crtc_state->fifo_changed = true;
1902         }
1903
1904         /* initially allow all levels */
1905         wm_state->num_levels = intel_wm_num_levels(dev_priv);
1906         /*
1907          * Note that enabling cxsr with no primary/sprite planes
1908          * enabled can wedge the pipe. Hence we only allow cxsr
1909          * with exactly one enabled primary/sprite plane.
1910          */
1911         wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
1912
1913         for (level = 0; level < wm_state->num_levels; level++) {
1914                 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1915                 const int sr_fifo_size = INTEL_NUM_PIPES(dev_priv) * 512 - 1;
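                /*
                 * With self-refresh only one pipe is active, so it can
                 * presumably use the FIFO space of all pipes (minus one
                 * entry, matching the 511 usable entries per pipe above).
                 */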
1916
1917                 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
1918                         break;
1919
1920                 for_each_plane_id_on_crtc(crtc, plane_id) {
1921                         wm_state->wm[level].plane[plane_id] =
1922                                 vlv_invert_wm_value(raw->plane[plane_id],
1923                                                     fifo_state->plane[plane_id]);
1924                 }
1925
1926                 wm_state->sr[level].plane =
1927                         vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
1928                                                  raw->plane[PLANE_SPRITE0],
1929                                                  raw->plane[PLANE_SPRITE1]),
1930                                             sr_fifo_size);
1931
1932                 wm_state->sr[level].cursor =
1933                         vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1934                                             63);
1935         }
1936
1937         if (level == 0)
1938                 return -EINVAL;
1939
1940         /* limit to only levels we can actually handle */
1941         wm_state->num_levels = level;
1942
1943         /* invalidate the higher levels */
1944         vlv_invalidate_wms(crtc, wm_state, level);
1945
1946         return 0;
1947 }
1948
1949 #define VLV_FIFO(plane, value) \
1950         (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
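/*
 * E.g. VLV_FIFO(SPRITEA, sprite0_start) shifts the FIFO split point into
 * the SPRITEA field of DSPARB and masks off the rest; the _HI variants
 * carry bit 8 of the 9-bit start value (note the ">> 8" at each use).
 */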
1951
1952 static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1953                                    struct intel_crtc *crtc)
1954 {
1955         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1956         struct intel_uncore *uncore = &dev_priv->uncore;
1957         const struct intel_crtc_state *crtc_state =
1958                 intel_atomic_get_new_crtc_state(state, crtc);
1959         const struct vlv_fifo_state *fifo_state =
1960                 &crtc_state->wm.vlv.fifo_state;
1961         int sprite0_start, sprite1_start, fifo_size;
1962         u32 dsparb, dsparb2, dsparb3;
1963
1964         if (!crtc_state->fifo_changed)
1965                 return;
1966
1967         sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1968         sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1969         fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
1970
1971         drm_WARN_ON(&dev_priv->drm, fifo_state->plane[PLANE_CURSOR] != 63);
1972         drm_WARN_ON(&dev_priv->drm, fifo_size != 511);
1973
1974         trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1975
1976         /*
1977          * uncore.lock serves a double purpose here. It allows us to
1978          * use the less expensive I915_{READ,WRITE}_FW() functions, and
1979          * it protects the DSPARB registers from getting clobbered by
1980          * parallel updates from multiple pipes.
1981          *
1982          * intel_pipe_update_start() has already disabled interrupts
1983          * for us, so a plain spin_lock() is sufficient here.
1984          */
1985         spin_lock(&uncore->lock);
1986
1987         switch (crtc->pipe) {
1988         case PIPE_A:
1989                 dsparb = intel_uncore_read_fw(uncore, DSPARB);
1990                 dsparb2 = intel_uncore_read_fw(uncore, DSPARB2);
1991
1992                 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1993                             VLV_FIFO(SPRITEB, 0xff));
1994                 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1995                            VLV_FIFO(SPRITEB, sprite1_start));
1996
1997                 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1998                              VLV_FIFO(SPRITEB_HI, 0x1));
1999                 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
2000                            VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
2001
2002                 intel_uncore_write_fw(uncore, DSPARB, dsparb);
2003                 intel_uncore_write_fw(uncore, DSPARB2, dsparb2);
2004                 break;
2005         case PIPE_B:
2006                 dsparb = intel_uncore_read_fw(uncore, DSPARB);
2007                 dsparb2 = intel_uncore_read_fw(uncore, DSPARB2);
2008
2009                 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
2010                             VLV_FIFO(SPRITED, 0xff));
2011                 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
2012                            VLV_FIFO(SPRITED, sprite1_start));
2013
2014                 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
2015                              VLV_FIFO(SPRITED_HI, 0xff));
2016                 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
2017                            VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
2018
2019                 intel_uncore_write_fw(uncore, DSPARB, dsparb);
2020                 intel_uncore_write_fw(uncore, DSPARB2, dsparb2);
2021                 break;
2022         case PIPE_C:
2023                 dsparb3 = intel_uncore_read_fw(uncore, DSPARB3);
2024                 dsparb2 = intel_uncore_read_fw(uncore, DSPARB2);
2025
2026                 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
2027                              VLV_FIFO(SPRITEF, 0xff));
2028                 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
2029                             VLV_FIFO(SPRITEF, sprite1_start));
2030
2031                 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
2032                              VLV_FIFO(SPRITEF_HI, 0xff));
2033                 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
2034                            VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
2035
2036                 intel_uncore_write_fw(uncore, DSPARB3, dsparb3);
2037                 intel_uncore_write_fw(uncore, DSPARB2, dsparb2);
2038                 break;
2039         default:
2040                 break;
2041         }
2042
2043         intel_uncore_posting_read_fw(uncore, DSPARB);
2044
2045         spin_unlock(&uncore->lock);
2046 }
2047
2048 #undef VLV_FIFO
2049
2050 static int vlv_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
2051 {
2052         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
2053         struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
2054         const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
2055         struct intel_atomic_state *intel_state =
2056                 to_intel_atomic_state(new_crtc_state->uapi.state);
2057         const struct intel_crtc_state *old_crtc_state =
2058                 intel_atomic_get_old_crtc_state(intel_state, crtc);
2059         const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
2060         int level;
2061
2062         if (!new_crtc_state->hw.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) {
2063                 *intermediate = *optimal;
2064
2065                 intermediate->cxsr = false;
2066                 goto out;
2067         }
2068
2069         intermediate->num_levels = min(optimal->num_levels, active->num_levels);
2070         intermediate->cxsr = optimal->cxsr && active->cxsr &&
2071                 !new_crtc_state->disable_cxsr;
2072
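        /*
         * The vlv values were inverted (fifo_size - wm) when the pipe wm
         * was computed, so the conservative merge here is min() rather
         * than the max() used by the g4x code above.
         */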
2073         for (level = 0; level < intermediate->num_levels; level++) {
2074                 enum plane_id plane_id;
2075
2076                 for_each_plane_id_on_crtc(crtc, plane_id) {
2077                         intermediate->wm[level].plane[plane_id] =
2078                                 min(optimal->wm[level].plane[plane_id],
2079                                     active->wm[level].plane[plane_id]);
2080                 }
2081
2082                 intermediate->sr[level].plane = min(optimal->sr[level].plane,
2083                                                     active->sr[level].plane);
2084                 intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
2085                                                      active->sr[level].cursor);
2086         }
2087
2088         vlv_invalidate_wms(crtc, intermediate, level);
2089
2090 out:
2091         /*
2092          * If our intermediate WMs are identical to the final WMs, then we can
2093          * omit the post-vblank programming; only update if it's different.
2094          */
2095         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
2096                 new_crtc_state->wm.need_postvbl_update = true;
2097
2098         return 0;
2099 }
2100
2101 static void vlv_merge_wm(struct drm_i915_private *dev_priv,
2102                          struct vlv_wm_values *wm)
2103 {
2104         struct intel_crtc *crtc;
2105         int num_active_pipes = 0;
2106
2107         wm->level = dev_priv->wm.max_level;
2108         wm->cxsr = true;
2109
2110         for_each_intel_crtc(&dev_priv->drm, crtc) {
2111                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2112
2113                 if (!crtc->active)
2114                         continue;
2115
2116                 if (!wm_state->cxsr)
2117                         wm->cxsr = false;
2118
2119                 num_active_pipes++;
2120                 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
2121         }
2122
2123         if (num_active_pipes != 1)
2124                 wm->cxsr = false;
2125
2126         if (num_active_pipes > 1)
2127                 wm->level = VLV_WM_LEVEL_PM2;
2128
2129         for_each_intel_crtc(&dev_priv->drm, crtc) {
2130                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2131                 enum pipe pipe = crtc->pipe;
2132
2133                 wm->pipe[pipe] = wm_state->wm[wm->level];
2134                 if (crtc->active && wm->cxsr)
2135                         wm->sr = wm_state->sr[wm->level];
2136
2137                 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
2138                 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
2139                 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
2140                 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
2141         }
2142 }
2143
2144 static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
2145 {
2146         struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
2147         struct vlv_wm_values new_wm = {};
2148
2149         vlv_merge_wm(dev_priv, &new_wm);
2150
2151         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
2152                 return;
2153
2154         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2155                 chv_set_memory_dvfs(dev_priv, false);
2156
2157         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2158                 chv_set_memory_pm5(dev_priv, false);
2159
2160         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
2161                 _intel_set_memory_cxsr(dev_priv, false);
2162
2163         vlv_write_wm_values(dev_priv, &new_wm);
2164
2165         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
2166                 _intel_set_memory_cxsr(dev_priv, true);
2167
2168         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2169                 chv_set_memory_pm5(dev_priv, true);
2170
2171         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2172                 chv_set_memory_dvfs(dev_priv, true);
2173
2174         *old_wm = new_wm;
2175 }
2176
2177 static void vlv_initial_watermarks(struct intel_atomic_state *state,
2178                                    struct intel_crtc *crtc)
2179 {
2180         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
2181         const struct intel_crtc_state *crtc_state =
2182                 intel_atomic_get_new_crtc_state(state, crtc);
2183
2184         mutex_lock(&dev_priv->wm.wm_mutex);
2185         crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
2186         vlv_program_watermarks(dev_priv);
2187         mutex_unlock(&dev_priv->wm.wm_mutex);
2188 }
2189
2190 static void vlv_optimize_watermarks(struct intel_atomic_state *state,
2191                                     struct intel_crtc *crtc)
2192 {
2193         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
2194         const struct intel_crtc_state *crtc_state =
2195                 intel_atomic_get_new_crtc_state(state, crtc);
2196
2197         if (!crtc_state->wm.need_postvbl_update)
2198                 return;
2199
2200         mutex_lock(&dev_priv->wm.wm_mutex);
2201         crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
2202         vlv_program_watermarks(dev_priv);
2203         mutex_unlock(&dev_priv->wm.wm_mutex);
2204 }
2205
2206 static void i965_update_wm(struct intel_crtc *unused_crtc)
2207 {
2208         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2209         struct intel_crtc *crtc;
2210         int srwm = 1;
2211         int cursor_sr = 16;
2212         bool cxsr_enabled;
2213
2214         /* Calculate SR entries for single-plane configs */
2215         crtc = single_enabled_crtc(dev_priv);
2216         if (crtc) {
2217                 /* self-refresh has much higher latency */
2218                 static const int sr_latency_ns = 12000;
2219                 const struct drm_display_mode *adjusted_mode =
2220                         &crtc->config->hw.adjusted_mode;
2221                 const struct drm_framebuffer *fb =
2222                         crtc->base.primary->state->fb;
2223                 int clock = adjusted_mode->crtc_clock;
2224                 int htotal = adjusted_mode->crtc_htotal;
2225                 int hdisplay = crtc->config->pipe_src_w;
2226                 int cpp = fb->format->cpp[0];
2227                 int entries;
2228
2229                 entries = intel_wm_method2(clock, htotal,
2230                                            hdisplay, cpp, sr_latency_ns / 100);
2231                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
2232                 srwm = I965_FIFO_SIZE - entries;
2233                 if (srwm < 0)
2234                         srwm = 1;
2235                 srwm &= 0x1ff;
2236                 drm_dbg_kms(&dev_priv->drm,
2237                             "self-refresh entries: %d, wm: %d\n",
2238                             entries, srwm);
2239
2240                 entries = intel_wm_method2(clock, htotal,
2241                                            crtc->base.cursor->state->crtc_w, 4,
2242                                            sr_latency_ns / 100);
2243                 entries = DIV_ROUND_UP(entries,
2244                                        i965_cursor_wm_info.cacheline_size) +
2245                         i965_cursor_wm_info.guard_size;
2246
2247                 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
2248                 if (cursor_sr > i965_cursor_wm_info.max_wm)
2249                         cursor_sr = i965_cursor_wm_info.max_wm;
2250
2251                 drm_dbg_kms(&dev_priv->drm,
2252                             "self-refresh watermark: display plane %d "
2253                             "cursor %d\n", srwm, cursor_sr);
2254
2255                 cxsr_enabled = true;
2256         } else {
2257                 cxsr_enabled = false;
2258                 /* Turn off self refresh if both pipes are enabled */
2259                 intel_set_memory_cxsr(dev_priv, false);
2260         }
2261
2262         drm_dbg_kms(&dev_priv->drm,
2263                     "Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2264                     srwm);
2265
2266         /* 965 has limitations... */
2267         I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
2268                    FW_WM(8, CURSORB) |
2269                    FW_WM(8, PLANEB) |
2270                    FW_WM(8, PLANEA));
2271         I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
2272                    FW_WM(8, PLANEC_OLD));
2273         /* update cursor SR watermark */
2274         I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
2275
2276         if (cxsr_enabled)
2277                 intel_set_memory_cxsr(dev_priv, true);
2278 }
2279
2280 #undef FW_WM
2281
2282 static void i9xx_update_wm(struct intel_crtc *unused_crtc)
2283 {
2284         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2285         const struct intel_watermark_params *wm_info;
2286         u32 fwater_lo;
2287         u32 fwater_hi;
2288         int cwm, srwm = 1;
2289         int fifo_size;
2290         int planea_wm, planeb_wm;
2291         struct intel_crtc *crtc, *enabled = NULL;
2292
2293         if (IS_I945GM(dev_priv))
2294                 wm_info = &i945_wm_info;
2295         else if (!IS_GEN(dev_priv, 2))
2296                 wm_info = &i915_wm_info;
2297         else
2298                 wm_info = &i830_a_wm_info;
2299
2300         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
2301         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
2302         if (intel_crtc_active(crtc)) {
2303                 const struct drm_display_mode *adjusted_mode =
2304                         &crtc->config->hw.adjusted_mode;
2305                 const struct drm_framebuffer *fb =
2306                         crtc->base.primary->state->fb;
2307                 int cpp;
2308
2309                 if (IS_GEN(dev_priv, 2))
2310                         cpp = 4;
2311                 else
2312                         cpp = fb->format->cpp[0];
2313
2314                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2315                                                wm_info, fifo_size, cpp,
2316                                                pessimal_latency_ns);
2317                 enabled = crtc;
2318         } else {
2319                 planea_wm = fifo_size - wm_info->guard_size;
2320                 if (planea_wm > (long)wm_info->max_wm)
2321                         planea_wm = wm_info->max_wm;
2322         }
2323
2324         if (IS_GEN(dev_priv, 2))
2325                 wm_info = &i830_bc_wm_info;
2326
2327         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
2328         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
2329         if (intel_crtc_active(crtc)) {
2330                 const struct drm_display_mode *adjusted_mode =
2331                         &crtc->config->hw.adjusted_mode;
2332                 const struct drm_framebuffer *fb =
2333                         crtc->base.primary->state->fb;
2334                 int cpp;
2335
2336                 if (IS_GEN(dev_priv, 2))
2337                         cpp = 4;
2338                 else
2339                         cpp = fb->format->cpp[0];
2340
2341                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2342                                                wm_info, fifo_size, cpp,
2343                                                pessimal_latency_ns);
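                /* with both pipes active there is no single plane config, so keep SR off */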
2344                 if (enabled == NULL)
2345                         enabled = crtc;
2346                 else
2347                         enabled = NULL;
2348         } else {
2349                 planeb_wm = fifo_size - wm_info->guard_size;
2350                 if (planeb_wm > (long)wm_info->max_wm)
2351                         planeb_wm = wm_info->max_wm;
2352         }
2353
2354         drm_dbg_kms(&dev_priv->drm,
2355                     "FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
2356
2357         if (IS_I915GM(dev_priv) && enabled) {
2358                 struct drm_i915_gem_object *obj;
2359
2360                 obj = intel_fb_obj(enabled->base.primary->state->fb);
2361
2362                 /* self-refresh seems busted with untiled */
2363                 if (!i915_gem_object_is_tiled(obj))
2364                         enabled = NULL;
2365         }
2366
2367         /*
2368          * Overlay gets an aggressive default since video jitter is bad.
2369          */
2370         cwm = 2;
2371
2372         /* Play safe and disable self-refresh before adjusting watermarks. */
2373         intel_set_memory_cxsr(dev_priv, false);
2374
2375         /* Calculate SR entries for single-plane configs */
2376         if (HAS_FW_BLC(dev_priv) && enabled) {
2377                 /* self-refresh has much higher latency */
2378                 static const int sr_latency_ns = 6000;
2379                 const struct drm_display_mode *adjusted_mode =
2380                         &enabled->config->hw.adjusted_mode;
2381                 const struct drm_framebuffer *fb =
2382                         enabled->base.primary->state->fb;
2383                 int clock = adjusted_mode->crtc_clock;
2384                 int htotal = adjusted_mode->crtc_htotal;
2385                 int hdisplay = enabled->config->pipe_src_w;
2386                 int cpp;
2387                 int entries;
2388
2389                 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
2390                         cpp = 4;
2391                 else
2392                         cpp = fb->format->cpp[0];
2393
2394                 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
2395                                            sr_latency_ns / 100);
2396                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
2397                 drm_dbg_kms(&dev_priv->drm,
2398                             "self-refresh entries: %d\n", entries);
2399                 srwm = wm_info->fifo_size - entries;
2400                 if (srwm < 0)
2401                         srwm = 1;
2402
2403                 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
2404                         I915_WRITE(FW_BLC_SELF,
2405                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
2406                 else
2407                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2408         }
2409
2410         drm_dbg_kms(&dev_priv->drm,
2411                     "Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2412                      planea_wm, planeb_wm, cwm, srwm);
2413
2414         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2415         fwater_hi = (cwm & 0x1f);
2416
2417         /* Set request length to 8 cachelines per fetch */
2418         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2419         fwater_hi = fwater_hi | (1 << 8);
2420
2421         I915_WRITE(FW_BLC, fwater_lo);
2422         I915_WRITE(FW_BLC2, fwater_hi);
2423
2424         if (enabled)
2425                 intel_set_memory_cxsr(dev_priv, true);
2426 }
2427
2428 static void i845_update_wm(struct intel_crtc *unused_crtc)
2429 {
2430         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2431         struct intel_crtc *crtc;
2432         const struct drm_display_mode *adjusted_mode;
2433         u32 fwater_lo;
2434         int planea_wm;
2435
2436         crtc = single_enabled_crtc(dev_priv);
2437         if (crtc == NULL)
2438                 return;
2439
2440         adjusted_mode = &crtc->config->hw.adjusted_mode;
2441         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2442                                        &i845_wm_info,
2443                                        dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
2444                                        4, pessimal_latency_ns);
2445         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2446         fwater_lo |= (3<<8) | planea_wm;
2447
2448         drm_dbg_kms(&dev_priv->drm,
2449                     "Setting FIFO watermarks - A: %d\n", planea_wm);
2450
2451         I915_WRITE(FW_BLC, fwater_lo);
2452 }
2453
2454 /* latency must be in 0.1us units. */
2455 static unsigned int ilk_wm_method1(unsigned int pixel_rate,
2456                                    unsigned int cpp,
2457                                    unsigned int latency)
2458 {
2459         unsigned int ret;
2460
2461         ret = intel_wm_method1(pixel_rate, cpp, latency);
2462         ret = DIV_ROUND_UP(ret, 64) + 2;
2463
2464         return ret;
2465 }
2466
2467 /* latency must be in 0.1us units. */
2468 static unsigned int ilk_wm_method2(unsigned int pixel_rate,
2469                                    unsigned int htotal,
2470                                    unsigned int width,
2471                                    unsigned int cpp,
2472                                    unsigned int latency)
2473 {
2474         unsigned int ret;
2475
2476         ret = intel_wm_method2(pixel_rate, htotal,
2477                                width, cpp, latency);
2478         ret = DIV_ROUND_UP(ret, 64) + 2;
2479
2480         return ret;
2481 }
2482
2483 static u32 ilk_wm_fbc(u32 pri_val, u32 horiz_pixels, u8 cpp)
2484 {
2485         /*
2486          * Neither of these should be possible since this function shouldn't be
2487          * called if the CRTC is off or the plane is invisible.  But let's be
2488          * extra paranoid to avoid a potential divide-by-zero if we screw up
2489          * elsewhere in the driver.
2490          */
2491         if (WARN_ON(!cpp))
2492                 return 0;
2493         if (WARN_ON(!horiz_pixels))
2494                 return 0;
2495
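        /*
         * pri_val is in 64-byte cachelines, so e.g. pri_val = 32 on a
         * 1920 pixel wide 4 bpp plane gives
         * DIV_ROUND_UP(32 * 64, 1920 * 4) + 2 = 3 lines (illustrative
         * numbers only).
         */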
2496         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
2497 }
2498
2499 struct ilk_wm_maximums {
2500         u16 pri;
2501         u16 spr;
2502         u16 cur;
2503         u16 fbc;
2504 };
2505
2506 /*
2507  * For both WM_PIPE and WM_LP.
2508  * mem_value must be in 0.1us units.
2509  */
2510 static u32 ilk_compute_pri_wm(const struct intel_crtc_state *crtc_state,
2511                               const struct intel_plane_state *plane_state,
2512                               u32 mem_value, bool is_lp)
2513 {
2514         u32 method1, method2;
2515         int cpp;
2516
2517         if (mem_value == 0)
2518                 return U32_MAX;
2519
2520         if (!intel_wm_plane_visible(crtc_state, plane_state))
2521                 return 0;
2522
2523         cpp = plane_state->hw.fb->format->cpp[0];
2524
2525         method1 = ilk_wm_method1(crtc_state->pixel_rate, cpp, mem_value);
2526
2527         if (!is_lp)
2528                 return method1;
2529
2530         method2 = ilk_wm_method2(crtc_state->pixel_rate,
2531                                  crtc_state->hw.adjusted_mode.crtc_htotal,
2532                                  drm_rect_width(&plane_state->uapi.dst),
2533                                  cpp, mem_value);
2534
2535         return min(method1, method2);
2536 }
2537
2538 /*
2539  * For both WM_PIPE and WM_LP.
2540  * mem_value must be in 0.1us units.
2541  */
2542 static u32 ilk_compute_spr_wm(const struct intel_crtc_state *crtc_state,
2543                               const struct intel_plane_state *plane_state,
2544                               u32 mem_value)
2545 {
2546         u32 method1, method2;
2547         int cpp;
2548
2549         if (mem_value == 0)
2550                 return U32_MAX;
2551
2552         if (!intel_wm_plane_visible(crtc_state, plane_state))
2553                 return 0;
2554
2555         cpp = plane_state->hw.fb->format->cpp[0];
2556
2557         method1 = ilk_wm_method1(crtc_state->pixel_rate, cpp, mem_value);
2558         method2 = ilk_wm_method2(crtc_state->pixel_rate,
2559                                  crtc_state->hw.adjusted_mode.crtc_htotal,
2560                                  drm_rect_width(&plane_state->uapi.dst),
2561                                  cpp, mem_value);
2562         return min(method1, method2);
2563 }
2564
2565 /*
2566  * For both WM_PIPE and WM_LP.
2567  * mem_value must be in 0.1us units.
2568  */
2569 static u32 ilk_compute_cur_wm(const struct intel_crtc_state *crtc_state,
2570                               const struct intel_plane_state *plane_state,
2571                               u32 mem_value)
2572 {
2573         int cpp;
2574
2575         if (mem_value == 0)
2576                 return U32_MAX;
2577
2578         if (!intel_wm_plane_visible(crtc_state, plane_state))
2579                 return 0;
2580
2581         cpp = plane_state->hw.fb->format->cpp[0];
2582
2583         return ilk_wm_method2(crtc_state->pixel_rate,
2584                               crtc_state->hw.adjusted_mode.crtc_htotal,
2585                               drm_rect_width(&plane_state->uapi.dst),
2586                               cpp, mem_value);
2587 }
2588
2589 /* Only for WM_LP. */
2590 static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *crtc_state,
2591                               const struct intel_plane_state *plane_state,
2592                               u32 pri_val)
2593 {
2594         int cpp;
2595
2596         if (!intel_wm_plane_visible(crtc_state, plane_state))
2597                 return 0;
2598
2599         cpp = plane_state->hw.fb->format->cpp[0];
2600
2601         return ilk_wm_fbc(pri_val, drm_rect_width(&plane_state->uapi.dst),
2602                           cpp);
2603 }
2604
2605 static unsigned int
2606 ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
2607 {
2608         if (INTEL_GEN(dev_priv) >= 8)
2609                 return 3072;
2610         else if (INTEL_GEN(dev_priv) >= 7)
2611                 return 768;
2612         else
2613                 return 512;
2614 }
2615
2616 static unsigned int
2617 ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
2618                      int level, bool is_sprite)
2619 {
2620         if (INTEL_GEN(dev_priv) >= 8)
2621                 /* BDW primary/sprite plane watermarks */
2622                 return level == 0 ? 255 : 2047;
2623         else if (INTEL_GEN(dev_priv) >= 7)
2624                 /* IVB/HSW primary/sprite plane watermarks */
2625                 return level == 0 ? 127 : 1023;
2626         else if (!is_sprite)
2627                 /* ILK/SNB primary plane watermarks */
2628                 return level == 0 ? 127 : 511;
2629         else
2630                 /* ILK/SNB sprite plane watermarks */
2631                 return level == 0 ? 63 : 255;
2632 }
2633
2634 static unsigned int
2635 ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
2636 {
2637         if (INTEL_GEN(dev_priv) >= 7)
2638                 return level == 0 ? 63 : 255;
2639         else
2640                 return level == 0 ? 31 : 63;
2641 }
2642
2643 static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
2644 {
2645         if (INTEL_GEN(dev_priv) >= 8)
2646                 return 31;
2647         else
2648                 return 15;
2649 }
2650
2651 /* Calculate the maximum primary/sprite plane watermark */
2652 static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv,
2653                                      int level,
2654                                      const struct intel_wm_config *config,
2655                                      enum intel_ddb_partitioning ddb_partitioning,
2656                                      bool is_sprite)
2657 {
2658         unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
2659
2660         /* if sprites aren't enabled, sprites get nothing */
2661         if (is_sprite && !config->sprites_enabled)
2662                 return 0;
2663
2664         /* HSW allows LP1+ watermarks even with multiple pipes */
2665         if (level == 0 || config->num_pipes_active > 1) {
2666                 fifo_size /= INTEL_NUM_PIPES(dev_priv);
2667
2668                 /*
2669                  * For some reason the non self refresh
2670                  * FIFO size is only half of the self
2671                  * refresh FIFO size on ILK/SNB.
2672                  */
2673                 if (INTEL_GEN(dev_priv) <= 6)
2674                         fifo_size /= 2;
2675         }
2676
2677         if (config->sprites_enabled) {
2678                 /* level 0 is always calculated with 1:1 split */
2679                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2680                         if (is_sprite)
2681                                 fifo_size *= 5;
2682                         fifo_size /= 6;
2683                 } else {
2684                         fifo_size /= 2;
2685                 }
2686         }
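        /*
         * E.g. a 768 entry FIFO (IVB/HSW) with one active pipe at an LP
         * level under 5:6 partitioning gives the sprite 768 * 5 / 6 = 640
         * entries and the primary 768 / 6 = 128, versus 384 each with
         * the 1:1 split.
         */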
2687
2688         /* clamp to max that the registers can hold */
2689         return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
2690 }
2691
2692 /* Calculate the maximum cursor plane watermark */
2693 static unsigned int ilk_cursor_wm_max(const struct drm_i915_private *dev_priv,
2694                                       int level,
2695                                       const struct intel_wm_config *config)
2696 {
2697         /* HSW LP1+ watermarks w/ multiple pipes */
2698         if (level > 0 && config->num_pipes_active > 1)
2699                 return 64;
2700
2701         /* otherwise just report max that registers can hold */
2702         return ilk_cursor_wm_reg_max(dev_priv, level);
2703 }
2704
2705 static void ilk_compute_wm_maximums(const struct drm_i915_private *dev_priv,
2706                                     int level,
2707                                     const struct intel_wm_config *config,
2708                                     enum intel_ddb_partitioning ddb_partitioning,
2709                                     struct ilk_wm_maximums *max)
2710 {
2711         max->pri = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, false);
2712         max->spr = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, true);
2713         max->cur = ilk_cursor_wm_max(dev_priv, level, config);
2714         max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2715 }
2716
2717 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
2718                                         int level,
2719                                         struct ilk_wm_maximums *max)
2720 {
2721         max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2722         max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2723         max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2724         max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2725 }
2726
2727 static bool ilk_validate_wm_level(int level,
2728                                   const struct ilk_wm_maximums *max,
2729                                   struct intel_wm_level *result)
2730 {
2731         bool ret;
2732
2733         /* already determined to be invalid? */
2734         if (!result->enable)
2735                 return false;
2736
2737         result->enable = result->pri_val <= max->pri &&
2738                          result->spr_val <= max->spr &&
2739                          result->cur_val <= max->cur;
2740
2741         ret = result->enable;
2742
2743         /*
2744          * HACK until we can pre-compute everything,
2745          * and thus fail gracefully if LP0 watermarks
2746          * are exceeded...
2747          */
2748         if (level == 0 && !result->enable) {
2749                 if (result->pri_val > max->pri)
2750                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2751                                       level, result->pri_val, max->pri);
2752                 if (result->spr_val > max->spr)
2753                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2754                                       level, result->spr_val, max->spr);
2755                 if (result->cur_val > max->cur)
2756                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2757                                       level, result->cur_val, max->cur);
2758
2759                 result->pri_val = min_t(u32, result->pri_val, max->pri);
2760                 result->spr_val = min_t(u32, result->spr_val, max->spr);
2761                 result->cur_val = min_t(u32, result->cur_val, max->cur);
2762                 result->enable = true;
2763         }
2764
2765         return ret;
2766 }
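
/*
 * Example (illustrative sketch, hypothetical values): the LP0 special
 * case above. With a hypothetical max->pri of 127 and a computed
 * pri_val of 140, LP0 is clamped and force-enabled, whereas the same
 * overflow at LP1+ simply leaves the level disabled.
 */
#if 0
static void ilk_validate_wm_level_example(const struct ilk_wm_maximums *max,
                                          struct intel_wm_level *result)
{
        /* assume result->pri_val == 140, max->pri == 127 */
        bool ok = ilk_validate_wm_level(0, max, result);

        /* ok == false, but result->pri_val == 127 and result->enable == true */
}
#endif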
2767
2768 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2769                                  const struct intel_crtc *crtc,
2770                                  int level,
2771                                  struct intel_crtc_state *crtc_state,
2772                                  const struct intel_plane_state *pristate,
2773                                  const struct intel_plane_state *sprstate,
2774                                  const struct intel_plane_state *curstate,
2775                                  struct intel_wm_level *result)
2776 {
2777         u16 pri_latency = dev_priv->wm.pri_latency[level];
2778         u16 spr_latency = dev_priv->wm.spr_latency[level];
2779         u16 cur_latency = dev_priv->wm.cur_latency[level];
2780
2781         /* WM1+ latency values stored in 0.5us units */
2782         if (level > 0) {
2783                 pri_latency *= 5;
2784                 spr_latency *= 5;
2785                 cur_latency *= 5;
2786         }
2787
2788         if (pristate) {
2789                 result->pri_val = ilk_compute_pri_wm(crtc_state, pristate,
2790                                                      pri_latency, level);
2791                 result->fbc_val = ilk_compute_fbc_wm(crtc_state, pristate, result->pri_val);
2792         }
2793
2794         if (sprstate)
2795                 result->spr_val = ilk_compute_spr_wm(crtc_state, sprstate, spr_latency);
2796
2797         if (curstate)
2798                 result->cur_val = ilk_compute_cur_wm(crtc_state, curstate, cur_latency);
2799
2800         result->enable = true;
2801 }
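
/*
 * Example (illustrative sketch): the latency unit conversion above.
 * WM1+ latencies are stored in 0.5us units, and the multiply by 5
 * appears to convert them into the 0.1us units also used by
 * intel_print_wm_latency() below.
 */
#if 0
static void ilk_wm_latency_units_example(struct drm_i915_private *dev_priv)
{
        u16 raw = dev_priv->wm.pri_latency[2];  /* e.g. 4, i.e. 2.0us */
        u16 tenths = raw * 5;                   /* 20 tenths of a microsecond */
}
#endif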
2802
2803 static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
2804                                   u16 wm[8])
2805 {
2806         struct intel_uncore *uncore = &dev_priv->uncore;
2807
2808         if (INTEL_GEN(dev_priv) >= 9) {
2809                 u32 val;
2810                 int ret, i;
2811                 int level, max_level = ilk_wm_max_level(dev_priv);
2812
2813                 /* read the first set of memory latencies[0:3] */
2814                 val = 0; /* data0 to be programmed to 0 for first set */
2815                 ret = sandybridge_pcode_read(dev_priv,
2816                                              GEN9_PCODE_READ_MEM_LATENCY,
2817                                              &val, NULL);
2818
2819                 if (ret) {
2820                         drm_err(&dev_priv->drm,
2821                                 "SKL Mailbox read error = %d\n", ret);
2822                         return;
2823                 }
2824
2825                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2826                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2827                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2828                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2829                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2830                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2831                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2832
2833                 /* read the second set of memory latencies[4:7] */
2834                 val = 1; /* data0 to be programmed to 1 for second set */
2835                 ret = sandybridge_pcode_read(dev_priv,
2836                                              GEN9_PCODE_READ_MEM_LATENCY,
2837                                              &val, NULL);
2838                 if (ret) {
2839                         drm_err(&dev_priv->drm,
2840                                 "SKL Mailbox read error = %d\n", ret);
2841                         return;
2842                 }
2843
2844                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2845                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2846                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2847                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2848                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2849                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2850                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2851
2852                 /*
2853                  * If a level n (n >= 1) has a 0us latency, all levels m (m >= n)
2854                  * need to be disabled. We make sure to sanitize the values out
2855                  * of the punit to satisfy this requirement.
2856                  */
2857                 for (level = 1; level <= max_level; level++) {
2858                         if (wm[level] == 0) {
2859                                 for (i = level + 1; i <= max_level; i++)
2860                                         wm[i] = 0;
2861                                 break;
2862                         }
2863                 }
2864
2865                 /*
2866                  * WaWmMemoryReadLatency:skl+,glk
2867                  *
2868                  * punit doesn't take into account the read latency so we need
2869                  * to add 2us to the various latency levels we retrieve from the
2870                  * punit when level 0 response data is 0us.
2871                  */
2872                 if (wm[0] == 0) {
2873                         wm[0] += 2;
2874                         for (level = 1; level <= max_level; level++) {
2875                                 if (wm[level] == 0)
2876                                         break;
2877                                 wm[level] += 2;
2878                         }
2879                 }
2880
2881                 /*
2882                  * WA Level-0 adjustment for 16GB DIMMs: SKL+
2883                  * If we could not get the DIMM info, assume a 16GB DIMM
2884                  * and enable this WA anyway, so as to avoid any potential
2885                  * underrun.
2886                  */
2887                 if (dev_priv->dram_info.is_16gb_dimm)
2888                         wm[0] += 1;
2889
2890         } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
2891                 u64 sskpd = intel_uncore_read64(uncore, MCH_SSKPD);
2892
2893                 wm[0] = (sskpd >> 56) & 0xFF;
2894                 if (wm[0] == 0)
2895                         wm[0] = sskpd & 0xF;
2896                 wm[1] = (sskpd >> 4) & 0xFF;
2897                 wm[2] = (sskpd >> 12) & 0xFF;
2898                 wm[3] = (sskpd >> 20) & 0x1FF;
2899                 wm[4] = (sskpd >> 32) & 0x1FF;
2900         } else if (INTEL_GEN(dev_priv) >= 6) {
2901                 u32 sskpd = intel_uncore_read(uncore, MCH_SSKPD);
2902
2903                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2904                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2905                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2906                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2907         } else if (INTEL_GEN(dev_priv) >= 5) {
2908                 u32 mltr = intel_uncore_read(uncore, MLTR_ILK);
2909
2910                 /* ILK primary LP0 latency is 700 ns */
2911                 wm[0] = 7;
2912                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2913                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2914         } else {
2915                 MISSING_CASE(INTEL_DEVID(dev_priv));
2916         }
2917 }
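
/*
 * Example (illustrative sketch, hypothetical values): how a single
 * 32-bit GEN9 pcode mailbox reply above packs four latency values,
 * assuming the usual 8-bit fields. A hypothetical reply of 0x201a1208
 * would unpack as follows:
 */
#if 0
static void gen9_mem_latency_unpack_example(void)
{
        u32 val = 0x201a1208;   /* hypothetical mailbox reply */
        u16 wm[4];

        wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;              /* 0x08 ->  8us */
        wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
                GEN9_MEM_LATENCY_LEVEL_MASK;                    /* 0x12 -> 18us */
        wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
                GEN9_MEM_LATENCY_LEVEL_MASK;                    /* 0x1a -> 26us */
        wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
                GEN9_MEM_LATENCY_LEVEL_MASK;                    /* 0x20 -> 32us */
}
#endif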
2918
2919 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
2920                                        u16 wm[5])
2921 {
2922         /* ILK sprite LP0 latency is 1300 ns */
2923         if (IS_GEN(dev_priv, 5))
2924                 wm[0] = 13;
2925 }
2926
2927 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
2928                                        u16 wm[5])
2929 {
2930         /* ILK cursor LP0 latency is 1300 ns */
2931         if (IS_GEN(dev_priv, 5))
2932                 wm[0] = 13;
2933 }
2934
2935 int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
2936 {
2937         /* how many WM levels are we expecting */
2938         if (INTEL_GEN(dev_priv) >= 9)
2939                 return 7;
2940         else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2941                 return 4;
2942         else if (INTEL_GEN(dev_priv) >= 6)
2943                 return 3;
2944         else
2945                 return 2;
2946 }
2947
2948 static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
2949                                    const char *name,
2950                                    const u16 wm[8])
2951 {
2952         int level, max_level = ilk_wm_max_level(dev_priv);
2953
2954         for (level = 0; level <= max_level; level++) {
2955                 unsigned int latency = wm[level];
2956
2957                 if (latency == 0) {
2958                         drm_dbg_kms(&dev_priv->drm,
2959                                     "%s WM%d latency not provided\n",
2960                                     name, level);
2961                         continue;
2962                 }
2963
2964                 /*
2965                  * - latencies are in us on gen9.
2966                  * - before then, WM1+ latency values are in 0.5us units
2967                  */
2968                 if (INTEL_GEN(dev_priv) >= 9)
2969                         latency *= 10;
2970                 else if (level > 0)
2971                         latency *= 5;
2972
2973                 drm_dbg_kms(&dev_priv->drm,
2974                             "%s WM%d latency %u (%u.%u usec)\n", name, level,
2975                             wm[level], latency / 10, latency % 10);
2976         }
2977 }
2978
2979 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2980                                     u16 wm[5], u16 min)
2981 {
2982         int level, max_level = ilk_wm_max_level(dev_priv);
2983
2984         if (wm[0] >= min)
2985                 return false;
2986
2987         wm[0] = max(wm[0], min);
2988         for (level = 1; level <= max_level; level++)
2989                 wm[level] = max_t(u16, wm[level], DIV_ROUND_UP(min, 5));
2990
2991         return true;
2992 }
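
/*
 * Example (illustrative sketch, hypothetical values): the arithmetic of
 * the bump above with snb_wm_latency_quirk()'s min of 12. WM0 is kept
 * in 0.1us units while WM1+ are in 0.5us units, hence the
 * DIV_ROUND_UP(min, 5).
 */
#if 0
static void ilk_increase_wm_latency_example(void)
{
        u16 wm[5] = { 5, 1, 2, 4, 3 };  /* hypothetical BIOS-provided values */

        /* ilk_increase_wm_latency(dev_priv, wm, 12) would yield: */
        /* wm[0] = 12 (1.2us); wm[1..max_level] >= DIV_ROUND_UP(12, 5) = 3 (1.5us) */
        /* i.e. { 12, 3, 3, 4, 3 } */
}
#endif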
2993
2994 static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
2995 {
2996         bool changed;
2997
2998         /*
2999          * The BIOS-provided WM memory latency values are often
3000          * inadequate for high resolution displays. Adjust them.
3001          */
3002         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
3003                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
3004                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
3005
3006         if (!changed)
3007                 return;
3008
3009         drm_dbg_kms(&dev_priv->drm,
3010                     "WM latency values increased to avoid potential underruns\n");
3011         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3012         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3013         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3014 }
3015
3016 static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
3017 {
3018         /*
3019          * On some SNB machines (Thinkpad X220 Tablet at least)
3020          * LP3 usage can cause vblank interrupts to be lost.
3021          * The DEIIR bit will go high but it looks like the CPU
3022          * never gets interrupted.
3023          *
3024          * It's not clear whether other interrupt sources could
3025          * be affected or if this is somehow limited to vblank
3026          * interrupts only. To play it safe we disable LP3
3027          * watermarks entirely.
3028          */
3029         if (dev_priv->wm.pri_latency[3] == 0 &&
3030             dev_priv->wm.spr_latency[3] == 0 &&
3031             dev_priv->wm.cur_latency[3] == 0)
3032                 return;
3033
3034         dev_priv->wm.pri_latency[3] = 0;
3035         dev_priv->wm.spr_latency[3] = 0;
3036         dev_priv->wm.cur_latency[3] = 0;
3037
3038         drm_dbg_kms(&dev_priv->drm,
3039                     "LP3 watermarks disabled due to potential for lost interrupts\n");
3040         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3041         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3042         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3043 }
3044
3045 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
3046 {
3047         intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
3048
3049         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
3050                sizeof(dev_priv->wm.pri_latency));
3051         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
3052                sizeof(dev_priv->wm.pri_latency));
3053
3054         intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
3055         intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
3056
3057         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3058         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3059         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3060
3061         if (IS_GEN(dev_priv, 6)) {
3062                 snb_wm_latency_quirk(dev_priv);
3063                 snb_wm_lp3_irq_quirk(dev_priv);
3064         }
3065 }
3066
3067 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
3068 {
3069         intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
3070         intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
3071 }
3072
3073 static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv,
3074                                  struct intel_pipe_wm *pipe_wm)
3075 {
3076         /* LP0 watermark maximums depend on this pipe alone */
3077         const struct intel_wm_config config = {
3078                 .num_pipes_active = 1,
3079                 .sprites_enabled = pipe_wm->sprites_enabled,
3080                 .sprites_scaled = pipe_wm->sprites_scaled,
3081         };
3082         struct ilk_wm_maximums max;
3083
3084         /* LP0 watermarks always use 1/2 DDB partitioning */
3085         ilk_compute_wm_maximums(dev_priv, 0, &config, INTEL_DDB_PART_1_2, &max);
3086
3087         /* At least LP0 must be valid */
3088         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
3089                 drm_dbg_kms(&dev_priv->drm, "LP0 watermark invalid\n");
3090                 return false;
3091         }
3092
3093         return true;
3094 }
3095
3096 /* Compute new watermarks for the pipe */
3097 static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state)
3098 {
3099         struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
3100         struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
3101         struct intel_pipe_wm *pipe_wm;
3102         struct intel_plane *plane;
3103         const struct intel_plane_state *plane_state;
3104         const struct intel_plane_state *pristate = NULL;
3105         const struct intel_plane_state *sprstate = NULL;
3106         const struct intel_plane_state *curstate = NULL;
3107         int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
3108         struct ilk_wm_maximums max;
3109
3110         pipe_wm = &crtc_state->wm.ilk.optimal;
3111
3112         intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
3113                 if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
3114                         pristate = plane_state;
3115                 else if (plane->base.type == DRM_PLANE_TYPE_OVERLAY)
3116                         sprstate = plane_state;
3117                 else if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
3118                         curstate = plane_state;
3119         }
3120
3121         pipe_wm->pipe_enabled = crtc_state->hw.active;
3122         if (sprstate) {
3123                 pipe_wm->sprites_enabled = sprstate->uapi.visible;
3124                 pipe_wm->sprites_scaled = sprstate->uapi.visible &&
3125                         (drm_rect_width(&sprstate->uapi.dst) != drm_rect_width(&sprstate->uapi.src) >> 16 ||
3126                          drm_rect_height(&sprstate->uapi.dst) != drm_rect_height(&sprstate->uapi.src) >> 16);
3127         }
3128
3129         usable_level = max_level;
3130
3131         /* ILK/SNB: LP2+ watermarks only w/o sprites */
3132         if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
3133                 usable_level = 1;
3134
3135         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
3136         if (pipe_wm->sprites_scaled)
3137                 usable_level = 0;
3138
3139         memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
3140         ilk_compute_wm_level(dev_priv, crtc, 0, crtc_state,
3141                              pristate, sprstate, curstate, &pipe_wm->wm[0]);
3142
3143         if (!ilk_validate_pipe_wm(dev_priv, pipe_wm))
3144                 return -EINVAL;
3145
3146         ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
3147
3148         for (level = 1; level <= usable_level; level++) {
3149                 struct intel_wm_level *wm = &pipe_wm->wm[level];
3150
3151                 ilk_compute_wm_level(dev_priv, crtc, level, crtc_state,
3152                                      pristate, sprstate, curstate, wm);
3153
3154                 /*
3155                  * Disable any watermark level that exceeds the
3156                  * register maximums since such watermarks are
3157                  * always invalid.
3158                  */
3159                 if (!ilk_validate_wm_level(level, &max, wm)) {
3160                         memset(wm, 0, sizeof(*wm));
3161                         break;
3162                 }
3163         }
3164
3165         return 0;
3166 }
3167
3168 /*
3169  * Build a set of 'intermediate' watermark values that satisfy both the old
3170  * state and the new state.  These can be programmed to the hardware
3171  * immediately.
3172  */
3173 static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate)
3174 {
3175         struct intel_crtc *intel_crtc = to_intel_crtc(newstate->uapi.crtc);
3176         struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
3177         struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
3178         struct intel_atomic_state *intel_state =
3179                 to_intel_atomic_state(newstate->uapi.state);
3180         const struct intel_crtc_state *oldstate =
3181                 intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
3182         const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
3183         int level, max_level = ilk_wm_max_level(dev_priv);
3184
3185         /*
3186          * Start with the final, target watermarks, then combine with the
3187          * currently active watermarks to get values that are safe both before
3188          * and after the vblank.
3189          */
3190         *a = newstate->wm.ilk.optimal;
3191         if (!newstate->hw.active || drm_atomic_crtc_needs_modeset(&newstate->uapi) ||
3192             intel_state->skip_intermediate_wm)
3193                 return 0;
3194
3195         a->pipe_enabled |= b->pipe_enabled;
3196         a->sprites_enabled |= b->sprites_enabled;
3197         a->sprites_scaled |= b->sprites_scaled;
3198
3199         for (level = 0; level <= max_level; level++) {
3200                 struct intel_wm_level *a_wm = &a->wm[level];
3201                 const struct intel_wm_level *b_wm = &b->wm[level];
3202
3203                 a_wm->enable &= b_wm->enable;
3204                 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
3205                 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
3206                 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
3207                 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
3208         }
3209
3210         /*
3211          * We need to make sure that these merged watermark values are
3212          * actually a valid configuration themselves.  If they're not,
3213          * there's no safe way to transition from the old state to
3214          * the new state, so we need to fail the atomic transaction.
3215          */
3216         if (!ilk_validate_pipe_wm(dev_priv, a))
3217                 return -EINVAL;
3218
3219         /*
3220          * If our intermediate WMs are identical to the final WMs, then we can
3221          * omit the post-vblank programming; only update if it's different.
3222          */
3223         if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
3224                 newstate->wm.need_postvbl_update = true;
3225
3226         return 0;
3227 }
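
/*
 * Example (illustrative sketch): the merge rule used above. The
 * intermediate watermarks take the element-wise maximum of the old and
 * new values, and a level stays enabled only if it is enabled in both,
 * so the programmed values are safe on either side of the vblank.
 */
#if 0
static void ilk_intermediate_wm_example(struct intel_wm_level *a,       /* new */
                                        const struct intel_wm_level *b) /* old */
{
        a->enable &= b->enable;
        a->pri_val = max(a->pri_val, b->pri_val);       /* e.g. max(40, 64) = 64 */
}
#endif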
3228
3229 /*
3230  * Merge the watermarks from all active pipes for a specific level.
3231  */
3232 static void ilk_merge_wm_level(struct drm_i915_private *dev_priv,
3233                                int level,
3234                                struct intel_wm_level *ret_wm)
3235 {
3236         const struct intel_crtc *intel_crtc;
3237
3238         ret_wm->enable = true;
3239
3240         for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
3241                 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
3242                 const struct intel_wm_level *wm = &active->wm[level];
3243
3244                 if (!active->pipe_enabled)
3245                         continue;
3246
3247                 /*
3248                  * The watermark values may have been used in the past,
3249                  * so we must maintain them in the registers for some
3250                  * time even if the level is now disabled.
3251                  */
3252                 if (!wm->enable)
3253                         ret_wm->enable = false;
3254
3255                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
3256                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
3257                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
3258                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
3259         }
3260 }
3261
3262 /*
3263  * Merge all low power watermarks for all active pipes.
3264  */
3265 static void ilk_wm_merge(struct drm_i915_private *dev_priv,
3266                          const struct intel_wm_config *config,
3267                          const struct ilk_wm_maximums *max,
3268                          struct intel_pipe_wm *merged)
3269 {
3270         int level, max_level = ilk_wm_max_level(dev_priv);
3271         int last_enabled_level = max_level;
3272
3273         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
3274         if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
3275             config->num_pipes_active > 1)
3276                 last_enabled_level = 0;
3277
3278         /* ILK: FBC WM must be disabled always */
3279         merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
3280
3281         /* merge each WM1+ level */
3282         for (level = 1; level <= max_level; level++) {
3283                 struct intel_wm_level *wm = &merged->wm[level];
3284
3285                 ilk_merge_wm_level(dev_priv, level, wm);
3286
3287                 if (level > last_enabled_level)
3288                         wm->enable = false;
3289                 else if (!ilk_validate_wm_level(level, max, wm))
3290                         /* make sure all following levels get disabled */
3291                         last_enabled_level = level - 1;
3292
3293                 /*
3294                  * The spec says it is preferred to disable
3295                  * FBC WMs instead of disabling a WM level.
3296                  */
3297                 if (wm->fbc_val > max->fbc) {
3298                         if (wm->enable)
3299                                 merged->fbc_wm_enabled = false;
3300                         wm->fbc_val = 0;
3301                 }
3302         }
3303
3304         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3305         /*
3306          * FIXME this is racy. FBC might get enabled later.
3307          * What we should check here is whether FBC can be
3308          * enabled sometime later.
3309          */
3310         if (IS_GEN(dev_priv, 5) && !merged->fbc_wm_enabled &&
3311             intel_fbc_is_active(dev_priv)) {
3312                 for (level = 2; level <= max_level; level++) {
3313                         struct intel_wm_level *wm = &merged->wm[level];
3314
3315                         wm->enable = false;
3316                 }
3317         }
3318 }
3319
3320 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
3321 {
3322         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
3323         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
3324 }
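
/*
 * Example (worked expansion of the expression above): when wm[4] is
 * enabled, LP1/LP2/LP3 map to levels 1/3/4; otherwise to levels 1/2/3.
 *
 *   wm_lp = 1 -> 1 + (1 >= 2 && wm[4].enable) = 1
 *   wm_lp = 2 -> 2 + (2 >= 2 && wm[4].enable) = 2 or 3
 *   wm_lp = 3 -> 3 + (3 >= 2 && wm[4].enable) = 3 or 4
 */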
3325
3326 /* The value we need to program into the WM_LPx latency field */
3327 static unsigned int ilk_wm_lp_latency(struct drm_i915_private *dev_priv,
3328                                       int level)
3329 {
3330         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3331                 return 2 * level;
3332         else
3333                 return dev_priv->wm.pri_latency[level];
3334 }
3335
3336 static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
3337                                    const struct intel_pipe_wm *merged,
3338                                    enum intel_ddb_partitioning partitioning,
3339                                    struct ilk_wm_values *results)
3340 {
3341         struct intel_crtc *intel_crtc;
3342         int level, wm_lp;
3343
3344         results->enable_fbc_wm = merged->fbc_wm_enabled;
3345         results->partitioning = partitioning;
3346
3347         /* LP1+ register values */
3348         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3349                 const struct intel_wm_level *r;
3350
3351                 level = ilk_wm_lp_to_level(wm_lp, merged);
3352
3353                 r = &merged->wm[level];
3354
3355                 /*
3356                  * Maintain the watermark values even if the level is
3357                  * disabled. Doing otherwise could cause underruns.
3358                  */
3359                 results->wm_lp[wm_lp - 1] =
3360                         (ilk_wm_lp_latency(dev_priv, level) << WM1_LP_LATENCY_SHIFT) |
3361                         (r->pri_val << WM1_LP_SR_SHIFT) |
3362                         r->cur_val;
3363
3364                 if (r->enable)
3365                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
3366
3367                 if (INTEL_GEN(dev_priv) >= 8)
3368                         results->wm_lp[wm_lp - 1] |=
3369                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
3370                 else
3371                         results->wm_lp[wm_lp - 1] |=
3372                                 r->fbc_val << WM1_LP_FBC_SHIFT;
3373
3374                 /*
3375                  * Always set WM1S_LP_EN when spr_val != 0, even if the
3376                  * level is disabled. Doing otherwise could cause underruns.
3377                  */
3378                 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
3379                         drm_WARN_ON(&dev_priv->drm, wm_lp != 1);
3380                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
3381                 } else
3382                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
3383         }
3384
3385         /* LP0 register values */
3386         for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
3387                 enum pipe pipe = intel_crtc->pipe;
3388                 const struct intel_pipe_wm *pipe_wm = &intel_crtc->wm.active.ilk;
3389                 const struct intel_wm_level *r = &pipe_wm->wm[0];
3390
3391                 if (drm_WARN_ON(&dev_priv->drm, !r->enable))
3392                         continue;
3393
3394                 results->wm_pipe[pipe] =
3395                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
3396                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
3397                         r->cur_val;
3398         }
3399 }
3400
3401 /* Find the result with the highest level enabled. Check for enable_fbc_wm in
3402  * case both are at the same level. Prefer r1 in case they're the same. */
3403 static struct intel_pipe_wm *
3404 ilk_find_best_result(struct drm_i915_private *dev_priv,
3405                      struct intel_pipe_wm *r1,
3406                      struct intel_pipe_wm *r2)
3407 {
3408         int level, max_level = ilk_wm_max_level(dev_priv);
3409         int level1 = 0, level2 = 0;
3410
3411         for (level = 1; level <= max_level; level++) {
3412                 if (r1->wm[level].enable)
3413                         level1 = level;
3414                 if (r2->wm[level].enable)
3415                         level2 = level;
3416         }
3417
3418         if (level1 == level2) {
3419                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
3420                         return r2;
3421                 else
3422                         return r1;
3423         } else if (level1 > level2) {
3424                 return r1;
3425         } else {
3426                 return r2;
3427         }
3428 }
3429
3430 /* dirty bits used to track which watermarks need changes */
3431 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
3432 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
3433 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
3434 #define WM_DIRTY_FBC (1 << 24)
3435 #define WM_DIRTY_DDB (1 << 25)
3436
3437 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
3438                                          const struct ilk_wm_values *old,
3439                                          const struct ilk_wm_values *new)
3440 {
3441         unsigned int dirty = 0;
3442         enum pipe pipe;
3443         int wm_lp;
3444
3445         for_each_pipe(dev_priv, pipe) {
3446                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3447                         dirty |= WM_DIRTY_PIPE(pipe);
3448                         /* Must disable LP1+ watermarks too */
3449                         dirty |= WM_DIRTY_LP_ALL;
3450                 }
3451         }
3452
3453         if (old->enable_fbc_wm != new->enable_fbc_wm) {
3454                 dirty |= WM_DIRTY_FBC;
3455                 /* Must disable LP1+ watermarks too */
3456                 dirty |= WM_DIRTY_LP_ALL;
3457         }
3458
3459         if (old->partitioning != new->partitioning) {
3460                 dirty |= WM_DIRTY_DDB;
3461                 /* Must disable LP1+ watermarks too */
3462                 dirty |= WM_DIRTY_LP_ALL;
3463         }
3464
3465         /* LP1+ watermarks already deemed dirty, no need to continue */
3466         if (dirty & WM_DIRTY_LP_ALL)
3467                 return dirty;
3468
3469         /* Find the lowest numbered LP1+ watermark in need of an update... */
3470         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3471                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3472                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3473                         break;
3474         }
3475
3476         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3477         for (; wm_lp <= 3; wm_lp++)
3478                 dirty |= WM_DIRTY_LP(wm_lp);
3479
3480         return dirty;
3481 }
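
/*
 * Example (illustrative sketch): the layout of the dirty mask computed
 * above. Pipes occupy the low bits, LP1-3 sit at bits 16-18, and
 * FBC/DDB get dedicated high bits.
 */
#if 0
static void ilk_wm_dirty_layout_example(void)
{
        unsigned int dirty = WM_DIRTY_PIPE(PIPE_A) |    /* bit 0 */
                             WM_DIRTY_LP(1) |           /* bit 16 */
                             WM_DIRTY_LP(3) |           /* bit 18 */
                             WM_DIRTY_FBC;              /* bit 24 */
}
#endif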
3482
3483 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3484                                unsigned int dirty)
3485 {
3486         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3487         bool changed = false;
3488
3489         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3490                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3491                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
3492                 changed = true;
3493         }
3494         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3495                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3496                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
3497                 changed = true;
3498         }
3499         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3500                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3501                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
3502                 changed = true;
3503         }
3504
3505         /*
3506          * Don't touch WM1S_LP_EN here.
3507          * Doing so could cause underruns.
3508          */
3509
3510         return changed;
3511 }
3512
3513 /*
3514  * The spec says we shouldn't write when we don't need to, because every write
3515  * causes WMs to be re-evaluated, expending some power.
3516  */
3517 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
3518                                 struct ilk_wm_values *results)
3519 {
3520         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3521         unsigned int dirty;
3522         u32 val;
3523
3524         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
3525         if (!dirty)
3526                 return;
3527
3528         _ilk_disable_lp_wm(dev_priv, dirty);
3529
3530         if (dirty & WM_DIRTY_PIPE(PIPE_A))
3531                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
3532         if (dirty & WM_DIRTY_PIPE(PIPE_B))
3533                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
3534         if (dirty & WM_DIRTY_PIPE(PIPE_C))
3535                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
3536
3537         if (dirty & WM_DIRTY_DDB) {
3538                 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
3539                         val = I915_READ(WM_MISC);
3540                         if (results->partitioning == INTEL_DDB_PART_1_2)
3541                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
3542                         else
3543                                 val |= WM_MISC_DATA_PARTITION_5_6;
3544                         I915_WRITE(WM_MISC, val);
3545                 } else {
3546                         val = I915_READ(DISP_ARB_CTL2);
3547                         if (results->partitioning == INTEL_DDB_PART_1_2)
3548                                 val &= ~DISP_DATA_PARTITION_5_6;
3549                         else
3550                                 val |= DISP_DATA_PARTITION_5_6;
3551                         I915_WRITE(DISP_ARB_CTL2, val);
3552                 }
3553         }
3554
3555         if (dirty & WM_DIRTY_FBC) {
3556                 val = I915_READ(DISP_ARB_CTL);
3557                 if (results->enable_fbc_wm)
3558                         val &= ~DISP_FBC_WM_DIS;
3559                 else
3560                         val |= DISP_FBC_WM_DIS;
3561                 I915_WRITE(DISP_ARB_CTL, val);
3562         }
3563
3564         if (dirty & WM_DIRTY_LP(1) &&
3565             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
3566                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
3567
3568         if (INTEL_GEN(dev_priv) >= 7) {
3569                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
3570                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
3571                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
3572                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
3573         }
3574
3575         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
3576                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
3577         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
3578                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
3579         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
3580                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
3581
3582         dev_priv->wm.hw = *results;
3583 }
3584
3585 bool ilk_disable_lp_wm(struct drm_i915_private *dev_priv)
3586 {
3587         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3588 }
3589
3590 u8 intel_enabled_dbuf_slices_mask(struct drm_i915_private *dev_priv)
3591 {
3592         int i;
3593         int max_slices = INTEL_INFO(dev_priv)->num_supported_dbuf_slices;
3594         u8 enabled_slices_mask = 0;
3595
3596         for (i = 0; i < max_slices; i++) {
3597                 if (I915_READ(DBUF_CTL_S(i)) & DBUF_POWER_STATE)
3598                         enabled_slices_mask |= BIT(i);
3599         }
3600
3601         return enabled_slices_mask;
3602 }
3603
3604 /*
3605  * FIXME: We still don't have the proper code to detect if we need to apply the WA,
3606  * so assume we'll always need it in order to avoid underruns.
3607  */
3608 static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)
3609 {
3610         return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv);
3611 }
3612
3613 static bool
3614 intel_has_sagv(struct drm_i915_private *dev_priv)
3615 {
3616         /* HACK! */
3617         if (IS_GEN(dev_priv, 12))
3618                 return false;
3619
3620         return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) &&
3621                 dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED;
3622 }
3623
3624 static void
3625 skl_setup_sagv_block_time(struct drm_i915_private *dev_priv)
3626 {
3627         if (INTEL_GEN(dev_priv) >= 12) {
3628                 u32 val = 0;
3629                 int ret;
3630
3631                 ret = sandybridge_pcode_read(dev_priv,
3632                                              GEN12_PCODE_READ_SAGV_BLOCK_TIME_US,
3633                                              &val, NULL);
3634                 if (!ret) {
3635                         dev_priv->sagv_block_time_us = val;
3636                         return;
3637                 }
3638
3639                 drm_dbg(&dev_priv->drm, "Couldn't read SAGV block time!\n");
3640         } else if (IS_GEN(dev_priv, 11)) {
3641                 dev_priv->sagv_block_time_us = 10;
3642                 return;
3643         } else if (IS_GEN(dev_priv, 10)) {
3644                 dev_priv->sagv_block_time_us = 20;
3645                 return;
3646         } else if (IS_GEN(dev_priv, 9)) {
3647                 dev_priv->sagv_block_time_us = 30;
3648                 return;
3649         } else {
3650                 MISSING_CASE(INTEL_GEN(dev_priv));
3651         }
3652
3653         /* Default to an unusable block time */
3654         dev_priv->sagv_block_time_us = -1;
3655 }
3656
3657 /*
3658  * SAGV dynamically adjusts the system agent voltage and clock frequencies
3659  * depending on power and performance requirements. The display engine access
3660  * to system memory is blocked during the adjustment time. Because of the
3661  * blocking time, having this enabled can cause full system hangs and/or pipe
3662  * underruns if we don't meet all of the following requirements:
3663  *
3664  *  - <= 1 pipe enabled
3665  *  - All planes can enable watermarks for latencies >= SAGV engine block time
3666  *  - We're not using an interlaced display configuration
3667  */
3668 int
3669 intel_enable_sagv(struct drm_i915_private *dev_priv)
3670 {
3671         int ret;
3672
3673         if (!intel_has_sagv(dev_priv))
3674                 return 0;
3675
3676         if (dev_priv->sagv_status == I915_SAGV_ENABLED)
3677                 return 0;
3678
3679         drm_dbg_kms(&dev_priv->drm, "Enabling SAGV\n");
3680         ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3681                                       GEN9_SAGV_ENABLE);
3682
3683         /* We don't need to wait for SAGV when enabling */
3684
3685         /*
3686          * Some skl systems, pre-release machines in particular,
3687          * don't actually have SAGV.
3688          */
3689         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3690                 drm_dbg(&dev_priv->drm, "No SAGV found on system, ignoring\n");
3691                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3692                 return 0;
3693         } else if (ret < 0) {
3694                 drm_err(&dev_priv->drm, "Failed to enable SAGV\n");
3695                 return ret;
3696         }
3697
3698         dev_priv->sagv_status = I915_SAGV_ENABLED;
3699         return 0;
3700 }
3701
3702 int
3703 intel_disable_sagv(struct drm_i915_private *dev_priv)
3704 {
3705         int ret;
3706
3707         if (!intel_has_sagv(dev_priv))
3708                 return 0;
3709
3710         if (dev_priv->sagv_status == I915_SAGV_DISABLED)
3711                 return 0;
3712
3713         drm_dbg_kms(&dev_priv->drm, "Disabling SAGV\n");
3714         /* bspec says to keep retrying for at least 1 ms */
3715         ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3716                                 GEN9_SAGV_DISABLE,
3717                                 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3718                                 1);
3719         /*
3720          * Some skl systems, pre-release machines in particular,
3721          * don't actually have SAGV.
3722          */
3723         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3724                 drm_dbg(&dev_priv->drm, "No SAGV found on system, ignoring\n");
3725                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3726                 return 0;
3727         } else if (ret < 0) {
3728                 drm_err(&dev_priv->drm, "Failed to disable SAGV (%d)\n", ret);
3729                 return ret;
3730         }
3731
3732         dev_priv->sagv_status = I915_SAGV_DISABLED;
3733         return 0;
3734 }
3735
3736 bool intel_can_enable_sagv(struct intel_atomic_state *state)
3737 {
3738         struct drm_device *dev = state->base.dev;
3739         struct drm_i915_private *dev_priv = to_i915(dev);
3740         struct intel_crtc *crtc;
3741         struct intel_plane *plane;
3742         struct intel_crtc_state *crtc_state;
3743         enum pipe pipe;
3744         int level, latency;
3745
3746         if (!intel_has_sagv(dev_priv))
3747                 return false;
3748
3749         /*
3750          * If there are no active CRTCs, no additional checks need be performed
3751          */
3752         if (hweight8(state->active_pipes) == 0)
3753                 return true;
3754
3755         /*
3756          * SKL+ workaround: bspec recommends we disable SAGV when we have
3757          * more than one pipe enabled
3758          */
3759         if (hweight8(state->active_pipes) > 1)
3760                 return false;
3761
3762         /* Since we're now guaranteed to only have one active CRTC... */
3763         pipe = ffs(state->active_pipes) - 1;
3764         crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
3765         crtc_state = to_intel_crtc_state(crtc->base.state);
3766
3767         if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
3768                 return false;
3769
3770         for_each_intel_plane_on_crtc(dev, crtc, plane) {
3771                 struct skl_plane_wm *wm =
3772                         &crtc_state->wm.skl.optimal.planes[plane->id];
3773
3774                 /* Skip this plane if it's not enabled */
3775                 if (!wm->wm[0].plane_en)
3776                         continue;
3777
3778                 /* Find the highest enabled wm level for this plane */
3779                 for (level = ilk_wm_max_level(dev_priv);
3780                      !wm->wm[level].plane_en; --level)
3781                      { }
3782
3783                 latency = dev_priv->wm.skl_latency[level];
3784
3785                 if (skl_needs_memory_bw_wa(dev_priv) &&
3786                     plane->base.state->fb->modifier ==
3787                     I915_FORMAT_MOD_X_TILED)
3788                         latency += 15;
3789
3790                 /*
3791                  * If any of the planes on this pipe don't enable wm levels
3792                  * that tolerate memory latencies of at least
3793                  * sagv_block_time_us, we can't enable SAGV.
3794                  */
3795                 if (latency < dev_priv->sagv_block_time_us)
3796                         return false;
3797         }
3798
3799         return true;
3800 }
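
/*
 * Example (illustrative sketch, hypothetical values): the latency check
 * above. With a hypothetical sagv_block_time_us of 30, a plane whose
 * highest enabled wm level only covers a 20us latency blocks SAGV,
 * while one covering 30us or more allows it.
 */
#if 0
static bool sagv_latency_ok_example(int latency, int sagv_block_time_us)
{
        return latency >= sagv_block_time_us;   /* 20 < 30 -> SAGV denied */
}
#endif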
3801
3802 /*
3803  * Calculate initial DBuf slice offset, based on slice size
3804  * and mask (i.e. if the slice size is 1024 and the second slice is
3805  * enabled, the offset would be 1024)
3806  */
3807 static unsigned int
3808 icl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask,
3809                                 u32 slice_size,
3810                                 u32 ddb_size)
3811 {
3812         unsigned int offset = 0;
3813
3814         if (!dbuf_slice_mask)
3815                 return 0;
3816
3817         offset = (ffs(dbuf_slice_mask) - 1) * slice_size;
3818
3819         WARN_ON(offset >= ddb_size);
3820         return offset;
3821 }
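
/*
 * Example (illustrative sketch, hypothetical values): the ffs() based
 * offset computation above. With only DBuf slice S2 in the mask and a
 * slice size of 1024, the allocation starts at entry 1024.
 */
#if 0
static void icl_dbuf_slice_offset_example(void)
{
        u32 dbuf_slice_mask = BIT(1);   /* S2 only */
        u32 slice_size = 1024;

        /* (ffs(0x2) - 1) * 1024 == (2 - 1) * 1024 == 1024 */
        unsigned int offset = (ffs(dbuf_slice_mask) - 1) * slice_size;
}
#endif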
3822
3823 static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv)
3824 {
3825         u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3826
3827         drm_WARN_ON(&dev_priv->drm, ddb_size == 0);
3828
3829         if (INTEL_GEN(dev_priv) < 11)
3830                 return ddb_size - 4; /* 4 blocks for bypass path allocation */
3831
3832         return ddb_size;
3833 }
3834
3835 static u8 skl_compute_dbuf_slices(const struct intel_crtc_state *crtc_state,
3836                                   u8 active_pipes);
3837
3838 static void
3839 skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
3840                                    const struct intel_crtc_state *crtc_state,
3841                                    const u64 total_data_rate,
3842                                    struct skl_ddb_entry *alloc, /* out */
3843                                    int *num_active /* out */)
3844 {
3845         struct drm_atomic_state *state = crtc_state->uapi.state;
3846         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3847         struct drm_crtc *for_crtc = crtc_state->uapi.crtc;
3848         const struct intel_crtc *crtc;
3849         u32 pipe_width = 0, total_width_in_range = 0, width_before_pipe_in_range = 0;
3850         enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
3851         u16 ddb_size;
3852         u32 ddb_range_size;
3853         u32 i;
3854         u32 dbuf_slice_mask;
3855         u32 active_pipes;
3856         u32 offset;
3857         u32 slice_size;
3858         u32 total_slice_mask;
3859         u32 start, end;
3860
3861         if (drm_WARN_ON(&dev_priv->drm, !state) || !crtc_state->hw.active) {
3862                 alloc->start = 0;
3863                 alloc->end = 0;
3864                 *num_active = hweight8(dev_priv->active_pipes);
3865                 return;
3866         }
3867
3868         if (intel_state->active_pipe_changes)
3869                 active_pipes = intel_state->active_pipes;
3870         else
3871                 active_pipes = dev_priv->active_pipes;
3872
3873         *num_active = hweight8(active_pipes);
3874
3875         ddb_size = intel_get_ddb_size(dev_priv);
3876
3877         slice_size = ddb_size / INTEL_INFO(dev_priv)->num_supported_dbuf_slices;
3878
3879         /*
3880          * If the state doesn't change the active CRTCs or there is no
3881          * modeset request, then there's no need to recalculate;
3882          * the existing pipe allocation limits should remain unchanged.
3883          * Note that we're safe from racing commits since any racing commit
3884          * that changes the active CRTC list or does a modeset would need to
3885          * grab _all_ crtc locks, including the one we currently hold.
3886          */
3887         if (!intel_state->active_pipe_changes && !intel_state->modeset) {
3888                 /*
3889                  * alloc may be cleared by clear_intel_crtc_state,
3890                  * copy from old state to be sure
3891                  */
3892                 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
3893                 return;
3894         }
3895
3896         /*
3897          * Get the allowed DBuf slices for the corresponding pipe and platform.
3898          */
3899         dbuf_slice_mask = skl_compute_dbuf_slices(crtc_state, active_pipes);
3900
3901         DRM_DEBUG_KMS("DBuf slice mask %x pipe %c active pipes %x\n",
3902                       dbuf_slice_mask,
3903                       pipe_name(for_pipe), active_pipes);
3904
3905         /*
3906          * Figure out at which DBuf slice we start, i.e. if we start at
3907          * DBuf S2 and the slice size is 1024, the offset would be 1024.
3908          */
3909         offset = icl_get_first_dbuf_slice_offset(dbuf_slice_mask,
3910                                                  slice_size, ddb_size);
3911
3912         /*
3913          * Figure out the total size of the allowed DBuf slices, which is
3914          * basically the number of slices allowed for that pipe multiplied
3915          * by the slice size. Inside this range, DDB entries are still
3916          * allocated in proportion to the display width.
3917          */
3918         ddb_range_size = hweight8(dbuf_slice_mask) * slice_size;
3919
3920         /*
3921          * The watermark/DDB requirement depends heavily on the width of
3922          * the framebuffer, so instead of allocating the DDB equally among
3923          * pipes, distribute it based on the resolution/width of the display.
3924          */
3925         total_slice_mask = dbuf_slice_mask;
3926         for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
3927                 const struct drm_display_mode *adjusted_mode =
3928                         &crtc_state->hw.adjusted_mode;
3929                 enum pipe pipe = crtc->pipe;
3930                 int hdisplay, vdisplay;
3931                 u32 pipe_dbuf_slice_mask;
3932
3933                 if (!crtc_state->hw.active)
3934                         continue;
3935
3936                 pipe_dbuf_slice_mask = skl_compute_dbuf_slices(crtc_state,
3937                                                                active_pipes);
3938
3939                 /*
3940                  * According to BSpec a pipe can share one dbuf slice with
3941                  * other pipes, or a pipe can use multiple dbufs; in both
3942                  * cases we account for other pipes only if they have
3943                  * exactly the same mask. However we still need to track
3944                  * how many slices should be enabled in total.
3945                  */
3946                 total_slice_mask |= pipe_dbuf_slice_mask;
3947
3948                 /*
3949                  * Do not account for pipes using other slice sets.
3950                  * Luckily, as of the current BSpec, slice sets do not
3951                  * partially intersect (pipes share either the same single
3952                  * slice or the same whole slice set), so it is enough
3953                  * to check for equality for now.
3954                  */
3955                 if (dbuf_slice_mask != pipe_dbuf_slice_mask)
3956                         continue;
3957
3958                 drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
3959
3960                 total_width_in_range += hdisplay;
3961
3962                 if (pipe < for_pipe)
3963                         width_before_pipe_in_range += hdisplay;
3964                 else if (pipe == for_pipe)
3965                         pipe_width = hdisplay;
3966         }
3967
3968         /*
3969          * FIXME: For now we always enable slice S1 as per
3970          * the Bspec display initialization sequence.
3971          */
3972         intel_state->enabled_dbuf_slices_mask = total_slice_mask | BIT(DBUF_S1);
3973
3974         start = ddb_range_size * width_before_pipe_in_range / total_width_in_range;
3975         end = ddb_range_size *
3976                 (width_before_pipe_in_range + pipe_width) / total_width_in_range;
3977
3978         alloc->start = offset + start;
3979         alloc->end = offset + end;
3980
3981         DRM_DEBUG_KMS("Pipe %d ddb %d-%d\n", for_pipe,
3982                       alloc->start, alloc->end);
3983         DRM_DEBUG_KMS("Enabled ddb slices mask %x num supported %d\n",
3984                       intel_state->enabled_dbuf_slices_mask,
3985                       INTEL_INFO(dev_priv)->num_supported_dbuf_slices);
3986 }
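
/*
 * Example (worked numbers, hypothetical values): the width-proportional
 * split above. Two active pipes of hdisplay 1920 and 1280 sharing a
 * hypothetical 2048-entry slice range would be split as:
 *
 *   pipe A: start = 2048 * 0 / 3200    = 0,    end = 2048 * 1920 / 3200 = 1228
 *   pipe B: start = 2048 * 1920 / 3200 = 1228, end = 2048 * 3200 / 3200 = 2048
 *
 * with the slice offset then added to both ends.
 */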
3987
3988 static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
3989                                  int width, const struct drm_format_info *format,
3990                                  u64 modifier, unsigned int rotation,
3991                                  u32 plane_pixel_rate, struct skl_wm_params *wp,
3992                                  int color_plane);
3993 static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
3994                                  int level,
3995                                  const struct skl_wm_params *wp,
3996                                  const struct skl_wm_level *result_prev,
3997                                  struct skl_wm_level *result /* out */);
3998
3999 static unsigned int
4000 skl_cursor_allocation(const struct intel_crtc_state *crtc_state,
4001                       int num_active)
4002 {
4003         struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
4004         int level, max_level = ilk_wm_max_level(dev_priv);
4005         struct skl_wm_level wm = {};
4006         int ret, min_ddb_alloc = 0;
4007         struct skl_wm_params wp;
4008
4009         ret = skl_compute_wm_params(crtc_state, 256,
4010                                     drm_format_info(DRM_FORMAT_ARGB8888),
4011                                     DRM_FORMAT_MOD_LINEAR,
4012                                     DRM_MODE_ROTATE_0,
4013                                     crtc_state->pixel_rate, &wp, 0);
4014         drm_WARN_ON(&dev_priv->drm, ret);
4015
4016         for (level = 0; level <= max_level; level++) {
4017                 skl_compute_plane_wm(crtc_state, level, &wp, &wm, &wm);
4018                 if (wm.min_ddb_alloc == U16_MAX)
4019                         break;
4020
4021                 min_ddb_alloc = wm.min_ddb_alloc;
4022         }
4023
4024         return max(num_active == 1 ? 32 : 8, min_ddb_alloc);
4025 }
4026
4027 static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
4028                                        struct skl_ddb_entry *entry, u32 reg)
4029 {
4030
4031         entry->start = reg & DDB_ENTRY_MASK;
4032         entry->end = (reg >> DDB_ENTRY_END_SHIFT) & DDB_ENTRY_MASK;
4033
4034         if (entry->end)
4035                 entry->end += 1;
4036 }
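
/*
 * Example (illustrative values): a register value of 0x01ff0100 decodes
 * to start = 0x100 and an inclusive hardware end of 0x1ff, which the +1
 * above turns into the exclusive end = 0x200 used by the software
 * entries, i.e. an allocation of 0x100 blocks.
 */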
4037
4038 static void
4039 skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
4040                            const enum pipe pipe,
4041                            const enum plane_id plane_id,
4042                            struct skl_ddb_entry *ddb_y,
4043                            struct skl_ddb_entry *ddb_uv)
4044 {
4045         u32 val, val2;
4046         u32 fourcc = 0;
4047
4048         /* Cursor doesn't support NV12/planar, so no extra calculation needed */
4049         if (plane_id == PLANE_CURSOR) {
4050                 val = I915_READ(CUR_BUF_CFG(pipe));
4051                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
4052                 return;
4053         }
4054
4055         val = I915_READ(PLANE_CTL(pipe, plane_id));
4056
4057         /* No DDB allocated for disabled planes */
4058         if (val & PLANE_CTL_ENABLE)
4059                 fourcc = skl_format_to_fourcc(val & PLANE_CTL_FORMAT_MASK,
4060                                               val & PLANE_CTL_ORDER_RGBX,
4061                                               val & PLANE_CTL_ALPHA_MASK);
4062
4063         if (INTEL_GEN(dev_priv) >= 11) {
4064                 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
4065                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
4066         } else {
4067                 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
4068                 val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
4069
4070                 if (fourcc &&
4071                     drm_format_info_is_yuv_semiplanar(drm_format_info(fourcc)))
4072                         swap(val, val2);
4073
4074                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
4075                 skl_ddb_entry_init_from_hw(dev_priv, ddb_uv, val2);
4076         }
4077 }
4078
4079 void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
4080                                struct skl_ddb_entry *ddb_y,
4081                                struct skl_ddb_entry *ddb_uv)
4082 {
4083         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
4084         enum intel_display_power_domain power_domain;
4085         enum pipe pipe = crtc->pipe;
4086         intel_wakeref_t wakeref;
4087         enum plane_id plane_id;
4088
4089         power_domain = POWER_DOMAIN_PIPE(pipe);
4090         wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
4091         if (!wakeref)
4092                 return;
4093
4094         for_each_plane_id_on_crtc(crtc, plane_id)
4095                 skl_ddb_get_hw_plane_state(dev_priv, pipe,
4096                                            plane_id,
4097                                            &ddb_y[plane_id],
4098                                            &ddb_uv[plane_id]);
4099
4100         intel_display_power_put(dev_priv, power_domain, wakeref);
4101 }
4102
4103 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv)
4104 {
4105         dev_priv->enabled_dbuf_slices_mask =
4106                                 intel_enabled_dbuf_slices_mask(dev_priv);
4107 }
4108
4109 /*
4110  * Determines the downscale amount of a plane for the purposes of watermark calculations.
4111  * The bspec defines downscale amount as:
4112  *
4113  * """
4114  * Horizontal down scale amount = maximum[1, Horizontal source size /
4115  *                                           Horizontal destination size]
4116  * Vertical down scale amount = maximum[1, Vertical source size /
4117  *                                         Vertical destination size]
4118  * Total down scale amount = Horizontal down scale amount *
4119  *                           Vertical down scale amount
4120  * """
4121  *
4122  * Return value is provided in 16.16 fixed point form to retain fractional part.
4123  * Caller should take care of dividing & rounding off the value.
4124  */
4125 static uint_fixed_16_16_t
4126 skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state,
4127                            const struct intel_plane_state *plane_state)
4128 {
4129         u32 src_w, src_h, dst_w, dst_h;
4130         uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4131         uint_fixed_16_16_t downscale_h, downscale_w;
4132
4133         if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state)))
4134                 return u32_to_fixed16(0);
4135
4136         /*
4137          * Src coordinates are already rotated by 270 degrees for
4138          * the 90/270 degree plane rotation cases (to match the
4139          * GTT mapping), hence no need to account for rotation here.
4140          *
4141          * n.b., src is 16.16 fixed point, dst is whole integer.
4142          */
4143         src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
4144         src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
4145         dst_w = drm_rect_width(&plane_state->uapi.dst);
4146         dst_h = drm_rect_height(&plane_state->uapi.dst);
4147
4148         fp_w_ratio = div_fixed16(src_w, dst_w);
4149         fp_h_ratio = div_fixed16(src_h, dst_h);
4150         downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4151         downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4152
4153         return mul_fixed16(downscale_w, downscale_h);
4154 }
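
/*
 * Worked example (illustrative values): a 4096x2160 source scaled to a
 * 2048x1080 destination gives fp_w_ratio = fp_h_ratio = 2.0, hence a
 * total downscale amount of 2.0 * 2.0 = 4.0, i.e. 4 << 16 = 0x40000 in
 * 16.16 fixed point. Upscaling clamps each ratio to 1.0 via
 * max_fixed16().
 */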
4155
4156 struct dbuf_slice_conf_entry {
4157         u8 active_pipes;
4158         u8 dbuf_mask[I915_MAX_PIPES];
4159 };
4160
4161 /*
4162  * Table taken from Bspec 12716
4163  * Pipes do have some preferred DBuf slice affinity,
4164  * plus there are some hardcoded requirements on how
4165  * those should be distributed for multipipe scenarios.
4166  * With more DBuf slices the algorithm can get even more messy
4167  * and less readable, so it was decided to use a table almost
4168  * as-is from BSpec itself - that way it is at least easier
4169  * to compare, change and check.
4170  */
4171 static const struct dbuf_slice_conf_entry icl_allowed_dbufs[] =
4172 /* Autogenerated with igt/tools/intel_dbuf_map tool: */
4173 {
4174         {
4175                 .active_pipes = BIT(PIPE_A),
4176                 .dbuf_mask = {
4177                         [PIPE_A] = BIT(DBUF_S1),
4178                 },
4179         },
4180         {
4181                 .active_pipes = BIT(PIPE_B),
4182                 .dbuf_mask = {
4183                         [PIPE_B] = BIT(DBUF_S1),
4184                 },
4185         },
4186         {
4187                 .active_pipes = BIT(PIPE_A) | BIT(PIPE_B),
4188                 .dbuf_mask = {
4189                         [PIPE_A] = BIT(DBUF_S1),
4190                         [PIPE_B] = BIT(DBUF_S2),
4191                 },
4192         },
4193         {
4194                 .active_pipes = BIT(PIPE_C),
4195                 .dbuf_mask = {
4196                         [PIPE_C] = BIT(DBUF_S2),
4197                 },
4198         },
4199         {
4200                 .active_pipes = BIT(PIPE_A) | BIT(PIPE_C),
4201                 .dbuf_mask = {
4202                         [PIPE_A] = BIT(DBUF_S1),
4203                         [PIPE_C] = BIT(DBUF_S2),
4204                 },
4205         },
4206         {
4207                 .active_pipes = BIT(PIPE_B) | BIT(PIPE_C),
4208                 .dbuf_mask = {
4209                         [PIPE_B] = BIT(DBUF_S1),
4210                         [PIPE_C] = BIT(DBUF_S2),
4211                 },
4212         },
4213         {
4214                 .active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C),
4215                 .dbuf_mask = {
4216                         [PIPE_A] = BIT(DBUF_S1),
4217                         [PIPE_B] = BIT(DBUF_S1),
4218                         [PIPE_C] = BIT(DBUF_S2),
4219                 },
4220         },
4221         {}
4222 };
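
/*
 * Example lookup (illustrative): with active_pipes ==
 * BIT(PIPE_A) | BIT(PIPE_B) the A+B entry above matches, so
 * compute_dbuf_slices() below returns BIT(DBUF_S1) for PIPE_A and
 * BIT(DBUF_S2) for PIPE_B, while PIPE_C yields 0 since its mask is
 * left zero initialized in that entry.
 */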
4223
4224 /*
4225  * Table taken from Bspec 49255
4226  * Pipes do have some preferred DBuf slice affinity,
4227  * plus there are some hardcoded requirements on how
4228  * those should be distributed for multipipe scenarios.
4229  * With more DBuf slices the algorithm can get even more messy
4230  * and less readable, so it was decided to use a table almost
4231  * as-is from BSpec itself - that way it is at least easier
4232  * to compare, change and check.
4233  */
4234 static const struct dbuf_slice_conf_entry tgl_allowed_dbufs[] =
4235 /* Autogenerated with igt/tools/intel_dbuf_map tool: */
4236 {
4237         {
4238                 .active_pipes = BIT(PIPE_A),
4239                 .dbuf_mask = {
4240                         [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2),
4241                 },
4242         },
4243         {
4244                 .active_pipes = BIT(PIPE_B),
4245                 .dbuf_mask = {
4246                         [PIPE_B] = BIT(DBUF_S1) | BIT(DBUF_S2),
4247                 },
4248         },
4249         {
4250                 .active_pipes = BIT(PIPE_A) | BIT(PIPE_B),
4251                 .dbuf_mask = {
4252                         [PIPE_A] = BIT(DBUF_S2),
4253                         [PIPE_B] = BIT(DBUF_S1),
4254                 },
4255         },
4256         {
4257                 .active_pipes = BIT(PIPE_C),
4258                 .dbuf_mask = {
4259                         [PIPE_C] = BIT(DBUF_S2) | BIT(DBUF_S1),
4260                 },
4261         },
4262         {
4263                 .active_pipes = BIT(PIPE_A) | BIT(PIPE_C),
4264                 .dbuf_mask = {
4265                         [PIPE_A] = BIT(DBUF_S1),
4266                         [PIPE_C] = BIT(DBUF_S2),
4267                 },
4268         },
4269         {
4270                 .active_pipes = BIT(PIPE_B) | BIT(PIPE_C),
4271                 .dbuf_mask = {
4272                         [PIPE_B] = BIT(DBUF_S1),
4273                         [PIPE_C] = BIT(DBUF_S2),
4274                 },
4275         },
4276         {
4277                 .active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C),
4278                 .dbuf_mask = {
4279                         [PIPE_A] = BIT(DBUF_S1),
4280                         [PIPE_B] = BIT(DBUF_S1),
4281                         [PIPE_C] = BIT(DBUF_S2),
4282                 },
4283         },
4284         {
4285                 .active_pipes = BIT(PIPE_D),
4286                 .dbuf_mask = {
4287                         [PIPE_D] = BIT(DBUF_S2) | BIT(DBUF_S1),
4288                 },
4289         },
4290         {
4291                 .active_pipes = BIT(PIPE_A) | BIT(PIPE_D),
4292                 .dbuf_mask = {
4293                         [PIPE_A] = BIT(DBUF_S1),
4294                         [PIPE_D] = BIT(DBUF_S2),
4295                 },
4296         },
4297         {
4298                 .active_pipes = BIT(PIPE_B) | BIT(PIPE_D),
4299                 .dbuf_mask = {
4300                         [PIPE_B] = BIT(DBUF_S1),
4301                         [PIPE_D] = BIT(DBUF_S2),
4302                 },
4303         },
4304         {
4305                 .active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_D),
4306                 .dbuf_mask = {
4307                         [PIPE_A] = BIT(DBUF_S1),
4308                         [PIPE_B] = BIT(DBUF_S1),
4309                         [PIPE_D] = BIT(DBUF_S2),
4310                 },
4311         },
4312         {
4313                 .active_pipes = BIT(PIPE_C) | BIT(PIPE_D),
4314                 .dbuf_mask = {
4315                         [PIPE_C] = BIT(DBUF_S1),
4316                         [PIPE_D] = BIT(DBUF_S2),
4317                 },
4318         },
4319         {
4320                 .active_pipes = BIT(PIPE_A) | BIT(PIPE_C) | BIT(PIPE_D),
4321                 .dbuf_mask = {
4322                         [PIPE_A] = BIT(DBUF_S1),
4323                         [PIPE_C] = BIT(DBUF_S2),
4324                         [PIPE_D] = BIT(DBUF_S2),
4325                 },
4326         },
4327         {
4328                 .active_pipes = BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D),
4329                 .dbuf_mask = {
4330                         [PIPE_B] = BIT(DBUF_S1),
4331                         [PIPE_C] = BIT(DBUF_S2),
4332                         [PIPE_D] = BIT(DBUF_S2),
4333                 },
4334         },
4335         {
4336                 .active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D),
4337                 .dbuf_mask = {
4338                         [PIPE_A] = BIT(DBUF_S1),
4339                         [PIPE_B] = BIT(DBUF_S1),
4340                         [PIPE_C] = BIT(DBUF_S2),
4341                         [PIPE_D] = BIT(DBUF_S2),
4342                 },
4343         },
4344         {}
4345 };
4346
4347 static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes,
4348                               const struct dbuf_slice_conf_entry *dbuf_slices)
4349 {
4350         int i;
4351
4352         for (i = 0; dbuf_slices[i].active_pipes != 0; i++) {
4353                 if (dbuf_slices[i].active_pipes == active_pipes)
4354                         return dbuf_slices[i].dbuf_mask[pipe];
4355         }
4356         return 0;
4357 }
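
/*
 * Usage sketch (illustrative): the empty {} sentinel terminating each
 * table above has active_pipes == 0, so the loop stops without an
 * ARRAY_SIZE() having to be passed around:
 *
 *	u8 mask = compute_dbuf_slices(PIPE_A,
 *				      BIT(PIPE_A) | BIT(PIPE_B),
 *				      icl_allowed_dbufs);
 *
 * mask == BIT(DBUF_S1) per the icl table above.
 */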
4358
4359 /*
4360  * This function finds an entry with the same enabled pipe configuration and
4361  * returns the corresponding DBuf slice mask as stated in BSpec for the
4362  * particular platform.
4363  */
4364 static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes)
4365 {
4366         /*
4367          * FIXME: For ICL this is still a bit unclear as prev BSpec revision
4368          * required calculating "pipe ratio" in order to determine
4369          * if one or two slices can be used for single pipe configurations
4370          * as additional constraint to the existing table.
4371          * However, based on recent info, it should not be "pipe ratio"
4372          * but rather the ratio between pixel_rate and cdclk with some
4373          * additional constants, so for now we are using only the table
4374          * until this is clarified. We will also need to bring the
4375          * crtc_state back into this function once those additional
4376          * constraints pop up.
4377          */
4378         return compute_dbuf_slices(pipe, active_pipes, icl_allowed_dbufs);
4379 }
4380
4381 static u8 tgl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes)
4382 {
4383         return compute_dbuf_slices(pipe, active_pipes, tgl_allowed_dbufs);
4384 }
4385
4386 static u8 skl_compute_dbuf_slices(const struct intel_crtc_state *crtc_state,
4387                                   u8 active_pipes)
4388 {
4389         struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
4390         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
4391         enum pipe pipe = crtc->pipe;
4392
4393         if (IS_GEN(dev_priv, 12))
4394                 return tgl_compute_dbuf_slices(pipe, active_pipes);
4395         else if (IS_GEN(dev_priv, 11))
4396                 return icl_compute_dbuf_slices(pipe, active_pipes);
4397         /*
4398          * For anything else just return one slice for now.
4399          * Should be extended for other platforms.
4400          */
4401         return BIT(DBUF_S1);
4402 }
4403
4404 static u64
4405 skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state,
4406                              const struct intel_plane_state *plane_state,
4407                              int color_plane)
4408 {
4409         struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
4410         const struct drm_framebuffer *fb = plane_state->hw.fb;
4411         u32 data_rate;
4412         u32 width = 0, height = 0;
4413         uint_fixed_16_16_t down_scale_amount;
4414         u64 rate;
4415
4416         if (!plane_state->uapi.visible)
4417                 return 0;
4418
4419         if (plane->id == PLANE_CURSOR)
4420                 return 0;
4421
4422         if (color_plane == 1 &&
4423             !intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier))
4424                 return 0;
4425
4426         /*
4427          * Src coordinates are already rotated by 270 degrees for
4428          * the 90/270 degree plane rotation cases (to match the
4429          * GTT mapping), hence no need to account for rotation here.
4430          */
4431         width = drm_rect_width(&plane_state->uapi.src) >> 16;
4432         height = drm_rect_height(&plane_state->uapi.src) >> 16;
4433
4434         /* UV plane does 1/2 pixel sub-sampling */
4435         if (color_plane == 1) {
4436                 width /= 2;
4437                 height /= 2;
4438         }
4439
4440         data_rate = width * height;
4441
4442         down_scale_amount = skl_plane_downscale_amount(crtc_state, plane_state);
4443
4444         rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4445
4446         rate *= fb->format->cpp[color_plane];
4447         return rate;
4448 }
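
/*
 * Worked example (illustrative values): a fully visible 1920x1080
 * ARGB8888 plane (cpp = 4) with no downscaling yields data_rate =
 * 1920 * 1080 = 2073600 and rate = 2073600 * 4 = 8294400. For
 * color_plane == 1 of a semiplanar format the same source would use
 * 960 * 540 samples instead due to the 2x2 subsampling.
 */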
4449
4450 static u64
4451 skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
4452                                  u64 *plane_data_rate,
4453                                  u64 *uv_plane_data_rate)
4454 {
4455         struct intel_plane *plane;
4456         const struct intel_plane_state *plane_state;
4457         u64 total_data_rate = 0;
4458
4459         /* Calculate and cache data rate for each plane */
4460         intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
4461                 enum plane_id plane_id = plane->id;
4462                 u64 rate;
4463
4464                 /* packed/y */
4465                 rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0);
4466                 plane_data_rate[plane_id] = rate;
4467                 total_data_rate += rate;
4468
4469                 /* uv-plane */
4470                 rate = skl_plane_relative_data_rate(crtc_state, plane_state, 1);
4471                 uv_plane_data_rate[plane_id] = rate;
4472                 total_data_rate += rate;
4473         }
4474
4475         return total_data_rate;
4476 }
4477
4478 static u64
4479 icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
4480                                  u64 *plane_data_rate)
4481 {
4482         struct intel_plane *plane;
4483         const struct intel_plane_state *plane_state;
4484         u64 total_data_rate = 0;
4485
4486         /* Calculate and cache data rate for each plane */
4487         intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
4488                 enum plane_id plane_id = plane->id;
4489                 u64 rate;
4490
4491                 if (!plane_state->planar_linked_plane) {
4492                         rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0);
4493                         plane_data_rate[plane_id] = rate;
4494                         total_data_rate += rate;
4495                 } else {
4496                         enum plane_id y_plane_id;
4497
4498                         /*
4499                          * The slave plane might not iterate in
4500                          * intel_atomic_crtc_state_for_each_plane_state(),
4501                          * and needs the master plane state which may be
4502                          * NULL if we try get_new_plane_state(), so we
4503                          * always calculate from the master.
4504                          */
4505                         if (plane_state->planar_slave)
4506                                 continue;
4507
4508                         /* Y plane rate is calculated on the slave */
4509                         rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0);
4510                         y_plane_id = plane_state->planar_linked_plane->id;
4511                         plane_data_rate[y_plane_id] = rate;
4512                         total_data_rate += rate;
4513
4514                         rate = skl_plane_relative_data_rate(crtc_state, plane_state, 1);
4515                         plane_data_rate[plane_id] = rate;
4516                         total_data_rate += rate;
4517                 }
4518         }
4519
4520         return total_data_rate;
4521 }
4522
4523 static int
4524 skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state)
4525 {
4526         struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
4527         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
4528         struct skl_ddb_entry *alloc = &crtc_state->wm.skl.ddb;
4529         u16 alloc_size, start = 0;
4530         u16 total[I915_MAX_PLANES] = {};
4531         u16 uv_total[I915_MAX_PLANES] = {};
4532         u64 total_data_rate;
4533         enum plane_id plane_id;
4534         int num_active;
4535         u64 plane_data_rate[I915_MAX_PLANES] = {};
4536         u64 uv_plane_data_rate[I915_MAX_PLANES] = {};
4537         u32 blocks;
4538         int level;
4539
4540         /* Clear the partitioning for disabled planes. */
4541         memset(crtc_state->wm.skl.plane_ddb_y, 0, sizeof(crtc_state->wm.skl.plane_ddb_y));
4542         memset(crtc_state->wm.skl.plane_ddb_uv, 0, sizeof(crtc_state->wm.skl.plane_ddb_uv));
4543
4544         if (!crtc_state->hw.active) {
4545                 alloc->start = alloc->end = 0;
4546                 return 0;
4547         }
4548
4549         if (INTEL_GEN(dev_priv) >= 11)
4550                 total_data_rate =
4551                         icl_get_total_relative_data_rate(crtc_state,
4552                                                          plane_data_rate);
4553         else
4554                 total_data_rate =
4555                         skl_get_total_relative_data_rate(crtc_state,
4556                                                          plane_data_rate,
4557                                                          uv_plane_data_rate);
4558
4559
4560         skl_ddb_get_pipe_allocation_limits(dev_priv, crtc_state, total_data_rate,
4561                                            alloc, &num_active);
4562         alloc_size = skl_ddb_entry_size(alloc);
4563         if (alloc_size == 0)
4564                 return 0;
4565
4566         /* Allocate fixed number of blocks for cursor. */
4567         total[PLANE_CURSOR] = skl_cursor_allocation(crtc_state, num_active);
4568         alloc_size -= total[PLANE_CURSOR];
4569         crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR].start =
4570                 alloc->end - total[PLANE_CURSOR];
4571         crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end;
4572
4573         if (total_data_rate == 0)
4574                 return 0;
4575
4576         /*
4577          * Find the highest watermark level for which we can satisfy the block
4578          * requirement of active planes.
4579          */
4580         for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) {
4581                 blocks = 0;
4582                 for_each_plane_id_on_crtc(crtc, plane_id) {
4583                         const struct skl_plane_wm *wm =
4584                                 &crtc_state->wm.skl.optimal.planes[plane_id];
4585
4586                         if (plane_id == PLANE_CURSOR) {
4587                                 if (wm->wm[level].min_ddb_alloc > total[PLANE_CURSOR]) {
4588                                         drm_WARN_ON(&dev_priv->drm,
4589                                                     wm->wm[level].min_ddb_alloc != U16_MAX);
4590                                         blocks = U32_MAX;
4591                                         break;
4592                                 }
4593                                 continue;
4594                         }
4595
4596                         blocks += wm->wm[level].min_ddb_alloc;
4597                         blocks += wm->uv_wm[level].min_ddb_alloc;
4598                 }
4599
4600                 if (blocks <= alloc_size) {
4601                         alloc_size -= blocks;
4602                         break;
4603                 }
4604         }
4605
4606         if (level < 0) {
4607                 drm_dbg_kms(&dev_priv->drm,
4608                             "Requested display configuration exceeds system DDB limitations");
4609                 drm_dbg_kms(&dev_priv->drm, "minimum required %d/%d\n",
4610                             blocks, alloc_size);
4611                 return -EINVAL;
4612         }
4613
4614         /*
4615          * Grant each plane the blocks it requires at the highest achievable
4616          * watermark level, plus an extra share of the leftover blocks
4617          * proportional to its relative data rate.
4618          */
4619         for_each_plane_id_on_crtc(crtc, plane_id) {
4620                 const struct skl_plane_wm *wm =
4621                         &crtc_state->wm.skl.optimal.planes[plane_id];
4622                 u64 rate;
4623                 u16 extra;
4624
4625                 if (plane_id == PLANE_CURSOR)
4626                         continue;
4627
4628                 /*
4629                  * We've accounted for all active planes; remaining planes are
4630                  * all disabled.
4631                  */
4632                 if (total_data_rate == 0)
4633                         break;
4634
4635                 rate = plane_data_rate[plane_id];
4636                 extra = min_t(u16, alloc_size,
4637                               DIV64_U64_ROUND_UP(alloc_size * rate,
4638                                                  total_data_rate));
4639                 total[plane_id] = wm->wm[level].min_ddb_alloc + extra;
4640                 alloc_size -= extra;
4641                 total_data_rate -= rate;
4642
4643                 if (total_data_rate == 0)
4644                         break;
4645
4646                 rate = uv_plane_data_rate[plane_id];
4647                 extra = min_t(u16, alloc_size,
4648                               DIV64_U64_ROUND_UP(alloc_size * rate,
4649                                                  total_data_rate));
4650                 uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra;
4651                 alloc_size -= extra;
4652                 total_data_rate -= rate;
4653         }
4654         drm_WARN_ON(&dev_priv->drm, alloc_size != 0 || total_data_rate != 0);
4655
4656         /* Set the actual DDB start/end points for each plane */
4657         start = alloc->start;
4658         for_each_plane_id_on_crtc(crtc, plane_id) {
4659                 struct skl_ddb_entry *plane_alloc =
4660                         &crtc_state->wm.skl.plane_ddb_y[plane_id];
4661                 struct skl_ddb_entry *uv_plane_alloc =
4662                         &crtc_state->wm.skl.plane_ddb_uv[plane_id];
4663
4664                 if (plane_id == PLANE_CURSOR)
4665                         continue;
4666
4667                 /* Gen11+ uses a separate plane for UV watermarks */
4668                 drm_WARN_ON(&dev_priv->drm,
4669                             INTEL_GEN(dev_priv) >= 11 && uv_total[plane_id]);
4670
4671                 /* Leave disabled planes at (0,0) */
4672                 if (total[plane_id]) {
4673                         plane_alloc->start = start;
4674                         start += total[plane_id];
4675                         plane_alloc->end = start;
4676                 }
4677
4678                 if (uv_total[plane_id]) {
4679                         uv_plane_alloc->start = start;
4680                         start += uv_total[plane_id];
4681                         uv_plane_alloc->end = start;
4682                 }
4683         }
4684
4685         /*
4686          * When we calculated watermark values we didn't know how high
4687          * of a level we'd actually be able to hit, so we just marked
4688          * all levels as "enabled."  Go back now and disable the ones
4689          * that aren't actually possible.
4690          */
4691         for (level++; level <= ilk_wm_max_level(dev_priv); level++) {
4692                 for_each_plane_id_on_crtc(crtc, plane_id) {
4693                         struct skl_plane_wm *wm =
4694                                 &crtc_state->wm.skl.optimal.planes[plane_id];
4695
4696                         /*
4697                          * We only disable the watermarks for each plane if
4698                          * they exceed the ddb allocation of said plane. This
4699                          * is done so that we don't end up touching cursor
4700                          * watermarks needlessly when some other plane reduces
4701                          * our max possible watermark level.
4702                          *
4703                          * Bspec has this to say about the PLANE_WM enable bit:
4704                          * "All the watermarks at this level for all enabled
4705                          *  planes must be enabled before the level will be used."
4706                          * So this is actually safe to do.
4707                          */
4708                         if (wm->wm[level].min_ddb_alloc > total[plane_id] ||
4709                             wm->uv_wm[level].min_ddb_alloc > uv_total[plane_id])
4710                                 memset(&wm->wm[level], 0, sizeof(wm->wm[level]));
4711
4712                         /*
4713                          * Wa_1408961008:icl, ehl
4714                          * Underruns with WM1+ disabled
4715                          */
4716                         if (IS_GEN(dev_priv, 11) &&
4717                             level == 1 && wm->wm[0].plane_en) {
4718                                 wm->wm[level].plane_res_b = wm->wm[0].plane_res_b;
4719                                 wm->wm[level].plane_res_l = wm->wm[0].plane_res_l;
4720                                 wm->wm[level].ignore_lines = wm->wm[0].ignore_lines;
4721                         }
4722                 }
4723         }
4724
4725         /*
4726          * Go back and disable the transition watermark if it turns out we
4727          * don't have enough DDB blocks for it.
4728          */
4729         for_each_plane_id_on_crtc(crtc, plane_id) {
4730                 struct skl_plane_wm *wm =
4731                         &crtc_state->wm.skl.optimal.planes[plane_id];
4732
4733                 if (wm->trans_wm.plane_res_b >= total[plane_id])
4734                         memset(&wm->trans_wm, 0, sizeof(wm->trans_wm));
4735         }
4736
4737         return 0;
4738 }
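
/*
 * Allocation example (illustrative values): with alloc_size = 100
 * blocks left over after the highest achievable level's minimums and
 * two planes with data rates in a 3:1 ratio (no UV rates), the first
 * plane gets extra = DIV64_U64_ROUND_UP(100 * 3r, 4r) = 75 blocks and
 * the second the remaining 25, since alloc_size and total_data_rate
 * both shrink as the planes are processed.
 */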
4739
4740 /*
4741  * The max latency should be 257 (max the punit can code is 255 and we add 2us
4742  * for the read latency) and cpp should always be <= 8, so that
4743  * should allow pixel_rate up to ~2 GHz which seems sufficient since max
4744  * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4745 */
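
/*
 * Sanity check of the above (illustrative): pixel_rate is in kHz, so
 * the worst case intermediate below is 257 * 2000000 * 8 = 4112000000,
 * which still fits in a u32 (~4294967295) before the division by
 * 1000 * dbuf_block_size.
 */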
4746 static uint_fixed_16_16_t
4747 skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate,
4748                u8 cpp, u32 latency, u32 dbuf_block_size)
4749 {
4750         u32 wm_intermediate_val;
4751         uint_fixed_16_16_t ret;
4752
4753         if (latency == 0)
4754                 return FP_16_16_MAX;
4755
4756         wm_intermediate_val = latency * pixel_rate * cpp;
4757         ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
4758
4759         if (INTEL_GEN(dev_priv) >= 10)
4760                 ret = add_fixed16_u32(ret, 1);
4761
4762         return ret;
4763 }
4764
4765 static uint_fixed_16_16_t
4766 skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency,
4767                uint_fixed_16_16_t plane_blocks_per_line)
4768 {
4769         u32 wm_intermediate_val;
4770         uint_fixed_16_16_t ret;
4771
4772         if (latency == 0)
4773                 return FP_16_16_MAX;
4774
4775         wm_intermediate_val = latency * pixel_rate;
4776         wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4777                                            pipe_htotal * 1000);
4778         ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
4779         return ret;
4780 }
4781
4782 static uint_fixed_16_16_t
4783 intel_get_linetime_us(const struct intel_crtc_state *crtc_state)
4784 {
4785         u32 pixel_rate;
4786         u32 crtc_htotal;
4787         uint_fixed_16_16_t linetime_us;
4788
4789         if (!crtc_state->hw.active)
4790                 return u32_to_fixed16(0);
4791
4792         pixel_rate = crtc_state->pixel_rate;
4793
4794         if (WARN_ON(pixel_rate == 0))
4795                 return u32_to_fixed16(0);
4796
4797         crtc_htotal = crtc_state->hw.adjusted_mode.crtc_htotal;
4798         linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
4799
4800         return linetime_us;
4801 }
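
/*
 * Worked example (illustrative values): a 1080p60 timing with
 * crtc_htotal = 2200 and pixel_rate = 148500 (kHz) gives
 * linetime_us = div_fixed16(2200 * 1000, 148500) ~= 14.8 us.
 */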
4802
4803 static u32
4804 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *crtc_state,
4805                               const struct intel_plane_state *plane_state)
4806 {
4807         u64 adjusted_pixel_rate;
4808         uint_fixed_16_16_t downscale_amount;
4809
4810         /* Shouldn't reach here on disabled planes... */
4811         if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state)))
4812                 return 0;
4813
4814         /*
4815          * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4816          * with additional adjustments for plane-specific scaling.
4817          */
4818         adjusted_pixel_rate = crtc_state->pixel_rate;
4819         downscale_amount = skl_plane_downscale_amount(crtc_state, plane_state);
4820
4821         return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4822                                             downscale_amount);
4823 }
4824
4825 static int
4826 skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
4827                       int width, const struct drm_format_info *format,
4828                       u64 modifier, unsigned int rotation,
4829                       u32 plane_pixel_rate, struct skl_wm_params *wp,
4830                       int color_plane)
4831 {
4832         struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
4833         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
4834         u32 interm_pbpl;
4835
4836         /* only planar format has two planes */
4837         if (color_plane == 1 &&
4838             !intel_format_info_is_yuv_semiplanar(format, modifier)) {
4839                 drm_dbg_kms(&dev_priv->drm,
4840                             "Non planar format have single plane\n");
4841                 return -EINVAL;
4842         }
4843
4844         wp->y_tiled = modifier == I915_FORMAT_MOD_Y_TILED ||
4845                       modifier == I915_FORMAT_MOD_Yf_TILED ||
4846                       modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4847                       modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4848         wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED;
4849         wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4850                          modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4851         wp->is_planar = intel_format_info_is_yuv_semiplanar(format, modifier);
4852
4853         wp->width = width;
4854         if (color_plane == 1 && wp->is_planar)
4855                 wp->width /= 2;
4856
4857         wp->cpp = format->cpp[color_plane];
4858         wp->plane_pixel_rate = plane_pixel_rate;
4859
4860         if (INTEL_GEN(dev_priv) >= 11 &&
4861             modifier == I915_FORMAT_MOD_Yf_TILED  && wp->cpp == 1)
4862                 wp->dbuf_block_size = 256;
4863         else
4864                 wp->dbuf_block_size = 512;
4865
4866         if (drm_rotation_90_or_270(rotation)) {
4867                 switch (wp->cpp) {
4868                 case 1:
4869                         wp->y_min_scanlines = 16;
4870                         break;
4871                 case 2:
4872                         wp->y_min_scanlines = 8;
4873                         break;
4874                 case 4:
4875                         wp->y_min_scanlines = 4;
4876                         break;
4877                 default:
4878                         MISSING_CASE(wp->cpp);
4879                         return -EINVAL;
4880                 }
4881         } else {
4882                 wp->y_min_scanlines = 4;
4883         }
4884
4885         if (skl_needs_memory_bw_wa(dev_priv))
4886                 wp->y_min_scanlines *= 2;
4887
4888         wp->plane_bytes_per_line = wp->width * wp->cpp;
4889         if (wp->y_tiled) {
4890                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
4891                                            wp->y_min_scanlines,
4892                                            wp->dbuf_block_size);
4893
4894                 if (INTEL_GEN(dev_priv) >= 10)
4895                         interm_pbpl++;
4896
4897                 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4898                                                         wp->y_min_scanlines);
4899         } else if (wp->x_tiled && IS_GEN(dev_priv, 9)) {
4900                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4901                                            wp->dbuf_block_size);
4902                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4903         } else {
4904                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4905                                            wp->dbuf_block_size) + 1;
4906                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4907         }
4908
4909         wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4910                                              wp->plane_blocks_per_line);
4911
4912         wp->linetime_us = fixed16_to_u32_round_up(
4913                                         intel_get_linetime_us(crtc_state));
4914
4915         return 0;
4916 }
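
/*
 * Worked example (illustrative values): a 1920-wide X-tiled ARGB8888
 * plane (cpp = 4) on gen9 has plane_bytes_per_line = 7680 and
 * plane_blocks_per_line = DIV_ROUND_UP(7680, 512) = 15; the
 * non-gen9 / linear path above would add one extra block on top (16).
 */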
4917
4918 static int
4919 skl_compute_plane_wm_params(const struct intel_crtc_state *crtc_state,
4920                             const struct intel_plane_state *plane_state,
4921                             struct skl_wm_params *wp, int color_plane)
4922 {
4923         const struct drm_framebuffer *fb = plane_state->hw.fb;
4924         int width;
4925
4926         /*
4927          * Src coordinates are already rotated by 270 degrees for
4928          * the 90/270 degree plane rotation cases (to match the
4929          * GTT mapping), hence no need to account for rotation here.
4930          */
4931         width = drm_rect_width(&plane_state->uapi.src) >> 16;
4932
4933         return skl_compute_wm_params(crtc_state, width,
4934                                      fb->format, fb->modifier,
4935                                      plane_state->hw.rotation,
4936                                      skl_adjusted_plane_pixel_rate(crtc_state, plane_state),
4937                                      wp, color_plane);
4938 }
4939
4940 static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level)
4941 {
4942         if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
4943                 return true;
4944
4945         /* The number of lines is ignored for the level 0 watermark. */
4946         return level > 0;
4947 }
4948
4949 static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
4950                                  int level,
4951                                  const struct skl_wm_params *wp,
4952                                  const struct skl_wm_level *result_prev,
4953                                  struct skl_wm_level *result /* out */)
4954 {
4955         struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
4956         u32 latency = dev_priv->wm.skl_latency[level];
4957         uint_fixed_16_16_t method1, method2;
4958         uint_fixed_16_16_t selected_result;
4959         u32 res_blocks, res_lines, min_ddb_alloc = 0;
4960
4961         if (latency == 0) {
4962                 /* reject it */
4963                 result->min_ddb_alloc = U16_MAX;
4964                 return;
4965         }
4966
4967         /*
4968          * WaIncreaseLatencyIPCEnabled: kbl,cfl
4969          * Display WA #1141: kbl,cfl
4970          */
4971         if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
4972             dev_priv->ipc_enabled)
4973                 latency += 4;
4974
4975         if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled)
4976                 latency += 15;
4977
4978         method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
4979                                  wp->cpp, latency, wp->dbuf_block_size);
4980         method2 = skl_wm_method2(wp->plane_pixel_rate,
4981                                  crtc_state->hw.adjusted_mode.crtc_htotal,
4982                                  latency,
4983                                  wp->plane_blocks_per_line);
4984
4985         if (wp->y_tiled) {
4986                 selected_result = max_fixed16(method2, wp->y_tile_minimum);
4987         } else {
4988                 if ((wp->cpp * crtc_state->hw.adjusted_mode.crtc_htotal /
4989                      wp->dbuf_block_size < 1) &&
4990                      (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) {
4991                         selected_result = method2;
4992                 } else if (latency >= wp->linetime_us) {
4993                         if (IS_GEN(dev_priv, 9) &&
4994                             !IS_GEMINILAKE(dev_priv))
4995                                 selected_result = min_fixed16(method1, method2);
4996                         else
4997                                 selected_result = method2;
4998                 } else {
4999                         selected_result = method1;
5000                 }
5001         }
5002
5003         res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
5004         res_lines = div_round_up_fixed16(selected_result,
5005                                          wp->plane_blocks_per_line);
5006
5007         if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) {
5008                 /* Display WA #1125: skl,bxt,kbl */
5009                 if (level == 0 && wp->rc_surface)
5010                         res_blocks +=
5011                                 fixed16_to_u32_round_up(wp->y_tile_minimum);
5012
5013                 /* Display WA #1126: skl,bxt,kbl */
5014                 if (level >= 1 && level <= 7) {
5015                         if (wp->y_tiled) {
5016                                 res_blocks +=
5017                                     fixed16_to_u32_round_up(wp->y_tile_minimum);
5018                                 res_lines += wp->y_min_scanlines;
5019                         } else {
5020                                 res_blocks++;
5021                         }
5022
5023                         /*
5024                          * Make sure result blocks for higher latency levels are
5025                          * at least as high as the level below the current level.
5026                          * Assumption in DDB algorithm optimization for special
5027                          * cases. Also covers Display WA #1125 for RC.
5028                          */
5029                         if (result_prev->plane_res_b > res_blocks)
5030                                 res_blocks = result_prev->plane_res_b;
5031                 }
5032         }
5033
5034         if (INTEL_GEN(dev_priv) >= 11) {
5035                 if (wp->y_tiled) {
5036                         int extra_lines;
5037
5038                         if (res_lines % wp->y_min_scanlines == 0)
5039                                 extra_lines = wp->y_min_scanlines;
5040                         else
5041                                 extra_lines = wp->y_min_scanlines * 2 -
5042                                         res_lines % wp->y_min_scanlines;
5043
5044                         min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines,
5045                                                                  wp->plane_blocks_per_line);
5046                 } else {
5047                         min_ddb_alloc = res_blocks +
5048                                 DIV_ROUND_UP(res_blocks, 10);
5049                 }
5050         }
5051
5052         if (!skl_wm_has_lines(dev_priv, level))
5053                 res_lines = 0;
5054
5055         if (res_lines > 31) {
5056                 /* reject it */
5057                 result->min_ddb_alloc = U16_MAX;
5058                 return;
5059         }
5060
5061         /*
5062          * If res_lines is valid, assume we can use this watermark level
5063          * for now.  We'll come back and disable it after we calculate the
5064          * DDB allocation if it turns out we don't actually have enough
5065          * blocks to satisfy it.
5066          */
5067         result->plane_res_b = res_blocks;
5068         result->plane_res_l = res_lines;
5069         /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */
5070         result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1;
5071         result->plane_en = true;
5072 }
5073
5074 static void
5075 skl_compute_wm_levels(const struct intel_crtc_state *crtc_state,
5076                       const struct skl_wm_params *wm_params,
5077                       struct skl_wm_level *levels)
5078 {
5079         struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
5080         int level, max_level = ilk_wm_max_level(dev_priv);
5081         struct skl_wm_level *result_prev = &levels[0];
5082
5083         for (level = 0; level <= max_level; level++) {
5084                 struct skl_wm_level *result = &levels[level];
5085
5086                 skl_compute_plane_wm(crtc_state, level, wm_params,
5087                                      result_prev, result);
5088
5089                 result_prev = result;
5090         }
5091 }
5092
5093 static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state,
5094                                       const struct skl_wm_params *wp,
5095                                       struct skl_plane_wm *wm)
5096 {
5097         struct drm_device *dev = crtc_state->uapi.crtc->dev;
5098         const struct drm_i915_private *dev_priv = to_i915(dev);
5099         u16 trans_min, trans_y_tile_min;
5100         const u16 trans_amount = 10; /* This is a configurable amount */
5101         u16 wm0_sel_res_b, trans_offset_b, res_blocks;
5102
5103         /* Transition WM are not recommended by HW team for GEN9 */
5104         if (INTEL_GEN(dev_priv) <= 9)
5105                 return;
5106
5107         /* Transition WM don't make any sense if ipc is disabled */
5108         if (!dev_priv->ipc_enabled)
5109                 return;
5110
5111         trans_min = 14;
5112         if (INTEL_GEN(dev_priv) >= 11)
5113                 trans_min = 4;
5114
5115         trans_offset_b = trans_min + trans_amount;
5116
5117         /*
5118          * The spec asks for Selected Result Blocks for wm0 (the real value),
5119          * not Result Blocks (the integer value). Pay attention to the capital
5120          * letters. The value wm_l0->plane_res_b is actually Result Blocks, but
5121          * since Result Blocks is the ceiling of Selected Result Blocks plus 1,
5122          * and since we later will have to get the ceiling of the sum in the
5123          * transition watermarks calculation, we can just pretend Selected
5124          * Result Blocks is Result Blocks minus 1 and it should work for the
5125          * current platforms.
5126          */
5127         wm0_sel_res_b = wm->wm[0].plane_res_b - 1;
5128
5129         if (wp->y_tiled) {
5130                 trans_y_tile_min =
5131                         (u16)mul_round_up_u32_fixed16(2, wp->y_tile_minimum);
5132                 res_blocks = max(wm0_sel_res_b, trans_y_tile_min) +
5133                                 trans_offset_b;
5134         } else {
5135                 res_blocks = wm0_sel_res_b + trans_offset_b;
5136
5137                 /* WA BUG:1938466 add one block for non y-tile planes */
5138                 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
5139                         res_blocks += 1;
5140
5141         }
5142
5143         /*
5144          * Just assume we can enable the transition watermark.  After
5145          * computing the DDB we'll come back and disable it if that
5146          * assumption turns out to be false.
5147          */
5148         wm->trans_wm.plane_res_b = res_blocks + 1;
5149         wm->trans_wm.plane_en = true;
5150 }
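
/*
 * Worked example (illustrative values): on gen11, trans_offset_b =
 * 4 + 10 = 14. A non y-tiled plane with wm[0].plane_res_b = 31 gives
 * wm0_sel_res_b = 30, res_blocks = 30 + 14 = 44, and a programmed
 * trans_wm.plane_res_b of 45.
 */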
5151
5152 static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
5153                                      const struct intel_plane_state *plane_state,
5154                                      enum plane_id plane_id, int color_plane)
5155 {
5156         struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
5157         struct skl_wm_params wm_params;
5158         int ret;
5159
5160         ret = skl_compute_plane_wm_params(crtc_state, plane_state,
5161                                           &wm_params, color_plane);
5162         if (ret)
5163                 return ret;
5164
5165         skl_compute_wm_levels(crtc_state, &wm_params, wm->wm);
5166         skl_compute_transition_wm(crtc_state, &wm_params, wm);
5167
5168         return 0;
5169 }
5170
5171 static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state,
5172                                  const struct intel_plane_state *plane_state,
5173                                  enum plane_id plane_id)
5174 {
5175         struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
5176         struct skl_wm_params wm_params;
5177         int ret;
5178
5179         wm->is_planar = true;
5180
5181         /* uv plane watermarks must also be validated for NV12/Planar */
5182         ret = skl_compute_plane_wm_params(crtc_state, plane_state,
5183                                           &wm_params, 1);
5184         if (ret)
5185                 return ret;
5186
5187         skl_compute_wm_levels(crtc_state, &wm_params, wm->uv_wm);
5188
5189         return 0;
5190 }
5191
5192 static int skl_build_plane_wm(struct intel_crtc_state *crtc_state,
5193                               const struct intel_plane_state *plane_state)
5194 {
5195         struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
5196         const struct drm_framebuffer *fb = plane_state->hw.fb;
5197         enum plane_id plane_id = plane->id;
5198         int ret;
5199
5200         if (!intel_wm_plane_visible(crtc_state, plane_state))
5201                 return 0;
5202
5203         ret = skl_build_plane_wm_single(crtc_state, plane_state,
5204                                         plane_id, 0);
5205         if (ret)
5206                 return ret;
5207
5208         if (fb->format->is_yuv && fb->format->num_planes > 1) {
5209                 ret = skl_build_plane_wm_uv(crtc_state, plane_state,
5210                                             plane_id);
5211                 if (ret)
5212                         return ret;
5213         }
5214
5215         return 0;
5216 }
5217
5218 static int icl_build_plane_wm(struct intel_crtc_state *crtc_state,
5219                               const struct intel_plane_state *plane_state)
5220 {
5221         enum plane_id plane_id = to_intel_plane(plane_state->uapi.plane)->id;
5222         int ret;
5223
5224         /* Watermarks calculated in master */
5225         if (plane_state->planar_slave)
5226                 return 0;
5227
5228         if (plane_state->planar_linked_plane) {
5229                 const struct drm_framebuffer *fb = plane_state->hw.fb;
5230                 enum plane_id y_plane_id = plane_state->planar_linked_plane->id;
5231
5232                 WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state));
5233                 WARN_ON(!fb->format->is_yuv ||
5234                         fb->format->num_planes == 1);
5235
5236                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5237                                                 y_plane_id, 0);
5238                 if (ret)
5239                         return ret;
5240
5241                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5242                                                 plane_id, 1);
5243                 if (ret)
5244                         return ret;
5245         } else if (intel_wm_plane_visible(crtc_state, plane_state)) {
5246                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5247                                                 plane_id, 0);
5248                 if (ret)
5249                         return ret;
5250         }
5251
5252         return 0;
5253 }
5254
5255 static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state)
5256 {
5257         struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
5258         struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal;
5259         struct intel_plane *plane;
5260         const struct intel_plane_state *plane_state;
5261         int ret;
5262
5263         /*
5264          * We'll only calculate watermarks for planes that are actually
5265          * enabled, so make sure all other planes are set as disabled.
5266          */
5267         memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
5268
5269         intel_atomic_crtc_state_for_each_plane_state(plane, plane_state,
5270                                                      crtc_state) {
5271
5272                 if (INTEL_GEN(dev_priv) >= 11)
5273                         ret = icl_build_plane_wm(crtc_state, plane_state);
5274                 else
5275                         ret = skl_build_plane_wm(crtc_state, plane_state);
5276                 if (ret)
5277                         return ret;
5278         }
5279
5280         return 0;
5281 }
5282
5283 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
5284                                 i915_reg_t reg,
5285                                 const struct skl_ddb_entry *entry)
5286 {
5287         if (entry->end)
5288                 intel_de_write_fw(dev_priv, reg,
5289                                   (entry->end - 1) << 16 | entry->start);
5290         else
5291                 intel_de_write_fw(dev_priv, reg, 0);
5292 }
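
/*
 * Example (illustrative values): the entry { .start = 0x100,
 * .end = 0x200 } is written as (0x1ff << 16) | 0x100, converting the
 * exclusive software end back to the inclusive end the hardware
 * expects; an empty entry writes 0 to disable the allocation.
 */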
5293
5294 static void skl_write_wm_level(struct drm_i915_private *dev_priv,
5295                                i915_reg_t reg,
5296                                const struct skl_wm_level *level)
5297 {
5298         u32 val = 0;
5299
5300         if (level->plane_en)
5301                 val |= PLANE_WM_EN;
5302         if (level->ignore_lines)
5303                 val |= PLANE_WM_IGNORE_LINES;
5304         val |= level->plane_res_b;
5305         val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
5306
5307         intel_de_write_fw(dev_priv, reg, val);
5308 }
5309
5310 void skl_write_plane_wm(struct intel_plane *plane,
5311                         const struct intel_crtc_state *crtc_state)
5312 {
5313         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5314         int level, max_level = ilk_wm_max_level(dev_priv);
5315         enum plane_id plane_id = plane->id;
5316         enum pipe pipe = plane->pipe;
5317         const struct skl_plane_wm *wm =
5318                 &crtc_state->wm.skl.optimal.planes[plane_id];
5319         const struct skl_ddb_entry *ddb_y =
5320                 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5321         const struct skl_ddb_entry *ddb_uv =
5322                 &crtc_state->wm.skl.plane_ddb_uv[plane_id];
5323
5324         for (level = 0; level <= max_level; level++) {
5325                 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
5326                                    &wm->wm[level]);
5327         }
5328         skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
5329                            &wm->trans_wm);
5330
5331         if (INTEL_GEN(dev_priv) >= 11) {
5332                 skl_ddb_entry_write(dev_priv,
5333                                     PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5334                 return;
5335         }
5336
5337         if (wm->is_planar)
5338                 swap(ddb_y, ddb_uv);
5339
5340         skl_ddb_entry_write(dev_priv,
5341                             PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5342         skl_ddb_entry_write(dev_priv,
5343                             PLANE_NV12_BUF_CFG(pipe, plane_id), ddb_uv);
5344 }
5345
5346 void skl_write_cursor_wm(struct intel_plane *plane,
5347                          const struct intel_crtc_state *crtc_state)
5348 {
5349         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5350         int level, max_level = ilk_wm_max_level(dev_priv);
5351         enum plane_id plane_id = plane->id;
5352         enum pipe pipe = plane->pipe;
5353         const struct skl_plane_wm *wm =
5354                 &crtc_state->wm.skl.optimal.planes[plane_id];
5355         const struct skl_ddb_entry *ddb =
5356                 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5357
5358         for (level = 0; level <= max_level; level++) {
5359                 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
5360                                    &wm->wm[level]);
5361         }
5362         skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
5363
5364         skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), ddb);
5365 }
5366
5367 bool skl_wm_level_equals(const struct skl_wm_level *l1,
5368                          const struct skl_wm_level *l2)
5369 {
5370         return l1->plane_en == l2->plane_en &&
5371                 l1->ignore_lines == l2->ignore_lines &&
5372                 l1->plane_res_l == l2->plane_res_l &&
5373                 l1->plane_res_b == l2->plane_res_b;
5374 }
5375
5376 static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,
5377                                 const struct skl_plane_wm *wm1,
5378                                 const struct skl_plane_wm *wm2)
5379 {
5380         int level, max_level = ilk_wm_max_level(dev_priv);
5381
5382         for (level = 0; level <= max_level; level++) {
5383                 if (!skl_wm_level_equals(&wm1->wm[level], &wm2->wm[level]) ||
5384                     !skl_wm_level_equals(&wm1->uv_wm[level], &wm2->uv_wm[level]))
5385                         return false;
5386         }
5387
5388         return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);
5389 }
5390
5391 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
5392                                            const struct skl_ddb_entry *b)
5393 {
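        /*
         * DDB entries are half-open [start, end) ranges, so e.g.
         * [0, 160) and [160, 320) do not overlap, while [0, 161)
         * and [160, 320) do.
         */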
5394         return a->start < b->end && b->start < a->end;
5395 }
5396
5397 bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
5398                                  const struct skl_ddb_entry *entries,
5399                                  int num_entries, int ignore_idx)
5400 {
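        /*
         * Check @ddb against every entry in @entries except the one at
         * @ignore_idx (typically the caller's own slot).
         */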
5401         int i;
5402
5403         for (i = 0; i < num_entries; i++) {
5404                 if (i != ignore_idx &&
5405                     skl_ddb_entries_overlap(ddb, &entries[i]))
5406                         return true;
5407         }
5408
5409         return false;
5410 }
5411
5412 static int
5413 skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state,
5414                             struct intel_crtc_state *new_crtc_state)
5415 {
5416         struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->uapi.state);
5417         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
5418         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5419         struct intel_plane *plane;
5420
5421         for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5422                 struct intel_plane_state *plane_state;
5423                 enum plane_id plane_id = plane->id;
5424
5425                 if (skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_y[plane_id],
5426                                         &new_crtc_state->wm.skl.plane_ddb_y[plane_id]) &&
5427                     skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_uv[plane_id],
5428                                         &new_crtc_state->wm.skl.plane_ddb_uv[plane_id]))
5429                         continue;
5430
5431                 plane_state = intel_atomic_get_plane_state(state, plane);
5432                 if (IS_ERR(plane_state))
5433                         return PTR_ERR(plane_state);
5434
5435                 new_crtc_state->update_planes |= BIT(plane_id);
5436         }
5437
5438         return 0;
5439 }
5440
5441 static int
5442 skl_compute_ddb(struct intel_atomic_state *state)
5443 {
5444         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5445         struct intel_crtc_state *old_crtc_state;
5446         struct intel_crtc_state *new_crtc_state;
5447         struct intel_crtc *crtc;
5448         int ret, i;
5449
5450         state->enabled_dbuf_slices_mask = dev_priv->enabled_dbuf_slices_mask;
5451
5452         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5453                                             new_crtc_state, i) {
5454                 ret = skl_allocate_pipe_ddb(new_crtc_state);
5455                 if (ret)
5456                         return ret;
5457
5458                 ret = skl_ddb_add_affected_planes(old_crtc_state,
5459                                                   new_crtc_state);
5460                 if (ret)
5461                         return ret;
5462         }
5463
5464         return 0;
5465 }
5466
5467 static char enast(bool enable)
5468 {
5469         return enable ? '*' : ' ';
5470 }
5471
5472 static void
5473 skl_print_wm_changes(struct intel_atomic_state *state)
5474 {
5475         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5476         const struct intel_crtc_state *old_crtc_state;
5477         const struct intel_crtc_state *new_crtc_state;
5478         struct intel_plane *plane;
5479         struct intel_crtc *crtc;
5480         int i;
5481
5482         if (!drm_debug_enabled(DRM_UT_KMS))
5483                 return;
5484
5485         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5486                                             new_crtc_state, i) {
5487                 const struct skl_pipe_wm *old_pipe_wm, *new_pipe_wm;
5488
5489                 old_pipe_wm = &old_crtc_state->wm.skl.optimal;
5490                 new_pipe_wm = &new_crtc_state->wm.skl.optimal;
5491
5492                 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5493                         enum plane_id plane_id = plane->id;
5494                         const struct skl_ddb_entry *old, *new;
5495
5496                         old = &old_crtc_state->wm.skl.plane_ddb_y[plane_id];
5497                         new = &new_crtc_state->wm.skl.plane_ddb_y[plane_id];
5498
5499                         if (skl_ddb_entry_equal(old, new))
5500                                 continue;
5501
5502                         drm_dbg_kms(&dev_priv->drm,
5503                                     "[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n",
5504                                     plane->base.base.id, plane->base.name,
5505                                     old->start, old->end, new->start, new->end,
5506                                     skl_ddb_entry_size(old), skl_ddb_entry_size(new));
5507                 }
5508
5509                 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5510                         enum plane_id plane_id = plane->id;
5511                         const struct skl_plane_wm *old_wm, *new_wm;
5512
5513                         old_wm = &old_pipe_wm->planes[plane_id];
5514                         new_wm = &new_pipe_wm->planes[plane_id];
5515
5516                         if (skl_plane_wm_equals(dev_priv, old_wm, new_wm))
5517                                 continue;
5518
5519                         drm_dbg_kms(&dev_priv->drm,
5520                                     "[PLANE:%d:%s]   level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
5521                                     " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
5522                                     plane->base.base.id, plane->base.name,
5523                                     enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
5524                                     enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
5525                                     enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
5526                                     enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
5527                                     enast(old_wm->trans_wm.plane_en),
5528                                     enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
5529                                     enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
5530                                     enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
5531                                     enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
5532                                     enast(new_wm->trans_wm.plane_en));
5533
5534                         drm_dbg_kms(&dev_priv->drm,
5535                                     "[PLANE:%d:%s]   lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
5536                                     " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
5537                                     plane->base.base.id, plane->base.name,
5538                                     enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
5539                                     enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
5540                                     enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l,
5541                                     enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l,
5542                                     enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l,
5543                                     enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l,
5544                                     enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
5545                                     enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
5546                                     enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
5547
5548                                     enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
5549                                     enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
5550                                     enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l,
5551                                     enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l,
5552                                     enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l,
5553                                     enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
5554                                     enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
5555                                     enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
5556                                     enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
5557
5558                         drm_dbg_kms(&dev_priv->drm,
5559                                     "[PLANE:%d:%s]  blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5560                                     " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
5561                                     plane->base.base.id, plane->base.name,
5562                                     old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
5563                                     old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
5564                                     old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
5565                                     old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
5566                                     old_wm->trans_wm.plane_res_b,
5567                                     new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
5568                                     new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
5569                                     new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
5570                                     new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
5571                                     new_wm->trans_wm.plane_res_b);
5572
5573                         drm_dbg_kms(&dev_priv->drm,
5574                                     "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5575                                     " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
5576                                     plane->base.base.id, plane->base.name,
5577                                     old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
5578                                     old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
5579                                     old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
5580                                     old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
5581                                     old_wm->trans_wm.min_ddb_alloc,
5582                                     new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
5583                                     new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
5584                                     new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
5585                                     new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
5586                                     new_wm->trans_wm.min_ddb_alloc);
5587                 }
5588         }
5589 }
5590
5591 static int intel_add_all_pipes(struct intel_atomic_state *state)
5592 {
5593         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5594         struct intel_crtc *crtc;
5595
5596         for_each_intel_crtc(&dev_priv->drm, crtc) {
5597                 struct intel_crtc_state *crtc_state;
5598
5599                 crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
5600                 if (IS_ERR(crtc_state))
5601                         return PTR_ERR(crtc_state);
5602         }
5603
5604         return 0;
5605 }
5606
5607 static int
5608 skl_ddb_add_affected_pipes(struct intel_atomic_state *state)
5609 {
5610         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5611         int ret;
5612
5613         /*
5614          * If this is our first atomic update following hardware readout,
5615          * we can't trust the DDB that the BIOS programmed for us.  Let's
5616          * pretend that all pipes switched active status so that we'll
5617          * ensure a full DDB recompute.
5618          */
5619         if (dev_priv->wm.distrust_bios_wm) {
5620                 ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
5621                                        state->base.acquire_ctx);
5622                 if (ret)
5623                         return ret;
5624
5625                 state->active_pipe_changes = INTEL_INFO(dev_priv)->pipe_mask;
5626
5627                 /*
5628                  * We usually only initialize state->active_pipes if
5629                  * we're doing a modeset; make sure this field is always
5630                  * initialized during the sanitization process that happens
5631                  * on the first commit too.
5632                  */
5633                 if (!state->modeset)
5634                         state->active_pipes = dev_priv->active_pipes;
5635         }
5636
5637         /*
5638          * If the modeset changes which CRTCs are active, we need to
5639          * recompute the DDB allocation for *all* active pipes, even
5640          * those that weren't otherwise being modified in any way by this
5641          * atomic commit.  Due to the shrinking of the per-pipe allocations
5642          * when new active CRTCs are added, it's possible for a pipe that
5643          * we were already using and aren't changing at all here to suddenly
5644          * become invalid if its DDB needs exceed its new allocation.
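         * (As a rough illustration: two active pipes each using half of
         * the DDB shrink to roughly a third each when a third pipe is
         * enabled, so the untouched pipes must be rechecked as well.)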
5645          *
5646          * Note that if we wind up doing a full DDB recompute, we can't let
5647          * any other display updates race with this transaction, so we need
5648          * to grab the lock on *all* CRTCs.
5649          */
5650         if (state->active_pipe_changes || state->modeset) {
5651                 ret = intel_add_all_pipes(state);
5652                 if (ret)
5653                         return ret;
5654         }
5655
5656         return 0;
5657 }
5658
5659 /*
5660  * To make sure the cursor watermark registers are always consistent
5661  * with our computed state, the following scenario needs special
5662  * treatment:
5663  *
5664  * 1. enable cursor
5665  * 2. move cursor entirely offscreen
5666  * 3. disable cursor
5667  *
5668  * Step 2 does call .disable_plane() but does not zero the watermarks
5669  * (since we consider an offscreen cursor still active for the purposes
5670  * of watermarks). Step 3 would not normally call .disable_plane()
5671  * because the actual plane visibility isn't changing, and we don't
5672  * deallocate the cursor ddb until the pipe gets disabled. So we must
5673  * force step 3 to call .disable_plane() to update the watermark
5674  * registers properly.
5675  *
5676  * Other planes do not suffer from this issue as their watermarks are
5677  * calculated based on the actual plane visibility. The only time this
5678  * can trigger for the other planes is during the initial readout as the
5679  * default value of the watermarks registers is not zero.
5680  */
5681 static int skl_wm_add_affected_planes(struct intel_atomic_state *state,
5682                                       struct intel_crtc *crtc)
5683 {
5684         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5685         const struct intel_crtc_state *old_crtc_state =
5686                 intel_atomic_get_old_crtc_state(state, crtc);
5687         struct intel_crtc_state *new_crtc_state =
5688                 intel_atomic_get_new_crtc_state(state, crtc);
5689         struct intel_plane *plane;
5690
5691         for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5692                 struct intel_plane_state *plane_state;
5693                 enum plane_id plane_id = plane->id;
5694
5695                 /*
5696                  * Force a full wm update for every plane on modeset.
5697                  * Required because the reset value of the wm registers
5698                  * is non-zero, whereas we want all disabled planes to
5699                  * have zero watermarks. So if we turn off the relevant
5700                  * power well the hardware state will go out of sync
5701                  * with the software state.
5702                  */
5703                 if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi) &&
5704                     skl_plane_wm_equals(dev_priv,
5705                                         &old_crtc_state->wm.skl.optimal.planes[plane_id],
5706                                         &new_crtc_state->wm.skl.optimal.planes[plane_id]))
5707                         continue;
5708
5709                 plane_state = intel_atomic_get_plane_state(state, plane);
5710                 if (IS_ERR(plane_state))
5711                         return PTR_ERR(plane_state);
5712
5713                 new_crtc_state->update_planes |= BIT(plane_id);
5714         }
5715
5716         return 0;
5717 }
5718
5719 static int
5720 skl_compute_wm(struct intel_atomic_state *state)
5721 {
5722         struct intel_crtc *crtc;
5723         struct intel_crtc_state *new_crtc_state;
5724         struct intel_crtc_state *old_crtc_state;
5725         int ret, i;
5726
5727         ret = skl_ddb_add_affected_pipes(state);
5728         if (ret)
5729                 return ret;
5730
5731         /*
5732          * Calculate WM's for all pipes that are part of this transaction.
5733          * Note that skl_ddb_add_affected_pipes may have added more CRTCs that
5734          * weren't otherwise being modified if pipe allocations had to change.
5735          */
5736         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5737                                             new_crtc_state, i) {
5738                 ret = skl_build_pipe_wm(new_crtc_state);
5739                 if (ret)
5740                         return ret;
5741
5742                 ret = skl_wm_add_affected_planes(state, crtc);
5743                 if (ret)
5744                         return ret;
5745         }
5746
5747         ret = skl_compute_ddb(state);
5748         if (ret)
5749                 return ret;
5750
5751         skl_print_wm_changes(state);
5752
5753         return 0;
5754 }
5755
5756 static void ilk_compute_wm_config(struct drm_i915_private *dev_priv,
5757                                   struct intel_wm_config *config)
5758 {
5759         struct intel_crtc *crtc;
5760
5761         /* Compute the currently _active_ config */
5762         for_each_intel_crtc(&dev_priv->drm, crtc) {
5763                 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5764
5765                 if (!wm->pipe_enabled)
5766                         continue;
5767
5768                 config->sprites_enabled |= wm->sprites_enabled;
5769                 config->sprites_scaled |= wm->sprites_scaled;
5770                 config->num_pipes_active++;
5771         }
5772 }
5773
5774 static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
5775 {
5776         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
5777         struct ilk_wm_maximums max;
5778         struct intel_wm_config config = {};
5779         struct ilk_wm_values results = {};
5780         enum intel_ddb_partitioning partitioning;
5781
5782         ilk_compute_wm_config(dev_priv, &config);
5783
5784         ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_1_2, &max);
5785         ilk_wm_merge(dev_priv, &config, &max, &lp_wm_1_2);
5786
5787         /* 5/6 split only in single pipe config on IVB+ */
5788         if (INTEL_GEN(dev_priv) >= 7 &&
5789             config.num_pipes_active == 1 && config.sprites_enabled) {
5790                 ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_5_6, &max);
5791                 ilk_wm_merge(dev_priv, &config, &max, &lp_wm_5_6);
5792
5793                 best_lp_wm = ilk_find_best_result(dev_priv, &lp_wm_1_2, &lp_wm_5_6);
5794         } else {
5795                 best_lp_wm = &lp_wm_1_2;
5796         }
5797
5798         partitioning = (best_lp_wm == &lp_wm_1_2) ?
5799                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
5800
5801         ilk_compute_wm_results(dev_priv, best_lp_wm, partitioning, &results);
5802
5803         ilk_write_wm_values(dev_priv, &results);
5804 }
5805
5806 static void ilk_initial_watermarks(struct intel_atomic_state *state,
5807                                    struct intel_crtc *crtc)
5808 {
5809         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5810         const struct intel_crtc_state *crtc_state =
5811                 intel_atomic_get_new_crtc_state(state, crtc);
5812
5813         mutex_lock(&dev_priv->wm.wm_mutex);
5814         crtc->wm.active.ilk = crtc_state->wm.ilk.intermediate;
5815         ilk_program_watermarks(dev_priv);
5816         mutex_unlock(&dev_priv->wm.wm_mutex);
5817 }
5818
5819 static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5820                                     struct intel_crtc *crtc)
5821 {
5822         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5823         const struct intel_crtc_state *crtc_state =
5824                 intel_atomic_get_new_crtc_state(state, crtc);
5825
5826         if (!crtc_state->wm.need_postvbl_update)
5827                 return;
5828
5829         mutex_lock(&dev_priv->wm.wm_mutex);
5830         crtc->wm.active.ilk = crtc_state->wm.ilk.optimal;
5831         ilk_program_watermarks(dev_priv);
5832         mutex_unlock(&dev_priv->wm.wm_mutex);
5833 }
5834
5835 static inline void skl_wm_level_from_reg_val(u32 val,
5836                                              struct skl_wm_level *level)
5837 {
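        /* Inverse of skl_write_wm_level(): unpack the packed fields. */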
5838         level->plane_en = val & PLANE_WM_EN;
5839         level->ignore_lines = val & PLANE_WM_IGNORE_LINES;
5840         level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5841         level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5842                 PLANE_WM_LINES_MASK;
5843 }
5844
5845 void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
5846                               struct skl_pipe_wm *out)
5847 {
5848         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5849         enum pipe pipe = crtc->pipe;
5850         int level, max_level;
5851         enum plane_id plane_id;
5852         u32 val;
5853
5854         max_level = ilk_wm_max_level(dev_priv);
5855
5856         for_each_plane_id_on_crtc(crtc, plane_id) {
5857                 struct skl_plane_wm *wm = &out->planes[plane_id];
5858
5859                 for (level = 0; level <= max_level; level++) {
5860                         if (plane_id != PLANE_CURSOR)
5861                                 val = I915_READ(PLANE_WM(pipe, plane_id, level));
5862                         else
5863                                 val = I915_READ(CUR_WM(pipe, level));
5864
5865                         skl_wm_level_from_reg_val(val, &wm->wm[level]);
5866                 }
5867
5868                 if (plane_id != PLANE_CURSOR)
5869                         val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
5870                 else
5871                         val = I915_READ(CUR_WM_TRANS(pipe));
5872
5873                 skl_wm_level_from_reg_val(val, &wm->trans_wm);
5874         }
5878 }
5879
5880 void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
5881 {
5882         struct intel_crtc *crtc;
5883         struct intel_crtc_state *crtc_state;
5884
5885         skl_ddb_get_hw_state(dev_priv);
5886         for_each_intel_crtc(&dev_priv->drm, crtc) {
5887                 crtc_state = to_intel_crtc_state(crtc->base.state);
5888
5889                 skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal);
5890         }
5891
5892         if (dev_priv->active_pipes) {
5893                 /* Fully recompute DDB on first atomic commit */
5894                 dev_priv->wm.distrust_bios_wm = true;
5895         }
5896 }
5897
5898 static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc)
5899 {
5900         struct drm_device *dev = crtc->base.dev;
5901         struct drm_i915_private *dev_priv = to_i915(dev);
5902         struct ilk_wm_values *hw = &dev_priv->wm.hw;
5903         struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state);
5904         struct intel_pipe_wm *active = &crtc_state->wm.ilk.optimal;
5905         enum pipe pipe = crtc->pipe;
5906         static const i915_reg_t wm0_pipe_reg[] = {
5907                 [PIPE_A] = WM0_PIPEA_ILK,
5908                 [PIPE_B] = WM0_PIPEB_ILK,
5909                 [PIPE_C] = WM0_PIPEC_IVB,
5910         };
5911
5912         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
5913
5914         memset(active, 0, sizeof(*active));
5915
5916         active->pipe_enabled = crtc->active;
5917
5918         if (active->pipe_enabled) {
5919                 u32 tmp = hw->wm_pipe[pipe];
5920
5921                 /*
5922                  * For active pipes LP0 watermark is marked as
5923                  * enabled, and LP1+ watermarks as disabled since
5924                  * we can't really reverse compute them in case
5925                  * multiple pipes are active.
5926                  */
5927                 active->wm[0].enable = true;
5928                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5929                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5930                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5931         } else {
5932                 int level, max_level = ilk_wm_max_level(dev_priv);
5933
5934                 /*
5935                  * For inactive pipes, all watermark levels
5936                  * should be marked as enabled but zeroed,
5937                  * which is what we'd compute them to.
5938                  */
5939                 for (level = 0; level <= max_level; level++)
5940                         active->wm[level].enable = true;
5941         }
5942
5943         crtc->wm.active.ilk = *active;
5944 }
5945
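/*
 * Extract a single watermark field from a DSPFW register value, e.g.
 * _FW_WM(tmp, SR) masks with DSPFW_SR_MASK and shifts down by
 * DSPFW_SR_SHIFT (the _VLV variant uses the VLV-specific mask).
 */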
5946 #define _FW_WM(value, plane) \
5947         (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
5948 #define _FW_WM_VLV(value, plane) \
5949         (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
5950
5951 static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5952                                struct g4x_wm_values *wm)
5953 {
5954         u32 tmp;
5955
5956         tmp = I915_READ(DSPFW1);
5957         wm->sr.plane = _FW_WM(tmp, SR);
5958         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5959         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5960         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5961
5962         tmp = I915_READ(DSPFW2);
5963         wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5964         wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5965         wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5966         wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5967         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5968         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5969
5970         tmp = I915_READ(DSPFW3);
5971         wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5972         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5973         wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5974         wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5975 }
5976
5977 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5978                                struct vlv_wm_values *wm)
5979 {
5980         enum pipe pipe;
5981         u32 tmp;
5982
5983         for_each_pipe(dev_priv, pipe) {
5984                 tmp = I915_READ(VLV_DDL(pipe));
5985
5986                 wm->ddl[pipe].plane[PLANE_PRIMARY] =
5987                         (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5988                 wm->ddl[pipe].plane[PLANE_CURSOR] =
5989                         (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5990                 wm->ddl[pipe].plane[PLANE_SPRITE0] =
5991                         (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5992                 wm->ddl[pipe].plane[PLANE_SPRITE1] =
5993                         (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5994         }
5995
5996         tmp = I915_READ(DSPFW1);
5997         wm->sr.plane = _FW_WM(tmp, SR);
5998         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5999         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
6000         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
6001
6002         tmp = I915_READ(DSPFW2);
6003         wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
6004         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
6005         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
6006
6007         tmp = I915_READ(DSPFW3);
6008         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
6009
6010         if (IS_CHERRYVIEW(dev_priv)) {
6011                 tmp = I915_READ(DSPFW7_CHV);
6012                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
6013                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
6014
6015                 tmp = I915_READ(DSPFW8_CHV);
6016                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
6017                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
6018
6019                 tmp = I915_READ(DSPFW9_CHV);
6020                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
6021                 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
6022
6023                 tmp = I915_READ(DSPHOWM);
6024                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
6025                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
6026                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
6027                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
6028                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
6029                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
6030                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
6031                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
6032                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
6033                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
6034         } else {
6035                 tmp = I915_READ(DSPFW7);
6036                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
6037                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
6038
6039                 tmp = I915_READ(DSPHOWM);
6040                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
6041                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
6042                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
6043                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
6044                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
6045                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
6046                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
6047         }
6048 }
6049
6050 #undef _FW_WM
6051 #undef _FW_WM_VLV
6052
6053 void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv)
6054 {
6055         struct g4x_wm_values *wm = &dev_priv->wm.g4x;
6056         struct intel_crtc *crtc;
6057
6058         g4x_read_wm_values(dev_priv, wm);
6059
6060         wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
6061
6062         for_each_intel_crtc(&dev_priv->drm, crtc) {
6063                 struct intel_crtc_state *crtc_state =
6064                         to_intel_crtc_state(crtc->base.state);
6065                 struct g4x_wm_state *active = &crtc->wm.active.g4x;
6066                 struct g4x_pipe_wm *raw;
6067                 enum pipe pipe = crtc->pipe;
6068                 enum plane_id plane_id;
6069                 int level, max_level;
6070
6071                 active->cxsr = wm->cxsr;
6072                 active->hpll_en = wm->hpll_en;
6073                 active->fbc_en = wm->fbc_en;
6074
6075                 active->sr = wm->sr;
6076                 active->hpll = wm->hpll;
6077
6078                 for_each_plane_id_on_crtc(crtc, plane_id) {
6079                         active->wm.plane[plane_id] =
6080                                 wm->pipe[pipe].plane[plane_id];
6081                 }
6082
6083                 if (wm->cxsr && wm->hpll_en)
6084                         max_level = G4X_WM_LEVEL_HPLL;
6085                 else if (wm->cxsr)
6086                         max_level = G4X_WM_LEVEL_SR;
6087                 else
6088                         max_level = G4X_WM_LEVEL_NORMAL;
6089
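                /*
                 * Rebuild the raw per-level watermarks from the active
                 * state, stopping at the highest level the current
                 * cxsr/hpll_en state allows; levels above that are
                 * invalidated below via the USHRT_MAX writes.
                 */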
6090                 level = G4X_WM_LEVEL_NORMAL;
6091                 raw = &crtc_state->wm.g4x.raw[level];
6092                 for_each_plane_id_on_crtc(crtc, plane_id)
6093                         raw->plane[plane_id] = active->wm.plane[plane_id];
6094
6095                 if (++level > max_level)
6096                         goto out;
6097
6098                 raw = &crtc_state->wm.g4x.raw[level];
6099                 raw->plane[PLANE_PRIMARY] = active->sr.plane;
6100                 raw->plane[PLANE_CURSOR] = active->sr.cursor;
6101                 raw->plane[PLANE_SPRITE0] = 0;
6102                 raw->fbc = active->sr.fbc;
6103
6104                 if (++level > max_level)
6105                         goto out;
6106
6107                 raw = &crtc_state->wm.g4x.raw[level];
6108                 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
6109                 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
6110                 raw->plane[PLANE_SPRITE0] = 0;
6111                 raw->fbc = active->hpll.fbc;
6112
6113         out:
6114                 for_each_plane_id_on_crtc(crtc, plane_id)
6115                         g4x_raw_plane_wm_set(crtc_state, level,
6116                                              plane_id, USHRT_MAX);
6117                 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
6118
6119                 crtc_state->wm.g4x.optimal = *active;
6120                 crtc_state->wm.g4x.intermediate = *active;
6121
6122                 drm_dbg_kms(&dev_priv->drm,
6123                             "Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
6124                             pipe_name(pipe),
6125                             wm->pipe[pipe].plane[PLANE_PRIMARY],
6126                             wm->pipe[pipe].plane[PLANE_CURSOR],
6127                             wm->pipe[pipe].plane[PLANE_SPRITE0]);
6128         }
6129
6130         drm_dbg_kms(&dev_priv->drm,
6131                     "Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
6132                     wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
6133         drm_dbg_kms(&dev_priv->drm,
6134                     "Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
6135                     wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
6136         drm_dbg_kms(&dev_priv->drm, "Initial SR=%s HPLL=%s FBC=%s\n",
6137                     yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
6138 }
6139
6140 void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
6141 {
6142         struct intel_plane *plane;
6143         struct intel_crtc *crtc;
6144
6145         mutex_lock(&dev_priv->wm.wm_mutex);
6146
6147         for_each_intel_plane(&dev_priv->drm, plane) {
6148                 struct intel_crtc *crtc =
6149                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6150                 struct intel_crtc_state *crtc_state =
6151                         to_intel_crtc_state(crtc->base.state);
6152                 struct intel_plane_state *plane_state =
6153                         to_intel_plane_state(plane->base.state);
6154                 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
6155                 enum plane_id plane_id = plane->id;
6156                 int level;
6157
6158                 if (plane_state->uapi.visible)
6159                         continue;
6160
6161                 for (level = 0; level < 3; level++) {
6162                         struct g4x_pipe_wm *raw =
6163                                 &crtc_state->wm.g4x.raw[level];
6164
6165                         raw->plane[plane_id] = 0;
6166                         wm_state->wm.plane[plane_id] = 0;
6167                 }
6168
6169                 if (plane_id == PLANE_PRIMARY) {
6170                         for (level = 0; level < 3; level++) {
6171                                 struct g4x_pipe_wm *raw =
6172                                         &crtc_state->wm.g4x.raw[level];
6173                                 raw->fbc = 0;
6174                         }
6175
6176                         wm_state->sr.fbc = 0;
6177                         wm_state->hpll.fbc = 0;
6178                         wm_state->fbc_en = false;
6179                 }
6180         }
6181
6182         for_each_intel_crtc(&dev_priv->drm, crtc) {
6183                 struct intel_crtc_state *crtc_state =
6184                         to_intel_crtc_state(crtc->base.state);
6185
6186                 crtc_state->wm.g4x.intermediate =
6187                         crtc_state->wm.g4x.optimal;
6188                 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
6189         }
6190
6191         g4x_program_watermarks(dev_priv);
6192
6193         mutex_unlock(&dev_priv->wm.wm_mutex);
6194 }
6195
6196 void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
6197 {
6198         struct vlv_wm_values *wm = &dev_priv->wm.vlv;
6199         struct intel_crtc *crtc;
6200         u32 val;
6201
6202         vlv_read_wm_values(dev_priv, wm);
6203
6204         wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
6205         wm->level = VLV_WM_LEVEL_PM2;
6206
6207         if (IS_CHERRYVIEW(dev_priv)) {
6208                 vlv_punit_get(dev_priv);
6209
6210                 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
6211                 if (val & DSP_MAXFIFO_PM5_ENABLE)
6212                         wm->level = VLV_WM_LEVEL_PM5;
6213
6214                 /*
6215                  * If DDR DVFS is disabled in the BIOS, Punit
6216                  * will never ack the request. So if that happens
6217                  * assume we don't have to enable/disable DDR DVFS
6218                  * dynamically. To test that just set the REQ_ACK
6219                  * bit to poke the Punit, but don't change the
6220                  * HIGH/LOW bits so that we don't actually change
6221                  * the current state.
6222                  */
6223                 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
6224                 val |= FORCE_DDR_FREQ_REQ_ACK;
6225                 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
6226
6227                 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
6228                               FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
6229                         drm_dbg_kms(&dev_priv->drm,
6230                                     "Punit not acking DDR DVFS request, "
6231                                     "assuming DDR DVFS is disabled\n");
6232                         dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
6233                 } else {
6234                         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
6235                         if ((val & FORCE_DDR_HIGH_FREQ) == 0)
6236                                 wm->level = VLV_WM_LEVEL_DDR_DVFS;
6237                 }
6238
6239                 vlv_punit_put(dev_priv);
6240         }
6241
6242         for_each_intel_crtc(&dev_priv->drm, crtc) {
6243                 struct intel_crtc_state *crtc_state =
6244                         to_intel_crtc_state(crtc->base.state);
6245                 struct vlv_wm_state *active = &crtc->wm.active.vlv;
6246                 const struct vlv_fifo_state *fifo_state =
6247                         &crtc_state->wm.vlv.fifo_state;
6248                 enum pipe pipe = crtc->pipe;
6249                 enum plane_id plane_id;
6250                 int level;
6251
6252                 vlv_get_fifo_size(crtc_state);
6253
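                /*
                 * Rebuild the per-level software state from the hardware
                 * values; the raw values are recovered by inverting the
                 * FIFO-relative watermarks that were read back.
                 */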
6254                 active->num_levels = wm->level + 1;
6255                 active->cxsr = wm->cxsr;
6256
6257                 for (level = 0; level < active->num_levels; level++) {
6258                         struct g4x_pipe_wm *raw =
6259                                 &crtc_state->wm.vlv.raw[level];
6260
6261                         active->sr[level].plane = wm->sr.plane;
6262                         active->sr[level].cursor = wm->sr.cursor;
6263
6264                         for_each_plane_id_on_crtc(crtc, plane_id) {
6265                                 active->wm[level].plane[plane_id] =
6266                                         wm->pipe[pipe].plane[plane_id];
6267
6268                                 raw->plane[plane_id] =
6269                                         vlv_invert_wm_value(active->wm[level].plane[plane_id],
6270                                                             fifo_state->plane[plane_id]);
6271                         }
6272                 }
6273
6274                 for_each_plane_id_on_crtc(crtc, plane_id)
6275                         vlv_raw_plane_wm_set(crtc_state, level,
6276                                              plane_id, USHRT_MAX);
6277                 vlv_invalidate_wms(crtc, active, level);
6278
6279                 crtc_state->wm.vlv.optimal = *active;
6280                 crtc_state->wm.vlv.intermediate = *active;
6281
6282                 drm_dbg_kms(&dev_priv->drm,
6283                             "Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
6284                             pipe_name(pipe),
6285                             wm->pipe[pipe].plane[PLANE_PRIMARY],
6286                             wm->pipe[pipe].plane[PLANE_CURSOR],
6287                             wm->pipe[pipe].plane[PLANE_SPRITE0],
6288                             wm->pipe[pipe].plane[PLANE_SPRITE1]);
6289         }
6290
6291         drm_dbg_kms(&dev_priv->drm,
6292                     "Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
6293                     wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
6294 }
6295
6296 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
6297 {
6298         struct intel_plane *plane;
6299         struct intel_crtc *crtc;
6300
6301         mutex_lock(&dev_priv->wm.wm_mutex);
6302
6303         for_each_intel_plane(&dev_priv->drm, plane) {
6304                 struct intel_crtc *crtc =
6305                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6306                 struct intel_crtc_state *crtc_state =
6307                         to_intel_crtc_state(crtc->base.state);
6308                 struct intel_plane_state *plane_state =
6309                         to_intel_plane_state(plane->base.state);
6310                 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
6311                 const struct vlv_fifo_state *fifo_state =
6312                         &crtc_state->wm.vlv.fifo_state;
6313                 enum plane_id plane_id = plane->id;
6314                 int level;
6315
6316                 if (plane_state->uapi.visible)
6317                         continue;
6318
6319                 for (level = 0; level < wm_state->num_levels; level++) {
6320                         struct g4x_pipe_wm *raw =
6321                                 &crtc_state->wm.vlv.raw[level];
6322
6323                         raw->plane[plane_id] = 0;
6324
6325                         wm_state->wm[level].plane[plane_id] =
6326                                 vlv_invert_wm_value(raw->plane[plane_id],
6327                                                     fifo_state->plane[plane_id]);
6328                 }
6329         }
6330
6331         for_each_intel_crtc(&dev_priv->drm, crtc) {
6332                 struct intel_crtc_state *crtc_state =
6333                         to_intel_crtc_state(crtc->base.state);
6334
6335                 crtc_state->wm.vlv.intermediate =
6336                         crtc_state->wm.vlv.optimal;
6337                 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
6338         }
6339
6340         vlv_program_watermarks(dev_priv);
6341
6342         mutex_unlock(&dev_priv->wm.wm_mutex);
6343 }
6344
6345 /*
6346  * FIXME should probably kill this and improve
6347  * the real watermark readout/sanitation instead
6348  */
6349 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
6350 {
6351         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6352         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6353         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6354
6355         /*
6356          * Don't touch WM1S_LP_EN here.
6357          * Doing so could cause underruns.
6358          */
6359 }
6360
6361 void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv)
6362 {
6363         struct ilk_wm_values *hw = &dev_priv->wm.hw;
6364         struct intel_crtc *crtc;
6365
6366         ilk_init_lp_watermarks(dev_priv);
6367
6368         for_each_intel_crtc(&dev_priv->drm, crtc)
6369                 ilk_pipe_wm_get_hw_state(crtc);
6370
6371         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
6372         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
6373         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
6374
6375         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
6376         if (INTEL_GEN(dev_priv) >= 7) {
6377                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
6378                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
6379         }
6380
6381         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6382                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
6383                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6384         else if (IS_IVYBRIDGE(dev_priv))
6385                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
6386                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6387
6388         hw->enable_fbc_wm =
6389                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
6390 }
6391
6392 /**
6393  * intel_update_watermarks - update FIFO watermark values based on current modes
6394  * @crtc: the #intel_crtc on which to compute the WM
6395  *
6396  * Calculate watermark values for the various WM regs based on current mode
6397  * and plane configuration.
6398  *
6399  * There are several cases to deal with here:
6400  *   - normal (i.e. non-self-refresh)
6401  *   - self-refresh (SR) mode
6402  *   - lines are large relative to FIFO size (buffer can hold up to 2)
6403  *   - lines are small relative to FIFO size (buffer can hold more than 2
6404  *     lines), so need to account for TLB latency
6405  *
6406  *   The normal calculation is:
6407  *     watermark = dotclock * bytes per pixel * latency
6408  *   where latency is platform & configuration dependent (we assume pessimal
6409  *   values here).
6410  *
6411  *   The SR calculation is:
6412  *     watermark = (trunc(latency/line time)+1) * surface width *
6413  *       bytes per pixel
6414  *   where
6415  *     line time = htotal / dotclock
6416  *     surface width = hdisplay for normal plane and 64 for cursor
6417  *   and latency is assumed to be high, as above.
6418  *
6419  * The final value programmed to the register should always be rounded up,
6420  * and include an extra 2 entries to account for clock crossings.
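 *
 *   As an illustrative example with assumed numbers: a 148.5 MHz
 *   dotclock at 4 bytes per pixel with a 2 us latency gives
 *   148500000 * 4 * 0.000002 = 1188 bytes; assuming 64-byte FIFO
 *   entries that is 19 entries once rounded up, plus the 2 extra
 *   entries noted above.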
6421  *
6422  * We don't use the sprite, so we can ignore that.  And on Crestline we have
6423  * to set the non-SR watermarks to 8.
6424  */
6425 void intel_update_watermarks(struct intel_crtc *crtc)
6426 {
6427         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
6428
6429         if (dev_priv->display.update_wm)
6430                 dev_priv->display.update_wm(crtc);
6431 }
6432
6433 void intel_enable_ipc(struct drm_i915_private *dev_priv)
6434 {
6435         u32 val;
6436
6437         if (!HAS_IPC(dev_priv))
6438                 return;
6439
6440         val = I915_READ(DISP_ARB_CTL2);
6441
6442         if (dev_priv->ipc_enabled)
6443                 val |= DISP_IPC_ENABLE;
6444         else
6445                 val &= ~DISP_IPC_ENABLE;
6446
6447         I915_WRITE(DISP_ARB_CTL2, val);
6448 }
6449
6450 static bool intel_can_enable_ipc(struct drm_i915_private *dev_priv)
6451 {
6452         /* Display WA #0477 WaDisableIPC: skl */
6453         if (IS_SKYLAKE(dev_priv))
6454                 return false;
6455
6456         /* Display WA #1141: SKL:all KBL:all CFL */
6457         if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv))
6458                 return dev_priv->dram_info.symmetric_memory;
6459
6460         return true;
6461 }
6462
6463 void intel_init_ipc(struct drm_i915_private *dev_priv)
6464 {
6465         if (!HAS_IPC(dev_priv))
6466                 return;
6467
6468         dev_priv->ipc_enabled = intel_can_enable_ipc(dev_priv);
6469
6470         intel_enable_ipc(dev_priv);
6471 }
6472
6473 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
6474 {
6475         /*
6476          * On Ibex Peak and Cougar Point, we need to disable clock
6477          * gating for the panel power sequencer or it will fail to
6478          * start up when no ports are active.
6479          */
6480         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
6481 }
6482
6483 static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
6484 {
6485         enum pipe pipe;
6486
6487         for_each_pipe(dev_priv, pipe) {
6488                 I915_WRITE(DSPCNTR(pipe),
6489                            I915_READ(DSPCNTR(pipe)) |
6490                            DISPPLANE_TRICKLE_FEED_DISABLE);
6491
6492                 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
6493                 POSTING_READ(DSPSURF(pipe));
6494         }
6495 }
6496
6497 static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
6498 {
6499         u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6500
6501         /*
6502          * Required for FBC
6503          * WaFbcDisableDpfcClockGating:ilk
6504          */
6505         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
6506                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
6507                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
6508
6509         I915_WRITE(PCH_3DCGDIS0,
6510                    MARIUNIT_CLOCK_GATE_DISABLE |
6511                    SVSMUNIT_CLOCK_GATE_DISABLE);
6512         I915_WRITE(PCH_3DCGDIS1,
6513                    VFMUNIT_CLOCK_GATE_DISABLE);
6514
6515         /*
6516          * According to the spec the following bits should be set in
6517          * order to enable memory self-refresh:
6518          * The bit 22/21 of 0x42004
6519          * The bit 5 of 0x42020
6520          * The bit 15 of 0x45000
6521          */
6522         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6523                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
6524                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
6525         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
6526         I915_WRITE(DISP_ARB_CTL,
6527                    (I915_READ(DISP_ARB_CTL) |
6528                     DISP_FBC_WM_DIS));
6529
6530         /*
6531          * Based on documentation from the hardware team, the following
6532          * bits should be set unconditionally in order to enable FBC.
6533          * The bit 22 of 0x42000
6534          * The bit 22 of 0x42004
6535          * The bit 7,8,9 of 0x42020.
6536          */
6537         if (IS_IRONLAKE_M(dev_priv)) {
6538                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
6539                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
6540                            I915_READ(ILK_DISPLAY_CHICKEN1) |
6541                            ILK_FBCQ_DIS);
6542                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6543                            I915_READ(ILK_DISPLAY_CHICKEN2) |
6544                            ILK_DPARB_GATE);
6545         }
6546
6547         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6548
6549         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6550                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6551                    ILK_ELPIN_409_SELECT);
6552         I915_WRITE(_3D_CHICKEN2,
6553                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
6554                    _3D_CHICKEN2_WM_READ_PIPELINED);
6555
6556         /* WaDisableRenderCachePipelinedFlush:ilk */
6557         I915_WRITE(CACHE_MODE_0,
6558                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6559
6560         /* WaDisable_RenderCache_OperationalFlush:ilk */
6561         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6562
6563         g4x_disable_trickle_feed(dev_priv);
6564
6565         ibx_init_clock_gating(dev_priv);
6566 }
6567
6568 static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
6569 {
6570         enum pipe pipe;
6571         u32 val;
6572
6573         /*
6574          * On Ibex Peak and Cougar Point, we need to disable clock
6575          * gating for the panel power sequencer or it will fail to
6576          * start up when no ports are active.
6577          */
6578         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
6579                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
6580                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
6581         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
6582                    DPLS_EDP_PPS_FIX_DIS);
6583         /* The below fixes weird display corruption (a few pixels shifted
6584          * downward) seen only on the LVDS panels of some HP Ivy Bridge laptops.
6585          */
6586         for_each_pipe(dev_priv, pipe) {
6587                 val = I915_READ(TRANS_CHICKEN2(pipe));
6588                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
6589                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6590                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
6591                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6592                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
6593                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
6594                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
6595         }
6596         /* WADP0ClockGatingDisable */
6597         for_each_pipe(dev_priv, pipe) {
6598                 I915_WRITE(TRANS_CHICKEN1(pipe),
6599                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6600         }
6601 }
6602
6603 static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
6604 {
6605         u32 tmp;
6606
6607         tmp = I915_READ(MCH_SSKPD);
6608         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
6609                 drm_dbg_kms(&dev_priv->drm,
6610                             "Wrong MCH_SSKPD value: 0x%08x. This can cause underruns.\n",
6611                             tmp);
6612 }
6613
6614 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
6615 {
6616         u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6617
6618         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6619
6620         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6621                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6622                    ILK_ELPIN_409_SELECT);
6623
6624         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
6625         I915_WRITE(_3D_CHICKEN,
6626                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
6627
6628         /* WaDisable_RenderCache_OperationalFlush:snb */
6629         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6630
6631         /*
6632          * BSpec recommends 8x4 when MSAA is used,
6633          * however in practice 16x4 seems fastest.
6634          *
6635          * Note that PS/WM thread counts depend on the WIZ hashing
6636          * disable bit, which we don't touch here, but it's good
6637          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6638          */
6639         I915_WRITE(GEN6_GT_MODE,
6640                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6641
6642         I915_WRITE(CACHE_MODE_0,
6643                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
6644
6645         I915_WRITE(GEN6_UCGCTL1,
6646                    I915_READ(GEN6_UCGCTL1) |
6647                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
6648                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6649
6650         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
6651          * gating disable must be set.  Failure to set it results in
6652          * flickering pixels due to Z write ordering failures after
6653          * some amount of runtime in the Mesa "fire" demo, and Unigine
6654          * Sanctuary and Tropics, and apparently anything else with
6655          * alpha test or pixel discard.
6656          *
6657          * According to the spec, bit 11 (RCCUNIT) must also be set,
6658          * but we have not debugged actual testcases to confirm it.
6659          *
6660          * WaDisableRCCUnitClockGating:snb
6661          * WaDisableRCPBUnitClockGating:snb
6662          */
6663         I915_WRITE(GEN6_UCGCTL2,
6664                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
6665                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
6666
6667         /* WaStripsFansDisableFastClipPerformanceFix:snb */
6668         I915_WRITE(_3D_CHICKEN3,
6669                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
6670
6671         /*
6672          * Bspec says:
6673          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
6674          * 3DSTATE_SF number of SF output attributes is more than 16."
6675          */
6676         I915_WRITE(_3D_CHICKEN3,
6677                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
6678
6679         /*
6680          * According to the spec, the following bits should be
6681          * set in order to enable memory self-refresh and FBC:
6682          * Bits 21 and 22 of 0x42000
6683          * Bits 21 and 22 of 0x42004
6684          * Bits 5 and 7 of 0x42020
6685          * Bit 14 of 0x70180
6686          * Bit 14 of 0x71180
6687          *
6688          * WaFbcAsynchFlipDisableFbcQueue:snb
6689          */
6690         I915_WRITE(ILK_DISPLAY_CHICKEN1,
6691                    I915_READ(ILK_DISPLAY_CHICKEN1) |
6692                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
6693         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6694                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6695                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
6696         I915_WRITE(ILK_DSPCLK_GATE_D,
6697                    I915_READ(ILK_DSPCLK_GATE_D) |
6698                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
6699                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
6700
6701         g4x_disable_trickle_feed(dev_priv);
6702
6703         cpt_init_clock_gating(dev_priv);
6704
6705         gen6_check_mch_setup(dev_priv);
6706 }
6707
6708 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
6709 {
6710         u32 reg = I915_READ(GEN7_FF_THREAD_MODE);
6711
6712         /*
6713          * WaVSThreadDispatchOverride:ivb,vlv
6714          *
6715          * This actually overrides the dispatch
6716          * mode for all thread types.
6717          */
6718         reg &= ~GEN7_FF_SCHED_MASK;
6719         reg |= GEN7_FF_TS_SCHED_HW;
6720         reg |= GEN7_FF_VS_SCHED_HW;
6721         reg |= GEN7_FF_DS_SCHED_HW;
6722
6723         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
6724 }
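
/*
 * A sketch only: the read-modify-write above expressed with
 * intel_uncore_rmw(), which this file already uses for the icl/tgl
 * workarounds further down. The open-coded form above is what ships;
 * this variant assumes identical semantics and is purely illustrative.
 */
static void gen7_setup_fixed_func_scheduler_rmw(struct drm_i915_private *dev_priv)
{
	intel_uncore_rmw(&dev_priv->uncore, GEN7_FF_THREAD_MODE,
			 GEN7_FF_SCHED_MASK,
			 GEN7_FF_TS_SCHED_HW |
			 GEN7_FF_VS_SCHED_HW |
			 GEN7_FF_DS_SCHED_HW);
}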
6725
6726 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
6727 {
6728         /*
6729          * TODO: this bit should only be enabled when really needed, then
6730          * disabled when not needed anymore in order to save power.
6731          */
6732         if (HAS_PCH_LPT_LP(dev_priv))
6733                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
6734                            I915_READ(SOUTH_DSPCLK_GATE_D) |
6735                            PCH_LP_PARTITION_LEVEL_DISABLE);
6736
6737         /* WADPOClockGatingDisable:hsw */
6738         I915_WRITE(TRANS_CHICKEN1(PIPE_A),
6739                    I915_READ(TRANS_CHICKEN1(PIPE_A)) |
6740                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6741 }
6742
6743 static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
6744 {
6745         if (HAS_PCH_LPT_LP(dev_priv)) {
6746                 u32 val = I915_READ(SOUTH_DSPCLK_GATE_D);
6747
6748                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
6749                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
6750         }
6751 }
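
/*
 * Note: lpt_suspend_hw() clears the PCH_LP_PARTITION_LEVEL_DISABLE bit
 * that lpt_init_clock_gating() set above, re-enabling partition-level
 * power savings for the duration of the suspend.
 */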
6752
6753 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
6754                                    int general_prio_credits,
6755                                    int high_prio_credits)
6756 {
6757         u32 misccpctl;
6758         u32 val;
6759
6760         /* WaTempDisableDOPClkGating:bdw */
6761         misccpctl = I915_READ(GEN7_MISCCPCTL);
6762         I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
6763
6764         val = I915_READ(GEN8_L3SQCREG1);
6765         val &= ~L3_PRIO_CREDITS_MASK;
6766         val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
6767         val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
6768         I915_WRITE(GEN8_L3SQCREG1, val);
6769
6770         /*
6771          * Wait at least 100 clocks before re-enabling clock gating.
6772          * See the definition of L3SQCREG1 in BSpec.
6773          */
6774         POSTING_READ(GEN8_L3SQCREG1);
6775         udelay(1);
6776         I915_WRITE(GEN7_MISCCPCTL, misccpctl);
6777 }
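
/*
 * On the udelay(1) above: assuming the relevant clock runs at 100 MHz
 * or more, 1 us spans at least 100 cycles, satisfying the "at least
 * 100 clocks" requirement. The two call sites later in this file pick
 * the platform-specific credit split:
 *
 *	gen8_set_l3sqc_credits(dev_priv, 30, 2);	bdw
 *	gen8_set_l3sqc_credits(dev_priv, 38, 2);	chv
 */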
6778
6779 static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
6780 {
6781         /* This is not a Wa. Enable it to reduce Sampler power */
6782         I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN,
6783                    I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE);
6784
6785         /* WaEnable32PlaneMode:icl */
6786         I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
6787                    _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE));
6788
6789         /*
6790          * Wa_1408615072:icl,ehl  (vsunit)
6791          * Wa_1407596294:icl,ehl  (hsunit)
6792          */
6793         intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE,
6794                          0, VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
6795
6796         /* Wa_1407352427:icl,ehl */
6797         intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE2,
6798                          0, PSDUNIT_CLKGATE_DIS);
6799
6800         /* Wa_14010594013:icl,ehl */
6801         intel_uncore_rmw(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1,
6802                          0, CNL_DELAY_PMRSP);
6803 }
6804
6805 static void tgl_init_clock_gating(struct drm_i915_private *dev_priv)
6806 {
6807         u32 vd_pg_enable = 0;
6808         unsigned int i;
6809
6810         /* Wa_1408615072:tgl */
6811         intel_uncore_rmw(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE2,
6812                          0, VSUNIT_CLKGATE_DIS_TGL);
6813
6814         /* This is not a WA. Enable VD HCP & MFX_ENC powergate */
6815         for (i = 0; i < I915_MAX_VCS; i++) {
6816                 if (HAS_ENGINE(dev_priv, _VCS(i)))
6817                         vd_pg_enable |= VDN_HCP_POWERGATE_ENABLE(i) |
6818                                         VDN_MFX_POWERGATE_ENABLE(i);
6819         }
6820
6821         I915_WRITE(POWERGATE_ENABLE,
6822                    I915_READ(POWERGATE_ENABLE) | vd_pg_enable);
6823
6824         /* Wa_1409825376:tgl (pre-prod) */
6825         if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0))
6826                 I915_WRITE(GEN9_CLKGATE_DIS_3, I915_READ(GEN9_CLKGATE_DIS_3) |
6827                            TGL_VRH_GATING_DIS);
6828 }
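
/*
 * Unrolling the loop above for a hypothetical part with two video
 * decode engines (VCS0 and VCS1) present, the computed mask would be:
 *
 *	vd_pg_enable = VDN_HCP_POWERGATE_ENABLE(0) |
 *		       VDN_MFX_POWERGATE_ENABLE(0) |
 *		       VDN_HCP_POWERGATE_ENABLE(1) |
 *		       VDN_MFX_POWERGATE_ENABLE(1);
 */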
6829
6830 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
6831 {
6832         if (!HAS_PCH_CNP(dev_priv))
6833                 return;
6834
6835         /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating:cnp */
6836         I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
6837                    CNP_PWM_CGE_GATING_DISABLE);
6838 }
6839
6840 static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
6841 {
6842         u32 val;

6843         cnp_init_clock_gating(dev_priv);
6844
6845         /* This is not a Wa. Enable for better image quality */
6846         I915_WRITE(_3D_CHICKEN3,
6847                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
6848
6849         /* WaEnableChickenDCPR:cnl */
6850         I915_WRITE(GEN8_CHICKEN_DCPR_1,
6851                    I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
6852
6853         /* WaFbcWakeMemOn:cnl */
6854         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
6855                    DISP_FBC_MEMORY_WAKE);
6856
6857         val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
6858         /* ReadHitWriteOnlyDisable:cnl */
6859         val |= RCCUNIT_CLKGATE_DIS;
6860         /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
6861         if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
6862                 val |= SARBUNIT_CLKGATE_DIS;
6863         I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
6864
6865         /* Wa_2201832410:cnl */
6866         val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
6867         val |= GWUNIT_CLKGATE_DIS;
6868         I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
6869
6870         /* WaDisableVFclkgate:cnl */
6871         /* WaVFUnitClockGatingDisable:cnl */
6872         val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
6873         val |= VFUNIT_CLKGATE_DIS;
6874         I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
6875 }
6876
6877 static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
6878 {
6879         cnp_init_clock_gating(dev_priv);
6880         gen9_init_clock_gating(dev_priv);
6881
6882         /* WaFbcNukeOnHostModify:cfl */
6883         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
6884                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
6885 }
6886
6887 static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
6888 {
6889         gen9_init_clock_gating(dev_priv);
6890
6891         /* WaDisableSDEUnitClockGating:kbl */
6892         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
6893                 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6894                            GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6895
6896         /* WaDisableGamClockGating:kbl */
6897         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
6898                 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
6899                            GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
6900
6901         /* WaFbcNukeOnHostModify:kbl */
6902         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
6903                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
6904 }
6905
6906 static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
6907 {
6908         gen9_init_clock_gating(dev_priv);
6909
6910         /* WAC6entrylatency:skl */
6911         I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
6912                    FBC_LLC_FULLY_OPEN);
6913
6914         /* WaFbcNukeOnHostModify:skl */
6915         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
6916                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
6917 }
6918
6919 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
6920 {
6921         enum pipe pipe;
6922
6923         /* WaSwitchSolVfFArbitrationPriority:bdw */
6924         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6925
6926         /* WaPsrDPAMaskVBlankInSRD:bdw */
6927         I915_WRITE(CHICKEN_PAR1_1,
6928                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
6929
6930         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
6931         for_each_pipe(dev_priv, pipe) {
6932                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
6933                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
6934                            BDW_DPRS_MASK_VBLANK_SRD);
6935         }
6936
6937         /* WaVSRefCountFullforceMissDisable:bdw */
6938         /* WaDSRefCountFullforceMissDisable:bdw */
6939         I915_WRITE(GEN7_FF_THREAD_MODE,
6940                    I915_READ(GEN7_FF_THREAD_MODE) &
6941                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6942
6943         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6944                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6945
6946         /* WaDisableSDEUnitClockGating:bdw */
6947         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6948                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6949
6950         /* WaProgramL3SqcReg1Default:bdw */
6951         gen8_set_l3sqc_credits(dev_priv, 30, 2);
6952
6953         /* WaKVMNotificationOnConfigChange:bdw */
6954         I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
6955                    | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
6956
6957         lpt_init_clock_gating(dev_priv);
6958
6959         /* WaDisableDopClockGating:bdw
6960          *
6961          * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
6962          * clock gating.
6963          */
6964         I915_WRITE(GEN6_UCGCTL1,
6965                    I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
6966 }
6967
6968 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
6969 {
6970         /* L3 caching of data atomics doesn't work -- disable it. */
6971         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
6972         I915_WRITE(HSW_ROW_CHICKEN3,
6973                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
6974
6975         /* This is required by WaCatErrorRejectionIssue:hsw */
6976         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6977                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6978                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6979
6980         /* WaVSRefCountFullforceMissDisable:hsw */
6981         I915_WRITE(GEN7_FF_THREAD_MODE,
6982                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
6983
6984         /* WaDisable_RenderCache_OperationalFlush:hsw */
6985         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6986
6987         /* enable HiZ Raw Stall Optimization */
6988         I915_WRITE(CACHE_MODE_0_GEN7,
6989                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6990
6991         /* WaDisable4x2SubspanOptimization:hsw */
6992         I915_WRITE(CACHE_MODE_1,
6993                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6994
6995         /*
6996          * BSpec recommends 8x4 when MSAA is used,
6997          * however in practice 16x4 seems fastest.
6998          *
6999          * Note that PS/WM thread counts depend on the WIZ hashing
7000          * disable bit, which we don't touch here, but it's good
7001          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
7002          */
7003         I915_WRITE(GEN7_GT_MODE,
7004                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
7005
7006         /* WaSampleCChickenBitEnable:hsw */
7007         I915_WRITE(HALF_SLICE_CHICKEN3,
7008                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
7009
7010         /* WaSwitchSolVfFArbitrationPriority:hsw */
7011         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
7012
7013         lpt_init_clock_gating(dev_priv);
7014 }
7015
7016 static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
7017 {
7018         u32 snpcr;
7019
7020         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
7021
7022         /* WaDisableEarlyCull:ivb */
7023         I915_WRITE(_3D_CHICKEN3,
7024                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
7025
7026         /* WaDisableBackToBackFlipFix:ivb */
7027         I915_WRITE(IVB_CHICKEN3,
7028                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
7029                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
7030
7031         /* WaDisablePSDDualDispatchEnable:ivb */
7032         if (IS_IVB_GT1(dev_priv))
7033                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
7034                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
7035
7036         /* WaDisable_RenderCache_OperationalFlush:ivb */
7037         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7038
7039         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
7040         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
7041                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
7042
7043         /* WaApplyL3ControlAndL3ChickenMode:ivb */
7044         I915_WRITE(GEN7_L3CNTLREG1,
7045                    GEN7_WA_FOR_GEN7_L3_CONTROL);
7046         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
7047                    GEN7_WA_L3_CHICKEN_MODE);
7048         if (IS_IVB_GT1(dev_priv)) {
7049                 I915_WRITE(GEN7_ROW_CHICKEN2,
7050                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
7051         } else {
7052                 /* must write both registers */
7053                 I915_WRITE(GEN7_ROW_CHICKEN2,
7054                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
7055                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
7056                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
7057         }
7058
7059         /* WaForceL3Serialization:ivb */
7060         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
7061                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
7062
7063         /*
7064          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
7065          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
7066          */
7067         I915_WRITE(GEN6_UCGCTL2,
7068                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
7069
7070         /* This is required by WaCatErrorRejectionIssue:ivb */
7071         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
7072                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
7073                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
7074
7075         g4x_disable_trickle_feed(dev_priv);
7076
7077         gen7_setup_fixed_func_scheduler(dev_priv);
7078
7079         if (0) { /* causes HiZ corruption on ivb:gt1 */
7080                 /* enable HiZ Raw Stall Optimization */
7081                 I915_WRITE(CACHE_MODE_0_GEN7,
7082                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
7083         }
7084
7085         /* WaDisable4x2SubspanOptimization:ivb */
7086         I915_WRITE(CACHE_MODE_1,
7087                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
7088
7089         /*
7090          * BSpec recommends 8x4 when MSAA is used,
7091          * however in practice 16x4 seems fastest.
7092          *
7093          * Note that PS/WM thread counts depend on the WIZ hashing
7094          * disable bit, which we don't touch here, but it's good
7095          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
7096          */
7097         I915_WRITE(GEN7_GT_MODE,
7098                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
7099
7100         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
7101         snpcr &= ~GEN6_MBC_SNPCR_MASK;
7102         snpcr |= GEN6_MBC_SNPCR_MED;
7103         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
7104
7105         if (!HAS_PCH_NOP(dev_priv))
7106                 cpt_init_clock_gating(dev_priv);
7107
7108         gen6_check_mch_setup(dev_priv);
7109 }
7110
7111 static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
7112 {
7113         /* WaDisableEarlyCull:vlv */
7114         I915_WRITE(_3D_CHICKEN3,
7115                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
7116
7117         /* WaDisableBackToBackFlipFix:vlv */
7118         I915_WRITE(IVB_CHICKEN3,
7119                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
7120                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
7121
7122         /* WaPsdDispatchEnable:vlv */
7123         /* WaDisablePSDDualDispatchEnable:vlv */
7124         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
7125                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
7126                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
7127
7128         /* WaDisable_RenderCache_OperationalFlush:vlv */
7129         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7130
7131         /* WaForceL3Serialization:vlv */
7132         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
7133                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
7134
7135         /* WaDisableDopClockGating:vlv */
7136         I915_WRITE(GEN7_ROW_CHICKEN2,
7137                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
7138
7139         /* This is required by WaCatErrorRejectionIssue:vlv */
7140         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
7141                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
7142                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
7143
7144         gen7_setup_fixed_func_scheduler(dev_priv);
7145
7146         /*
7147          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
7148          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
7149          */
7150         I915_WRITE(GEN6_UCGCTL2,
7151                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
7152
7153         /* WaDisableL3Bank2xClockGate:vlv
7154          * Disabling L3 clock gating - MMIO 940c[25] = 1
7155          * Set bit 25 to disable L3_BANK_2x_CLK_GATING */
7156         I915_WRITE(GEN7_UCGCTL4,
7157                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
7158
7159         /*
7160          * BSpec says this must be set, even though
7161          * WaDisable4x2SubspanOptimization isn't listed for VLV.
7162          */
7163         I915_WRITE(CACHE_MODE_1,
7164                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
7165
7166         /*
7167          * BSpec recommends 8x4 when MSAA is used,
7168          * however in practice 16x4 seems fastest.
7169          *
7170          * Note that PS/WM thread counts depend on the WIZ hashing
7171          * disable bit, which we don't touch here, but it's good
7172          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
7173          */
7174         I915_WRITE(GEN7_GT_MODE,
7175                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
7176
7177         /*
7178          * WaIncreaseL3CreditsForVLVB0:vlv
7179          * This is the hardware default actually.
7180          */
7181         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
7182
7183         /*
7184          * WaDisableVLVClockGating_VBIIssue:vlv
7185          * Disable clock gating on the GCFG unit to prevent a delay
7186          * in the reporting of vblank events.
7187          */
7188         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
7189 }
7190
7191 static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
7192 {
7193         /* WaVSRefCountFullforceMissDisable:chv */
7194         /* WaDSRefCountFullforceMissDisable:chv */
7195         I915_WRITE(GEN7_FF_THREAD_MODE,
7196                    I915_READ(GEN7_FF_THREAD_MODE) &
7197                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
7198
7199         /* WaDisableSemaphoreAndSyncFlipWait:chv */
7200         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
7201                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
7202
7203         /* WaDisableCSUnitClockGating:chv */
7204         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
7205                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
7206
7207         /* WaDisableSDEUnitClockGating:chv */
7208         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
7209                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
7210
7211         /*
7212          * WaProgramL3SqcReg1Default:chv
7213          * See gfxspecs/Related Documents/Performance Guide/
7214          * LSQC Setting Recommendations.
7215          */
7216         gen8_set_l3sqc_credits(dev_priv, 38, 2);
7217 }
7218
7219 static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
7220 {
7221         u32 dspclk_gate;
7222
7223         I915_WRITE(RENCLK_GATE_D1, 0);
7224         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
7225                    GS_UNIT_CLOCK_GATE_DISABLE |
7226                    CL_UNIT_CLOCK_GATE_DISABLE);
7227         I915_WRITE(RAMCLK_GATE_D, 0);
7228         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
7229                 OVRUNIT_CLOCK_GATE_DISABLE |
7230                 OVCUNIT_CLOCK_GATE_DISABLE;
7231         if (IS_GM45(dev_priv))
7232                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
7233         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
7234
7235         /* WaDisableRenderCachePipelinedFlush */
7236         I915_WRITE(CACHE_MODE_0,
7237                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
7238
7239         /* WaDisable_RenderCache_OperationalFlush:g4x */
7240         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7241
7242         g4x_disable_trickle_feed(dev_priv);
7243 }
7244
7245 static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
7246 {
7247         struct intel_uncore *uncore = &dev_priv->uncore;
7248
7249         intel_uncore_write(uncore, RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
7250         intel_uncore_write(uncore, RENCLK_GATE_D2, 0);
7251         intel_uncore_write(uncore, DSPCLK_GATE_D, 0);
7252         intel_uncore_write(uncore, RAMCLK_GATE_D, 0);
7253         intel_uncore_write16(uncore, DEUC, 0);
7254         intel_uncore_write(uncore,
7255                            MI_ARB_STATE,
7256                            _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7257
7258         /* WaDisable_RenderCache_OperationalFlush:gen4 */
7259         intel_uncore_write(uncore,
7260                            CACHE_MODE_0,
7261                            _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7262 }
7263
7264 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
7265 {
7266         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
7267                    I965_RCC_CLOCK_GATE_DISABLE |
7268                    I965_RCPB_CLOCK_GATE_DISABLE |
7269                    I965_ISC_CLOCK_GATE_DISABLE |
7270                    I965_FBC_CLOCK_GATE_DISABLE);
7271         I915_WRITE(RENCLK_GATE_D2, 0);
7272         I915_WRITE(MI_ARB_STATE,
7273                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7274
7275         /* WaDisable_RenderCache_OperationalFlush:gen4 */
7276         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7277 }
7278
7279 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
7280 {
7281         u32 dstate = I915_READ(D_STATE);
7282
7283         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
7284                 DSTATE_DOT_CLOCK_GATING;
7285         I915_WRITE(D_STATE, dstate);
7286
7287         if (IS_PINEVIEW(dev_priv))
7288                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
7289
7290         /* IIR "flip pending" means done if this bit is set */
7291         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
7292
7293         /* interrupts should cause a wake up from C3 */
7294         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
7295
7296         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
7297         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
7298
7299         I915_WRITE(MI_ARB_STATE,
7300                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7301 }
7302
7303 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
7304 {
7305         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
7306
7307         /* interrupts should cause a wake up from C3 */
7308         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
7309                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
7310
7311         I915_WRITE(MEM_MODE,
7312                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
7313 }
7314
7315 static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
7316 {
7317         I915_WRITE(MEM_MODE,
7318                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
7319                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
7320 }
7321
7322 void intel_init_clock_gating(struct drm_i915_private *dev_priv)
7323 {
7324         dev_priv->display.init_clock_gating(dev_priv);
7325 }
7326
7327 void intel_suspend_hw(struct drm_i915_private *dev_priv)
7328 {
7329         if (HAS_PCH_LPT(dev_priv))
7330                 lpt_suspend_hw(dev_priv);
7331 }
7332
7333 static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
7334 {
7335         drm_dbg_kms(&dev_priv->drm,
7336                     "No clock gating settings or workarounds applied.\n");
7337 }
7338
7339 /**
7340  * intel_init_clock_gating_hooks - setup the clock gating hooks
7341  * @dev_priv: device private
7342  *
7343  * Setup the hooks that configure which clocks of a given platform can be
7344  * gated and also apply various GT and display specific workarounds for these
7345  * platforms. Note that some GT specific workarounds are applied separately
7346  * when GPU contexts or batchbuffers start their execution.
7347  */
7348 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
7349 {
7350         if (IS_GEN(dev_priv, 12))
7351                 dev_priv->display.init_clock_gating = tgl_init_clock_gating;
7352         else if (IS_GEN(dev_priv, 11))
7353                 dev_priv->display.init_clock_gating = icl_init_clock_gating;
7354         else if (IS_CANNONLAKE(dev_priv))
7355                 dev_priv->display.init_clock_gating = cnl_init_clock_gating;
7356         else if (IS_COFFEELAKE(dev_priv))
7357                 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
7358         else if (IS_SKYLAKE(dev_priv))
7359                 dev_priv->display.init_clock_gating = skl_init_clock_gating;
7360         else if (IS_KABYLAKE(dev_priv))
7361                 dev_priv->display.init_clock_gating = kbl_init_clock_gating;
7362         else if (IS_BROXTON(dev_priv))
7363                 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
7364         else if (IS_GEMINILAKE(dev_priv))
7365                 dev_priv->display.init_clock_gating = glk_init_clock_gating;
7366         else if (IS_BROADWELL(dev_priv))
7367                 dev_priv->display.init_clock_gating = bdw_init_clock_gating;
7368         else if (IS_CHERRYVIEW(dev_priv))
7369                 dev_priv->display.init_clock_gating = chv_init_clock_gating;
7370         else if (IS_HASWELL(dev_priv))
7371                 dev_priv->display.init_clock_gating = hsw_init_clock_gating;
7372         else if (IS_IVYBRIDGE(dev_priv))
7373                 dev_priv->display.init_clock_gating = ivb_init_clock_gating;
7374         else if (IS_VALLEYVIEW(dev_priv))
7375                 dev_priv->display.init_clock_gating = vlv_init_clock_gating;
7376         else if (IS_GEN(dev_priv, 6))
7377                 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
7378         else if (IS_GEN(dev_priv, 5))
7379                 dev_priv->display.init_clock_gating = ilk_init_clock_gating;
7380         else if (IS_G4X(dev_priv))
7381                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
7382         else if (IS_I965GM(dev_priv))
7383                 dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
7384         else if (IS_I965G(dev_priv))
7385                 dev_priv->display.init_clock_gating = i965g_init_clock_gating;
7386         else if (IS_GEN(dev_priv, 3))
7387                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7388         else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
7389                 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
7390         else if (IS_GEN(dev_priv, 2))
7391                 dev_priv->display.init_clock_gating = i830_init_clock_gating;
7392         else {
7393                 MISSING_CASE(INTEL_DEVID(dev_priv));
7394                 dev_priv->display.init_clock_gating = nop_init_clock_gating;
7395         }
7396 }
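
/*
 * The first matching branch above wins, so the more specific platform
 * checks must stay ahead of the broader IS_GEN() checks. The final else
 * logs MISSING_CASE() and installs nop_init_clock_gating() so that an
 * unhandled platform still boots. A hypothetical new platform would be
 * wired up the same way (both names below are illustrative only):
 *
 *	else if (IS_NEWPLATFORM(dev_priv))
 *		dev_priv->display.init_clock_gating = newplat_init_clock_gating;
 */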
7397
7398 /* Set up chip specific power management-related functions */
7399 void intel_init_pm(struct drm_i915_private *dev_priv)
7400 {
7401         /* For cxsr */
7402         if (IS_PINEVIEW(dev_priv))
7403                 pnv_get_mem_freq(dev_priv);
7404         else if (IS_GEN(dev_priv, 5))
7405                 ilk_get_mem_freq(dev_priv);
7406
7407         if (intel_has_sagv(dev_priv))
7408                 skl_setup_sagv_block_time(dev_priv);
7409
7410         /* For FIFO watermark updates */
7411         if (INTEL_GEN(dev_priv) >= 9) {
7412                 skl_setup_wm_latency(dev_priv);
7413                 dev_priv->display.compute_global_watermarks = skl_compute_wm;
7414         } else if (HAS_PCH_SPLIT(dev_priv)) {
7415                 ilk_setup_wm_latency(dev_priv);
7416
7417                 if ((IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[1] &&
7418                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
7419                     (!IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[0] &&
7420                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
7421                         dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
7422                         dev_priv->display.compute_intermediate_wm =
7423                                 ilk_compute_intermediate_wm;
7424                         dev_priv->display.initial_watermarks =
7425                                 ilk_initial_watermarks;
7426                         dev_priv->display.optimize_watermarks =
7427                                 ilk_optimize_watermarks;
7428                 } else {
7429                         drm_dbg_kms(&dev_priv->drm,
7430                                     "Failed to read display plane latency. "
7431                                     "Disabling CxSR\n");
7432                 }
7433         } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
7434                 vlv_setup_wm_latency(dev_priv);
7435                 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
7436                 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
7437                 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
7438                 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
7439                 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
7440         } else if (IS_G4X(dev_priv)) {
7441                 g4x_setup_wm_latency(dev_priv);
7442                 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
7443                 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
7444                 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
7445                 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
7446         } else if (IS_PINEVIEW(dev_priv)) {
7447                 if (!intel_get_cxsr_latency(!IS_MOBILE(dev_priv),
7448                                             dev_priv->is_ddr3,
7449                                             dev_priv->fsb_freq,
7450                                             dev_priv->mem_freq)) {
7451                         drm_info(&dev_priv->drm,
7452                                  "failed to find known CxSR latency "
7453                                  "(found ddr%s fsb freq %d, mem freq %d), "
7454                                  "disabling CxSR\n",
7455                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
7456                                  dev_priv->fsb_freq, dev_priv->mem_freq);
7457                         /* Disable CxSR and never update its watermark again */
7458                         intel_set_memory_cxsr(dev_priv, false);
7459                         dev_priv->display.update_wm = NULL;
7460                 } else {
7461                         dev_priv->display.update_wm = pnv_update_wm;
                     }
7462         } else if (IS_GEN(dev_priv, 4)) {
7463                 dev_priv->display.update_wm = i965_update_wm;
7464         } else if (IS_GEN(dev_priv, 3)) {
7465                 dev_priv->display.update_wm = i9xx_update_wm;
7466                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
7467         } else if (IS_GEN(dev_priv, 2)) {
7468                 if (INTEL_NUM_PIPES(dev_priv) == 1) {
7469                         dev_priv->display.update_wm = i845_update_wm;
7470                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
7471                 } else {
7472                         dev_priv->display.update_wm = i9xx_update_wm;
7473                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
7474                 }
7475         } else {
7476                 drm_err(&dev_priv->drm,
7477                         "unexpected fall-through in %s\n", __func__);
7478         }
7479 }
7480
7481 void intel_pm_setup(struct drm_i915_private *dev_priv)
7482 {
7483         dev_priv->runtime_pm.suspended = false;
7484         atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
7485 }