linux-2.6-microblaze.git: drivers/gpu/drm/i915/intel_pm.c (commit 9c97a95c1816b0d6cb6b1f12f0dd500e8d476a74)
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eugeni Dodonov <eugeni.dodonov@intel.com>
25  *
26  */
27
28 #include <linux/cpufreq.h>
29 #include <linux/module.h>
30 #include <linux/pm_runtime.h>
31
32 #include <drm/drm_atomic_helper.h>
33 #include <drm/drm_fourcc.h>
34 #include <drm/drm_plane_helper.h>
35
36 #include "i915_drv.h"
37 #include "intel_drv.h"
38 #include "../../../platform/x86/intel_ips.h"
39
40 /**
41  * DOC: RC6
42  *
43  * RC6 is a special power stage which allows the GPU to enter a very
44  * low-voltage mode when idle, going as low as 0V while in this state.  This
45  * state is entered automatically when the GPU is idle and RC6 support is
46  * enabled; as soon as a new workload arises, the GPU wakes up automatically as well.
47  *
48  * There are different RC6 modes available on Intel GPUs, which differ from
49  * each other in the latency required to enter and leave RC6 and in the
50  * voltage consumed by the GPU in the different states.
51  *
52  * The combination of the following flags defines which states the GPU is
53  * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
54  * RC6pp is the deepest RC6. Their support by hardware varies according to the
55  * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
56  * which brings the most power savings; deeper states save more power, but
57  * require higher latency to switch to and wake up.
58  */
59
60 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
61 {
62         if (HAS_LLC(dev_priv)) {
63                 /*
64                  * WaCompressedResourceDisplayNewHashMode:skl,kbl
65                  * Display WA #0390: skl,kbl
66                  *
67                  * Must match Sampler, Pixel Back End, and Media. See
68                  * WaCompressedResourceSamplerPbeMediaNewHashMode.
69                  */
70                 I915_WRITE(CHICKEN_PAR1_1,
71                            I915_READ(CHICKEN_PAR1_1) |
72                            SKL_DE_COMPRESSED_HASH_MODE);
73         }
74
75         /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
76         I915_WRITE(CHICKEN_PAR1_1,
77                    I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
78
79         /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
80         I915_WRITE(GEN8_CHICKEN_DCPR_1,
81                    I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
82
83         /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
84         /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
85         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
86                    DISP_FBC_WM_DIS |
87                    DISP_FBC_MEMORY_WAKE);
88
89         /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
90         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
91                    ILK_DPFC_DISABLE_DUMMY0);
92
93         if (IS_SKYLAKE(dev_priv)) {
94                 /* WaDisableDopClockGating */
95                 I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
96                            & ~GEN7_DOP_CLOCK_GATE_ENABLE);
97         }
98 }
99
100 static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
101 {
102         gen9_init_clock_gating(dev_priv);
103
104         /* WaDisableSDEUnitClockGating:bxt */
105         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
106                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
107
108         /*
109          * FIXME:
110          * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
111          */
112         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
113                    GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
114
115         /*
116          * Wa: Backlight PWM may stop in the asserted state, causing backlight
117          * to stay fully on.
118          */
119         I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
120                    PWM1_GATING_DIS | PWM2_GATING_DIS);
121 }
122
123 static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
124 {
125         gen9_init_clock_gating(dev_priv);
126
127         /*
128          * WaDisablePWMClockGating:glk
129          * Backlight PWM may stop in the asserted state, causing backlight
130          * to stay fully on.
131          */
132         I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
133                    PWM1_GATING_DIS | PWM2_GATING_DIS);
134
135         /* WaDDIIOTimeout:glk */
136         if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
137                 u32 val = I915_READ(CHICKEN_MISC_2);
138                 val &= ~(GLK_CL0_PWR_DOWN |
139                          GLK_CL1_PWR_DOWN |
140                          GLK_CL2_PWR_DOWN);
141                 I915_WRITE(CHICKEN_MISC_2, val);
142         }
143
144 }
145
146 static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
147 {
148         u32 tmp;
149
150         tmp = I915_READ(CLKCFG);
151
152         switch (tmp & CLKCFG_FSB_MASK) {
153         case CLKCFG_FSB_533:
154                 dev_priv->fsb_freq = 533; /* 133*4 */
155                 break;
156         case CLKCFG_FSB_800:
157                 dev_priv->fsb_freq = 800; /* 200*4 */
158                 break;
159         case CLKCFG_FSB_667:
160                 dev_priv->fsb_freq =  667; /* 167*4 */
161                 break;
162         case CLKCFG_FSB_400:
163                 dev_priv->fsb_freq = 400; /* 100*4 */
164                 break;
165         }
166
167         switch (tmp & CLKCFG_MEM_MASK) {
168         case CLKCFG_MEM_533:
169                 dev_priv->mem_freq = 533;
170                 break;
171         case CLKCFG_MEM_667:
172                 dev_priv->mem_freq = 667;
173                 break;
174         case CLKCFG_MEM_800:
175                 dev_priv->mem_freq = 800;
176                 break;
177         }
178
179         /* detect pineview DDR3 setting */
180         tmp = I915_READ(CSHRDDR3CTL);
181         dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
182 }
183
184 static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
185 {
186         u16 ddrpll, csipll;
187
188         ddrpll = I915_READ16(DDRMPLL1);
189         csipll = I915_READ16(CSIPLL0);
190
191         switch (ddrpll & 0xff) {
192         case 0xc:
193                 dev_priv->mem_freq = 800;
194                 break;
195         case 0x10:
196                 dev_priv->mem_freq = 1066;
197                 break;
198         case 0x14:
199                 dev_priv->mem_freq = 1333;
200                 break;
201         case 0x18:
202                 dev_priv->mem_freq = 1600;
203                 break;
204         default:
205                 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
206                                  ddrpll & 0xff);
207                 dev_priv->mem_freq = 0;
208                 break;
209         }
210
211         dev_priv->ips.r_t = dev_priv->mem_freq;
212
213         switch (csipll & 0x3ff) {
214         case 0x00c:
215                 dev_priv->fsb_freq = 3200;
216                 break;
217         case 0x00e:
218                 dev_priv->fsb_freq = 3733;
219                 break;
220         case 0x010:
221                 dev_priv->fsb_freq = 4266;
222                 break;
223         case 0x012:
224                 dev_priv->fsb_freq = 4800;
225                 break;
226         case 0x014:
227                 dev_priv->fsb_freq = 5333;
228                 break;
229         case 0x016:
230                 dev_priv->fsb_freq = 5866;
231                 break;
232         case 0x018:
233                 dev_priv->fsb_freq = 6400;
234                 break;
235         default:
236                 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
237                                  csipll & 0x3ff);
238                 dev_priv->fsb_freq = 0;
239                 break;
240         }
241
242         if (dev_priv->fsb_freq == 3200) {
243                 dev_priv->ips.c_m = 0;
244         } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
245                 dev_priv->ips.c_m = 1;
246         } else {
247                 dev_priv->ips.c_m = 2;
248         }
249 }
250
251 static const struct cxsr_latency cxsr_latency_table[] = {
252         {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
253         {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
254         {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
255         {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
256         {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */
257
258         {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
259         {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
260         {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
261         {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
262         {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */
263
264         {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
265         {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
266         {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
267         {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
268         {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */
269
270         {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
271         {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
272         {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
273         {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
274         {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */
275
276         {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
277         {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
278         {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
279         {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
280         {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */
281
282         {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
283         {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
284         {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
285         {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
286         {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
287 };
288
289 static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
290                                                          bool is_ddr3,
291                                                          int fsb,
292                                                          int mem)
293 {
294         const struct cxsr_latency *latency;
295         int i;
296
297         if (fsb == 0 || mem == 0)
298                 return NULL;
299
300         for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
301                 latency = &cxsr_latency_table[i];
302                 if (is_desktop == latency->is_desktop &&
303                     is_ddr3 == latency->is_ddr3 &&
304                     fsb == latency->fsb_freq && mem == latency->mem_freq)
305                         return latency;
306         }
307
308         DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
309
310         return NULL;
311 }
312
313 static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
314 {
315         u32 val;
316
317         mutex_lock(&dev_priv->pcu_lock);
318
319         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
320         if (enable)
321                 val &= ~FORCE_DDR_HIGH_FREQ;
322         else
323                 val |= FORCE_DDR_HIGH_FREQ;
324         val &= ~FORCE_DDR_LOW_FREQ;
325         val |= FORCE_DDR_FREQ_REQ_ACK;
326         vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
327
328         if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
329                       FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
330                 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
331
332         mutex_unlock(&dev_priv->pcu_lock);
333 }
334
335 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
336 {
337         u32 val;
338
339         mutex_lock(&dev_priv->pcu_lock);
340
341         val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
342         if (enable)
343                 val |= DSP_MAXFIFO_PM5_ENABLE;
344         else
345                 val &= ~DSP_MAXFIFO_PM5_ENABLE;
346         vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, val);
347
348         mutex_unlock(&dev_priv->pcu_lock);
349 }
350
351 #define FW_WM(value, plane) \
352         (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
353
354 static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
355 {
356         bool was_enabled;
357         u32 val;
358
359         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
360                 was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
361                 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
362                 POSTING_READ(FW_BLC_SELF_VLV);
363         } else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
364                 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
365                 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
366                 POSTING_READ(FW_BLC_SELF);
367         } else if (IS_PINEVIEW(dev_priv)) {
368                 val = I915_READ(DSPFW3);
369                 was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
370                 if (enable)
371                         val |= PINEVIEW_SELF_REFRESH_EN;
372                 else
373                         val &= ~PINEVIEW_SELF_REFRESH_EN;
374                 I915_WRITE(DSPFW3, val);
375                 POSTING_READ(DSPFW3);
376         } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
377                 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
378                 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
379                                _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
380                 I915_WRITE(FW_BLC_SELF, val);
381                 POSTING_READ(FW_BLC_SELF);
382         } else if (IS_I915GM(dev_priv)) {
383                 /*
384                  * FIXME can't find a bit like this for 915G, and
385                  * yet it does have the related watermark in
386                  * FW_BLC_SELF. What's going on?
387                  */
388                 was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
389                 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
390                                _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
391                 I915_WRITE(INSTPM, val);
392                 POSTING_READ(INSTPM);
393         } else {
394                 return false;
395         }
396
397         trace_intel_memory_cxsr(dev_priv, was_enabled, enable);
398
399         DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
400                       enableddisabled(enable),
401                       enableddisabled(was_enabled));
402
403         return was_enabled;
404 }
405
406 /**
407  * intel_set_memory_cxsr - Configure CxSR state
408  * @dev_priv: i915 device
409  * @enable: Allow vs. disallow CxSR
410  *
411  * Allow or disallow the system to enter a special CxSR
412  * (C-state self refresh) state. What typically happens in CxSR mode
413  * is that several display FIFOs may get combined into a single larger
414  * FIFO for a particular plane (so-called max FIFO mode) to allow the
415  * system to defer memory fetches longer, and the memory will enter
416  * self refresh.
417  *
418  * Note that enabling CxSR does not guarantee that the system enters
419  * this special mode, nor does it guarantee that the system stays
420  * in that mode once entered. So this just allows/disallows the system
421  * to autonomously utilize the CxSR mode. Other factors such as core
422  * C-states will affect when/if the system actually enters/exits the
423  * CxSR mode.
424  *
425  * Note that on VLV/CHV this actually only controls the max FIFO mode,
426  * and the system is free to enter/exit memory self refresh at any time
427  * even when the use of CxSR has been disallowed.
428  *
429  * While the system is actually in the CxSR/max FIFO mode, some plane
430  * control registers will not get latched on vblank. Thus in order to
431  * guarantee the system will respond to changes in the plane registers
432  * we must always disallow CxSR prior to making changes to those registers.
433  * Unfortunately the system will re-evaluate the CxSR conditions at
434  * frame start which happens after vblank start (which is when the plane
435  * registers would get latched), so we can't proceed with the plane update
436  * during the same frame where we disallowed CxSR.
437  *
438  * Certain platforms also have a deeper HPLL SR mode. Fortunately the
439  * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
440  * the hardware w.r.t. HPLL SR when writing to plane registers.
441  * Disallowing just CxSR is sufficient.
442  */
443 bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
444 {
445         bool ret;
446
447         mutex_lock(&dev_priv->wm.wm_mutex);
448         ret = _intel_set_memory_cxsr(dev_priv, enable);
449         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
450                 dev_priv->wm.vlv.cxsr = enable;
451         else if (IS_G4X(dev_priv))
452                 dev_priv->wm.g4x.cxsr = enable;
453         mutex_unlock(&dev_priv->wm.wm_mutex);
454
455         return ret;
456 }
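
/*
 * Illustrative usage sketch (not a real caller in this file): per the
 * kernel-doc above, CxSR must be disallowed before writing plane registers
 * that are not latched while in CxSR/max FIFO mode, and the plane update
 * must not happen in the same frame in which CxSR was disallowed. Only the
 * intel_set_memory_cxsr() calls below are real; the surrounding steps are
 * placeholders:
 *
 *	intel_set_memory_cxsr(dev_priv, false);
 *	... wait for the next frame so the hardware has actually left
 *	    CxSR before the plane registers are written ...
 *	... write the plane registers ...
 *	intel_set_memory_cxsr(dev_priv, true);
 */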
457
458 /*
459  * Latency for FIFO fetches is dependent on several factors:
460  *   - memory configuration (speed, channels)
461  *   - chipset
462  *   - current MCH state
463  * It can be fairly high in some situations, so here we assume a fairly
464  * pessimal value.  It's a tradeoff between extra memory fetches (if we
465  * set this value too high, the FIFO will fetch frequently to stay full)
466  * and power consumption (set it too low to save power and we might see
467  * FIFO underruns and display "flicker").
468  *
469  * A value of 5us seems to be a good balance; safe for very low end
470  * platforms but not overly aggressive on lower latency configs.
471  */
472 static const int pessimal_latency_ns = 5000;
473
474 #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
475         ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
476
477 static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
478 {
479         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
480         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
481         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
482         enum pipe pipe = crtc->pipe;
483         int sprite0_start, sprite1_start;
484
485         switch (pipe) {
486                 u32 dsparb, dsparb2, dsparb3;
487         case PIPE_A:
488                 dsparb = I915_READ(DSPARB);
489                 dsparb2 = I915_READ(DSPARB2);
490                 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
491                 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
492                 break;
493         case PIPE_B:
494                 dsparb = I915_READ(DSPARB);
495                 dsparb2 = I915_READ(DSPARB2);
496                 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
497                 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
498                 break;
499         case PIPE_C:
500                 dsparb2 = I915_READ(DSPARB2);
501                 dsparb3 = I915_READ(DSPARB3);
502                 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
503                 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
504                 break;
505         default:
506                 MISSING_CASE(pipe);
507                 return;
508         }
509
510         fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
511         fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
512         fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
513         fifo_state->plane[PLANE_CURSOR] = 63;
514 }
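
/*
 * Illustrative example (made-up split points, not values read from
 * hardware): with sprite0_start = 256 and sprite1_start = 384, the display
 * FIFO would be divided as primary = 256 entries, sprite0 = 384 - 256 = 128
 * entries and sprite1 = 511 - 384 = 127 entries, plus the fixed 63-entry
 * cursor allocation.
 */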
515
516 static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
517                               enum i9xx_plane_id i9xx_plane)
518 {
519         u32 dsparb = I915_READ(DSPARB);
520         int size;
521
522         size = dsparb & 0x7f;
523         if (i9xx_plane == PLANE_B)
524                 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
525
526         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
527                       dsparb, plane_name(i9xx_plane), size);
528
529         return size;
530 }
531
532 static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
533                               enum i9xx_plane_id i9xx_plane)
534 {
535         u32 dsparb = I915_READ(DSPARB);
536         int size;
537
538         size = dsparb & 0x1ff;
539         if (i9xx_plane == PLANE_B)
540                 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
541         size >>= 1; /* Convert to cachelines */
542
543         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
544                       dsparb, plane_name(i9xx_plane), size);
545
546         return size;
547 }
548
549 static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
550                               enum i9xx_plane_id i9xx_plane)
551 {
552         u32 dsparb = I915_READ(DSPARB);
553         int size;
554
555         size = dsparb & 0x7f;
556         size >>= 2; /* Convert to cachelines */
557
558         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
559                       dsparb, plane_name(i9xx_plane), size);
560
561         return size;
562 }
563
564 /* Pineview has different values for various configs */
565 static const struct intel_watermark_params pineview_display_wm = {
566         .fifo_size = PINEVIEW_DISPLAY_FIFO,
567         .max_wm = PINEVIEW_MAX_WM,
568         .default_wm = PINEVIEW_DFT_WM,
569         .guard_size = PINEVIEW_GUARD_WM,
570         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
571 };
572 static const struct intel_watermark_params pineview_display_hplloff_wm = {
573         .fifo_size = PINEVIEW_DISPLAY_FIFO,
574         .max_wm = PINEVIEW_MAX_WM,
575         .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
576         .guard_size = PINEVIEW_GUARD_WM,
577         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
578 };
579 static const struct intel_watermark_params pineview_cursor_wm = {
580         .fifo_size = PINEVIEW_CURSOR_FIFO,
581         .max_wm = PINEVIEW_CURSOR_MAX_WM,
582         .default_wm = PINEVIEW_CURSOR_DFT_WM,
583         .guard_size = PINEVIEW_CURSOR_GUARD_WM,
584         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
585 };
586 static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
587         .fifo_size = PINEVIEW_CURSOR_FIFO,
588         .max_wm = PINEVIEW_CURSOR_MAX_WM,
589         .default_wm = PINEVIEW_CURSOR_DFT_WM,
590         .guard_size = PINEVIEW_CURSOR_GUARD_WM,
591         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
592 };
593 static const struct intel_watermark_params i965_cursor_wm_info = {
594         .fifo_size = I965_CURSOR_FIFO,
595         .max_wm = I965_CURSOR_MAX_WM,
596         .default_wm = I965_CURSOR_DFT_WM,
597         .guard_size = 2,
598         .cacheline_size = I915_FIFO_LINE_SIZE,
599 };
600 static const struct intel_watermark_params i945_wm_info = {
601         .fifo_size = I945_FIFO_SIZE,
602         .max_wm = I915_MAX_WM,
603         .default_wm = 1,
604         .guard_size = 2,
605         .cacheline_size = I915_FIFO_LINE_SIZE,
606 };
607 static const struct intel_watermark_params i915_wm_info = {
608         .fifo_size = I915_FIFO_SIZE,
609         .max_wm = I915_MAX_WM,
610         .default_wm = 1,
611         .guard_size = 2,
612         .cacheline_size = I915_FIFO_LINE_SIZE,
613 };
614 static const struct intel_watermark_params i830_a_wm_info = {
615         .fifo_size = I855GM_FIFO_SIZE,
616         .max_wm = I915_MAX_WM,
617         .default_wm = 1,
618         .guard_size = 2,
619         .cacheline_size = I830_FIFO_LINE_SIZE,
620 };
621 static const struct intel_watermark_params i830_bc_wm_info = {
622         .fifo_size = I855GM_FIFO_SIZE,
623         .max_wm = I915_MAX_WM/2,
624         .default_wm = 1,
625         .guard_size = 2,
626         .cacheline_size = I830_FIFO_LINE_SIZE,
627 };
628 static const struct intel_watermark_params i845_wm_info = {
629         .fifo_size = I830_FIFO_SIZE,
630         .max_wm = I915_MAX_WM,
631         .default_wm = 1,
632         .guard_size = 2,
633         .cacheline_size = I830_FIFO_LINE_SIZE,
634 };
635
636 /**
637  * intel_wm_method1 - Method 1 / "small buffer" watermark formula
638  * @pixel_rate: Pipe pixel rate in kHz
639  * @cpp: Plane bytes per pixel
640  * @latency: Memory wakeup latency in 0.1us units
641  *
642  * Compute the watermark using the method 1 or "small buffer"
643  * formula. The caller may additionally add extra cachelines
644  * to account for TLB misses and clock crossings.
645  *
646  * This method is concerned with the short term drain rate
647  * of the FIFO, i.e. it does not account for blanking periods
648  * which would effectively reduce the average drain rate across
649  * a longer period. The name "small" refers to the fact that the
650  * FIFO is relatively small compared to the amount of data
651  * fetched.
652  *
653  * The FIFO level vs. time graph might look something like:
654  *
655  *   |\   |\
656  *   | \  | \
657  * __---__---__ (- plane active, _ blanking)
658  * -> time
659  *
660  * or perhaps like this:
661  *
662  *   |\|\  |\|\
663  * __----__----__ (- plane active, _ blanking)
664  * -> time
665  *
666  * Returns:
667  * The watermark in bytes
668  */
669 static unsigned int intel_wm_method1(unsigned int pixel_rate,
670                                      unsigned int cpp,
671                                      unsigned int latency)
672 {
673         u64 ret;
674
675         ret = (u64)pixel_rate * cpp * latency;
676         ret = DIV_ROUND_UP_ULL(ret, 10000);
677
678         return ret;
679 }
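
/*
 * Worked example with illustrative numbers (not taken from Bspec): a
 * 148500 kHz pixel rate (1080p60), 4 bytes per pixel and a 5 usec wakeup
 * latency (latency = 50 in 0.1us units) give
 * 148500 * 4 * 50 / 10000 = 2970 bytes, i.e. the FIFO must hold roughly
 * 2970 bytes to ride out the memory wakeup time.
 */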
680
681 /**
682  * intel_wm_method2 - Method 2 / "large buffer" watermark formula
683  * @pixel_rate: Pipe pixel rate in kHz
684  * @htotal: Pipe horizontal total
685  * @width: Plane width in pixels
686  * @cpp: Plane bytes per pixel
687  * @latency: Memory wakeup latency in 0.1us units
688  *
689  * Compute the watermark using the method 2 or "large buffer"
690  * formula. The caller may additionally add extra cachelines
691  * to account for TLB misses and clock crossings.
692  *
693  * This method is concerned with the long term drain rate
694  * of the FIFO, i.e. it does account for blanking periods
695  * which effectively reduce the average drain rate across
696  * a longer period. The name "large" refers to the fact that the
697  * FIFO is relatively large compared to the amount of data
698  * fetched.
699  *
700  * The FIFO level vs. time graph might look something like:
701  *
702  *    |\___       |\___
703  *    |    \___   |    \___
704  *    |        \  |        \
705  * __ --__--__--__--__--__--__ (- plane active, _ blanking)
706  * -> time
707  *
708  * Returns:
709  * The watermark in bytes
710  */
711 static unsigned int intel_wm_method2(unsigned int pixel_rate,
712                                      unsigned int htotal,
713                                      unsigned int width,
714                                      unsigned int cpp,
715                                      unsigned int latency)
716 {
717         unsigned int ret;
718
719         /*
720          * FIXME remove once all users are computing
721          * watermarks in the correct place.
722          */
723         if (WARN_ON_ONCE(htotal == 0))
724                 htotal = 1;
725
726         ret = (latency * pixel_rate) / (htotal * 10000);
727         ret = (ret + 1) * width * cpp;
728
729         return ret;
730 }
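
/*
 * Worked example with illustrative numbers (not taken from Bspec): with a
 * 148500 kHz pixel rate, htotal = 2200, a 1920 pixel wide plane, 4 bytes
 * per pixel and a 5 usec wakeup latency (latency = 50), the latency spans
 * 50 * 148500 / (2200 * 10000) = 0 complete lines, so the watermark is
 * (0 + 1) * 1920 * 4 = 7680 bytes, i.e. one full line of plane data.
 */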
731
732 /**
733  * intel_calculate_wm - calculate watermark level
734  * @pixel_rate: pixel clock
735  * @wm: chip FIFO params
736  * @fifo_size: size of the FIFO buffer
737  * @cpp: bytes per pixel
738  * @latency_ns: memory latency for the platform
739  *
740  * Calculate the watermark level (the level at which the display plane will
741  * start fetching from memory again).  Each chip has a different display
742  * FIFO size and allocation, so the caller needs to figure that out and pass
743  * in the correct intel_watermark_params structure.
744  *
745  * As the pixel clock runs, the FIFO will be drained at a rate that depends
746  * on the pixel size.  When it reaches the watermark level, it'll start
747  * fetching FIFO line-sized chunks from memory until the FIFO fills
748  * past the watermark point.  If the FIFO drains completely, a FIFO underrun
749  * will occur, and a display engine hang could result.
750  */
751 static unsigned int intel_calculate_wm(int pixel_rate,
752                                        const struct intel_watermark_params *wm,
753                                        int fifo_size, int cpp,
754                                        unsigned int latency_ns)
755 {
756         int entries, wm_size;
757
758         /*
759          * Note: we need to make sure we don't overflow for various clock &
760          * latency values.
761          * Clocks go from a few thousand to several hundred thousand.
762          * Latency is usually a few thousand nanoseconds.
763          */
764         entries = intel_wm_method1(pixel_rate, cpp,
765                                    latency_ns / 100);
766         entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
767                 wm->guard_size;
768         DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);
769
770         wm_size = fifo_size - entries;
771         DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
772
773         /* Don't promote wm_size to unsigned... */
774         if (wm_size > wm->max_wm)
775                 wm_size = wm->max_wm;
776         if (wm_size <= 0)
777                 wm_size = wm->default_wm;
778
779         /*
780          * Bspec seems to indicate that the value shouldn't be lower than
781          * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
782          * Let's go for 8, which is the burst size, since certain platforms
783          * already use a hardcoded 8 (which is what the spec says should be
784          * done).
785          */
786         if (wm_size <= 8)
787                 wm_size = 8;
788
789         return wm_size;
790 }
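
/*
 * Worked example with illustrative parameters (the FIFO geometry below is
 * made up, not taken from any platform's intel_watermark_params): with
 * pixel_rate = 148500, cpp = 4, latency_ns = 5000, cacheline_size = 64,
 * guard_size = 2 and fifo_size = 512, method 1 gives 2970 bytes, which is
 * DIV_ROUND_UP(2970, 64) + 2 = 49 FIFO entries, so the watermark level is
 * 512 - 49 = 463 entries (subject to the max_wm/default_wm clamping above).
 */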
791
792 static bool is_disabling(int old, int new, int threshold)
793 {
794         return old >= threshold && new < threshold;
795 }
796
797 static bool is_enabling(int old, int new, int threshold)
798 {
799         return old < threshold && new >= threshold;
800 }
801
802 static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
803 {
804         return dev_priv->wm.max_level + 1;
805 }
806
807 static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
808                                    const struct intel_plane_state *plane_state)
809 {
810         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
811
812         /* FIXME check the 'enable' instead */
813         if (!crtc_state->base.active)
814                 return false;
815
816         /*
817          * Treat cursor with fb as always visible since cursor updates
818          * can happen faster than the vrefresh rate, and the current
819          * watermark code doesn't handle that correctly. Cursor updates
820          * which set/clear the fb or change the cursor size are going
821          * to get throttled by intel_legacy_cursor_update() to work
822          * around this problem with the watermark code.
823          */
824         if (plane->id == PLANE_CURSOR)
825                 return plane_state->base.fb != NULL;
826         else
827                 return plane_state->base.visible;
828 }
829
830 static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
831 {
832         struct intel_crtc *crtc, *enabled = NULL;
833
834         for_each_intel_crtc(&dev_priv->drm, crtc) {
835                 if (intel_crtc_active(crtc)) {
836                         if (enabled)
837                                 return NULL;
838                         enabled = crtc;
839                 }
840         }
841
842         return enabled;
843 }
844
845 static void pineview_update_wm(struct intel_crtc *unused_crtc)
846 {
847         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
848         struct intel_crtc *crtc;
849         const struct cxsr_latency *latency;
850         u32 reg;
851         unsigned int wm;
852
853         latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
854                                          dev_priv->is_ddr3,
855                                          dev_priv->fsb_freq,
856                                          dev_priv->mem_freq);
857         if (!latency) {
858                 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
859                 intel_set_memory_cxsr(dev_priv, false);
860                 return;
861         }
862
863         crtc = single_enabled_crtc(dev_priv);
864         if (crtc) {
865                 const struct drm_display_mode *adjusted_mode =
866                         &crtc->config->base.adjusted_mode;
867                 const struct drm_framebuffer *fb =
868                         crtc->base.primary->state->fb;
869                 int cpp = fb->format->cpp[0];
870                 int clock = adjusted_mode->crtc_clock;
871
872                 /* Display SR */
873                 wm = intel_calculate_wm(clock, &pineview_display_wm,
874                                         pineview_display_wm.fifo_size,
875                                         cpp, latency->display_sr);
876                 reg = I915_READ(DSPFW1);
877                 reg &= ~DSPFW_SR_MASK;
878                 reg |= FW_WM(wm, SR);
879                 I915_WRITE(DSPFW1, reg);
880                 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
881
882                 /* cursor SR */
883                 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
884                                         pineview_display_wm.fifo_size,
885                                         4, latency->cursor_sr);
886                 reg = I915_READ(DSPFW3);
887                 reg &= ~DSPFW_CURSOR_SR_MASK;
888                 reg |= FW_WM(wm, CURSOR_SR);
889                 I915_WRITE(DSPFW3, reg);
890
891                 /* Display HPLL off SR */
892                 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
893                                         pineview_display_hplloff_wm.fifo_size,
894                                         cpp, latency->display_hpll_disable);
895                 reg = I915_READ(DSPFW3);
896                 reg &= ~DSPFW_HPLL_SR_MASK;
897                 reg |= FW_WM(wm, HPLL_SR);
898                 I915_WRITE(DSPFW3, reg);
899
900                 /* cursor HPLL off SR */
901                 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
902                                         pineview_display_hplloff_wm.fifo_size,
903                                         4, latency->cursor_hpll_disable);
904                 reg = I915_READ(DSPFW3);
905                 reg &= ~DSPFW_HPLL_CURSOR_MASK;
906                 reg |= FW_WM(wm, HPLL_CURSOR);
907                 I915_WRITE(DSPFW3, reg);
908                 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
909
910                 intel_set_memory_cxsr(dev_priv, true);
911         } else {
912                 intel_set_memory_cxsr(dev_priv, false);
913         }
914 }
915
916 /*
917  * Documentation says:
918  * "If the line size is small, the TLB fetches can get in the way of the
919  *  data fetches, causing some lag in the pixel data return which is not
920  *  accounted for in the above formulas. The following adjustment only
921  *  needs to be applied if eight whole lines fit in the buffer at once.
922  *  The WM is adjusted upwards by the difference between the FIFO size
923  *  and the size of 8 whole lines. This adjustment is always performed
924  *  in the actual pixel depth regardless of whether FBC is enabled or not."
925  */
926 static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
927 {
928         int tlb_miss = fifo_size * 64 - width * cpp * 8;
929
930         return max(0, tlb_miss);
931 }
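
/*
 * Worked example with illustrative numbers: a 511 cacheline FIFO holds
 * 511 * 64 = 32704 bytes. A 960 pixel wide, 4 bpp plane needs
 * 960 * 4 * 8 = 30720 bytes for eight whole lines, so the watermark is
 * bumped by 32704 - 30720 = 1984 bytes. A 1920 pixel wide, 4 bpp plane
 * cannot fit eight whole lines, so no adjustment is applied (the negative
 * result is clamped to 0).
 */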
932
933 static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
934                                 const struct g4x_wm_values *wm)
935 {
936         enum pipe pipe;
937
938         for_each_pipe(dev_priv, pipe)
939                 trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
940
941         I915_WRITE(DSPFW1,
942                    FW_WM(wm->sr.plane, SR) |
943                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
944                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
945                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
946         I915_WRITE(DSPFW2,
947                    (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
948                    FW_WM(wm->sr.fbc, FBC_SR) |
949                    FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
950                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
951                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
952                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
953         I915_WRITE(DSPFW3,
954                    (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
955                    FW_WM(wm->sr.cursor, CURSOR_SR) |
956                    FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
957                    FW_WM(wm->hpll.plane, HPLL_SR));
958
959         POSTING_READ(DSPFW1);
960 }
961
962 #define FW_WM_VLV(value, plane) \
963         (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
964
965 static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
966                                 const struct vlv_wm_values *wm)
967 {
968         enum pipe pipe;
969
970         for_each_pipe(dev_priv, pipe) {
971                 trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
972
973                 I915_WRITE(VLV_DDL(pipe),
974                            (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
975                            (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
976                            (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
977                            (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
978         }
979
980         /*
981          * Zero the (unused) WM1 watermarks, and also clear all the
982          * high order bits so that there are no out of bounds values
983          * present in the registers during the reprogramming.
984          */
985         I915_WRITE(DSPHOWM, 0);
986         I915_WRITE(DSPHOWM1, 0);
987         I915_WRITE(DSPFW4, 0);
988         I915_WRITE(DSPFW5, 0);
989         I915_WRITE(DSPFW6, 0);
990
991         I915_WRITE(DSPFW1,
992                    FW_WM(wm->sr.plane, SR) |
993                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
994                    FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
995                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
996         I915_WRITE(DSPFW2,
997                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
998                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
999                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
1000         I915_WRITE(DSPFW3,
1001                    FW_WM(wm->sr.cursor, CURSOR_SR));
1002
1003         if (IS_CHERRYVIEW(dev_priv)) {
1004                 I915_WRITE(DSPFW7_CHV,
1005                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1006                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1007                 I915_WRITE(DSPFW8_CHV,
1008                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
1009                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
1010                 I915_WRITE(DSPFW9_CHV,
1011                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
1012                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
1013                 I915_WRITE(DSPHOWM,
1014                            FW_WM(wm->sr.plane >> 9, SR_HI) |
1015                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
1016                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
1017                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
1018                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1019                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1020                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1021                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1022                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1023                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1024         } else {
1025                 I915_WRITE(DSPFW7,
1026                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1027                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1028                 I915_WRITE(DSPHOWM,
1029                            FW_WM(wm->sr.plane >> 9, SR_HI) |
1030                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1031                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1032                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1033                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1034                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1035                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1036         }
1037
1038         POSTING_READ(DSPFW1);
1039 }
1040
1041 #undef FW_WM_VLV
1042
1043 static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
1044 {
1045         /* all latencies in usec */
1046         dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
1047         dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
1048         dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;
1049
1050         dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
1051 }
1052
1053 static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
1054 {
1055         /*
1056          * DSPCNTR[13] supposedly controls whether the
1057          * primary plane can use the FIFO space otherwise
1058          * reserved for the sprite plane. It's not 100% clear
1059          * what the actual FIFO size is, but it looks like we
1060          * can happily set both primary and sprite watermarks
1061          * up to 127 cachelines. So that would seem to mean
1062          * that either DSPCNTR[13] doesn't do anything, or that
1063          * the total FIFO is >= 256 cachelines in size. Either
1064          * way, we don't seem to have to worry about this
1065          * repartitioning as the maximum watermark value the
1066          * register can hold for each plane is lower than the
1067          * minimum FIFO size.
1068          */
1069         switch (plane_id) {
1070         case PLANE_CURSOR:
1071                 return 63;
1072         case PLANE_PRIMARY:
1073                 return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
1074         case PLANE_SPRITE0:
1075                 return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
1076         default:
1077                 MISSING_CASE(plane_id);
1078                 return 0;
1079         }
1080 }
1081
1082 static int g4x_fbc_fifo_size(int level)
1083 {
1084         switch (level) {
1085         case G4X_WM_LEVEL_SR:
1086                 return 7;
1087         case G4X_WM_LEVEL_HPLL:
1088                 return 15;
1089         default:
1090                 MISSING_CASE(level);
1091                 return 0;
1092         }
1093 }
1094
1095 static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state,
1096                           const struct intel_plane_state *plane_state,
1097                           int level)
1098 {
1099         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1100         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1101         const struct drm_display_mode *adjusted_mode =
1102                 &crtc_state->base.adjusted_mode;
1103         unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
1104         unsigned int clock, htotal, cpp, width, wm;
1105
1106         if (latency == 0)
1107                 return USHRT_MAX;
1108
1109         if (!intel_wm_plane_visible(crtc_state, plane_state))
1110                 return 0;
1111
1112         /*
1113          * Not 100% sure which way ELK should go here as the
1114          * spec only says CL/CTG should assume 32bpp and BW
1115          * doesn't need to. But as these things followed the
1116          * mobile vs. desktop lines on gen3 as well, let's
1117          * assume ELK doesn't need this.
1118          *
1119          * The spec also fails to list such a restriction for
1120          * the HPLL watermark, which seems a little strange.
1121          * Let's use 32bpp for the HPLL watermark as well.
1122          */
1123         if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
1124             level != G4X_WM_LEVEL_NORMAL)
1125                 cpp = 4;
1126         else
1127                 cpp = plane_state->base.fb->format->cpp[0];
1128
1129         clock = adjusted_mode->crtc_clock;
1130         htotal = adjusted_mode->crtc_htotal;
1131
1132         if (plane->id == PLANE_CURSOR)
1133                 width = plane_state->base.crtc_w;
1134         else
1135                 width = drm_rect_width(&plane_state->base.dst);
1136
1137         if (plane->id == PLANE_CURSOR) {
1138                 wm = intel_wm_method2(clock, htotal, width, cpp, latency);
1139         } else if (plane->id == PLANE_PRIMARY &&
1140                    level == G4X_WM_LEVEL_NORMAL) {
1141                 wm = intel_wm_method1(clock, cpp, latency);
1142         } else {
1143                 unsigned int small, large;
1144
1145                 small = intel_wm_method1(clock, cpp, latency);
1146                 large = intel_wm_method2(clock, htotal, width, cpp, latency);
1147
1148                 wm = min(small, large);
1149         }
1150
1151         wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
1152                               width, cpp);
1153
1154         wm = DIV_ROUND_UP(wm, 64) + 2;
1155
1156         return min_t(unsigned int, wm, USHRT_MAX);
1157 }
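
/*
 * Tying the pieces together with the same illustrative numbers used in the
 * method 1/method 2 examples above (148500 kHz, htotal = 2200, 1920 pixel
 * wide plane, 4 bpp, 5 usec latency): method 1 gives 2970 bytes and
 * method 2 gives 7680 bytes, so a sprite plane would use
 * min(2970, 7680) = 2970 bytes, plus any TLB miss adjustment (zero here,
 * since eight whole lines do not fit in the FIFO), and the value returned
 * is DIV_ROUND_UP(2970, 64) + 2 = 49 cachelines.
 */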
1158
1159 static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1160                                  int level, enum plane_id plane_id, u16 value)
1161 {
1162         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1163         bool dirty = false;
1164
1165         for (; level < intel_wm_num_levels(dev_priv); level++) {
1166                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1167
1168                 dirty |= raw->plane[plane_id] != value;
1169                 raw->plane[plane_id] = value;
1170         }
1171
1172         return dirty;
1173 }
1174
1175 static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
1176                                int level, u16 value)
1177 {
1178         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1179         bool dirty = false;
1180
1181         /* NORMAL level doesn't have an FBC watermark */
1182         level = max(level, G4X_WM_LEVEL_SR);
1183
1184         for (; level < intel_wm_num_levels(dev_priv); level++) {
1185                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1186
1187                 dirty |= raw->fbc != value;
1188                 raw->fbc = value;
1189         }
1190
1191         return dirty;
1192 }
1193
1194 static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1195                               const struct intel_plane_state *pstate,
1196                               u32 pri_val);
1197
1198 static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1199                                      const struct intel_plane_state *plane_state)
1200 {
1201         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1202         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1203         enum plane_id plane_id = plane->id;
1204         bool dirty = false;
1205         int level;
1206
1207         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1208                 dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1209                 if (plane_id == PLANE_PRIMARY)
1210                         dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
1211                 goto out;
1212         }
1213
1214         for (level = 0; level < num_levels; level++) {
1215                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1216                 int wm, max_wm;
1217
1218                 wm = g4x_compute_wm(crtc_state, plane_state, level);
1219                 max_wm = g4x_plane_fifo_size(plane_id, level);
1220
1221                 if (wm > max_wm)
1222                         break;
1223
1224                 dirty |= raw->plane[plane_id] != wm;
1225                 raw->plane[plane_id] = wm;
1226
1227                 if (plane_id != PLANE_PRIMARY ||
1228                     level == G4X_WM_LEVEL_NORMAL)
1229                         continue;
1230
1231                 wm = ilk_compute_fbc_wm(crtc_state, plane_state,
1232                                         raw->plane[plane_id]);
1233                 max_wm = g4x_fbc_fifo_size(level);
1234
1235                 /*
1236                  * FBC wm is not mandatory as we
1237                  * can always just disable its use.
1238                  */
1239                 if (wm > max_wm)
1240                         wm = USHRT_MAX;
1241
1242                 dirty |= raw->fbc != wm;
1243                 raw->fbc = wm;
1244         }
1245
1246         /* mark watermarks as invalid */
1247         dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1248
1249         if (plane_id == PLANE_PRIMARY)
1250                 dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
1251
1252  out:
1253         if (dirty) {
1254                 DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1255                               plane->base.name,
1256                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
1257                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
1258                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
1259
1260                 if (plane_id == PLANE_PRIMARY)
1261                         DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
1262                                       crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
1263                                       crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
1264         }
1265
1266         return dirty;
1267 }
1268
1269 static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1270                                       enum plane_id plane_id, int level)
1271 {
1272         const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1273
1274         return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
1275 }
1276
1277 static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
1278                                      int level)
1279 {
1280         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1281
1282         if (level > dev_priv->wm.max_level)
1283                 return false;
1284
1285         return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1286                 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1287                 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1288 }
1289
1290 /* mark all levels starting from 'level' as invalid */
1291 static void g4x_invalidate_wms(struct intel_crtc *crtc,
1292                                struct g4x_wm_state *wm_state, int level)
1293 {
1294         if (level <= G4X_WM_LEVEL_NORMAL) {
1295                 enum plane_id plane_id;
1296
1297                 for_each_plane_id_on_crtc(crtc, plane_id)
1298                         wm_state->wm.plane[plane_id] = USHRT_MAX;
1299         }
1300
1301         if (level <= G4X_WM_LEVEL_SR) {
1302                 wm_state->cxsr = false;
1303                 wm_state->sr.cursor = USHRT_MAX;
1304                 wm_state->sr.plane = USHRT_MAX;
1305                 wm_state->sr.fbc = USHRT_MAX;
1306         }
1307
1308         if (level <= G4X_WM_LEVEL_HPLL) {
1309                 wm_state->hpll_en = false;
1310                 wm_state->hpll.cursor = USHRT_MAX;
1311                 wm_state->hpll.plane = USHRT_MAX;
1312                 wm_state->hpll.fbc = USHRT_MAX;
1313         }
1314 }
1315
1316 static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1317 {
1318         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1319         struct intel_atomic_state *state =
1320                 to_intel_atomic_state(crtc_state->base.state);
1321         struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1322         int num_active_planes = hweight32(crtc_state->active_planes &
1323                                           ~BIT(PLANE_CURSOR));
1324         const struct g4x_pipe_wm *raw;
1325         const struct intel_plane_state *old_plane_state;
1326         const struct intel_plane_state *new_plane_state;
1327         struct intel_plane *plane;
1328         enum plane_id plane_id;
1329         int i, level;
1330         unsigned int dirty = 0;
1331
1332         for_each_oldnew_intel_plane_in_state(state, plane,
1333                                              old_plane_state,
1334                                              new_plane_state, i) {
1335                 if (new_plane_state->base.crtc != &crtc->base &&
1336                     old_plane_state->base.crtc != &crtc->base)
1337                         continue;
1338
1339                 if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
1340                         dirty |= BIT(plane->id);
1341         }
1342
1343         if (!dirty)
1344                 return 0;
1345
1346         level = G4X_WM_LEVEL_NORMAL;
1347         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1348                 goto out;
1349
1350         raw = &crtc_state->wm.g4x.raw[level];
1351         for_each_plane_id_on_crtc(crtc, plane_id)
1352                 wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1353
1354         level = G4X_WM_LEVEL_SR;
1355
1356         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1357                 goto out;
1358
1359         raw = &crtc_state->wm.g4x.raw[level];
1360         wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1361         wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1362         wm_state->sr.fbc = raw->fbc;
1363
1364         wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);
1365
1366         level = G4X_WM_LEVEL_HPLL;
1367
1368         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1369                 goto out;
1370
1371         raw = &crtc_state->wm.g4x.raw[level];
1372         wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1373         wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1374         wm_state->hpll.fbc = raw->fbc;
1375
1376         wm_state->hpll_en = wm_state->cxsr;
1377
1378         level++;
1379
1380  out:
1381         if (level == G4X_WM_LEVEL_NORMAL)
1382                 return -EINVAL;
1383
1384         /* invalidate the higher levels */
1385         g4x_invalidate_wms(crtc, wm_state, level);
1386
1387         /*
1388          * Determine if the FBC watermark(s) can be used. If
1389          * this isn't the case we prefer to disable the FBC
1390          * watermark(s) rather than disable the SR/HPLL
1391          * level(s) entirely.
1392          */
1393         wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1394
1395         if (level >= G4X_WM_LEVEL_SR &&
1396             wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1397                 wm_state->fbc_en = false;
1398         else if (level >= G4X_WM_LEVEL_HPLL &&
1399                  wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1400                 wm_state->fbc_en = false;
1401
1402         return 0;
1403 }
1404
1405 static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
1406 {
1407         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
1408         struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
1409         const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
1410         struct intel_atomic_state *intel_state =
1411                 to_intel_atomic_state(new_crtc_state->base.state);
1412         const struct intel_crtc_state *old_crtc_state =
1413                 intel_atomic_get_old_crtc_state(intel_state, crtc);
1414         const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
1415         enum plane_id plane_id;
1416
1417         if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
1418                 *intermediate = *optimal;
1419
1420                 intermediate->cxsr = false;
1421                 intermediate->hpll_en = false;
1422                 goto out;
1423         }
1424
1425         intermediate->cxsr = optimal->cxsr && active->cxsr &&
1426                 !new_crtc_state->disable_cxsr;
1427         intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
1428                 !new_crtc_state->disable_cxsr;
1429         intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1430
1431         for_each_plane_id_on_crtc(crtc, plane_id) {
1432                 intermediate->wm.plane[plane_id] =
1433                         max(optimal->wm.plane[plane_id],
1434                             active->wm.plane[plane_id]);
1435
1436                 WARN_ON(intermediate->wm.plane[plane_id] >
1437                         g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1438         }
1439
1440         intermediate->sr.plane = max(optimal->sr.plane,
1441                                      active->sr.plane);
1442         intermediate->sr.cursor = max(optimal->sr.cursor,
1443                                       active->sr.cursor);
1444         intermediate->sr.fbc = max(optimal->sr.fbc,
1445                                    active->sr.fbc);
1446
1447         intermediate->hpll.plane = max(optimal->hpll.plane,
1448                                        active->hpll.plane);
1449         intermediate->hpll.cursor = max(optimal->hpll.cursor,
1450                                         active->hpll.cursor);
1451         intermediate->hpll.fbc = max(optimal->hpll.fbc,
1452                                      active->hpll.fbc);
1453
1454         WARN_ON((intermediate->sr.plane >
1455                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1456                  intermediate->sr.cursor >
1457                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1458                 intermediate->cxsr);
1459         WARN_ON((intermediate->sr.plane >
1460                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1461                  intermediate->sr.cursor >
1462                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1463                 intermediate->hpll_en);
1464
1465         WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) &&
1466                 intermediate->fbc_en && intermediate->cxsr);
1467         WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) &&
1468                 intermediate->fbc_en && intermediate->hpll_en);
1469
1470 out:
1471         /*
1472          * If our intermediate WM are identical to the final WM, then we can
1473          * omit the post-vblank programming; only update if it's different.
1474          */
1475         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
1476                 new_crtc_state->wm.need_postvbl_update = true;
1477
1478         return 0;
1479 }
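
/*
 * Illustrative note: the intermediate watermarks above are merged with
 * max() because they must be safe both before and after the plane update
 * that is about to happen. E.g. for a hypothetical plane whose old
 * (active) level-0 watermark is 10 and whose new (optimal) one is 25, the
 * intermediate value becomes max(10, 25) = 25, covering whichever
 * configuration the hardware is scanning out at any instant during the
 * update; the WARN_ONs then check that this conservative value still fits
 * the relevant FIFO.
 */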
1480
1481 static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1482                          struct g4x_wm_values *wm)
1483 {
1484         struct intel_crtc *crtc;
1485         int num_active_crtcs = 0;
1486
1487         wm->cxsr = true;
1488         wm->hpll_en = true;
1489         wm->fbc_en = true;
1490
1491         for_each_intel_crtc(&dev_priv->drm, crtc) {
1492                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1493
1494                 if (!crtc->active)
1495                         continue;
1496
1497                 if (!wm_state->cxsr)
1498                         wm->cxsr = false;
1499                 if (!wm_state->hpll_en)
1500                         wm->hpll_en = false;
1501                 if (!wm_state->fbc_en)
1502                         wm->fbc_en = false;
1503
1504                 num_active_crtcs++;
1505         }
1506
1507         if (num_active_crtcs != 1) {
1508                 wm->cxsr = false;
1509                 wm->hpll_en = false;
1510                 wm->fbc_en = false;
1511         }
1512
1513         for_each_intel_crtc(&dev_priv->drm, crtc) {
1514                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1515                 enum pipe pipe = crtc->pipe;
1516
1517                 wm->pipe[pipe] = wm_state->wm;
1518                 if (crtc->active && wm->cxsr)
1519                         wm->sr = wm_state->sr;
1520                 if (crtc->active && wm->hpll_en)
1521                         wm->hpll = wm_state->hpll;
1522         }
1523 }
1524
1525 static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1526 {
1527         struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1528         struct g4x_wm_values new_wm = {};
1529
1530         g4x_merge_wm(dev_priv, &new_wm);
1531
1532         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1533                 return;
1534
1535         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1536                 _intel_set_memory_cxsr(dev_priv, false);
1537
1538         g4x_write_wm_values(dev_priv, &new_wm);
1539
1540         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1541                 _intel_set_memory_cxsr(dev_priv, true);
1542
1543         *old_wm = new_wm;
1544 }
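
/*
 * Illustrative note on ordering in g4x_program_watermarks(): when the
 * merged state is turning CxSR off, it is disabled before the new
 * watermark values are written; when it is turning CxSR on, it is enabled
 * only after the new values are in place. Either way CxSR is never left
 * active while watermarks that were not computed for it are programmed.
 */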
1545
1546 static void g4x_initial_watermarks(struct intel_atomic_state *state,
1547                                    struct intel_crtc_state *crtc_state)
1548 {
1549         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1550         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1551
1552         mutex_lock(&dev_priv->wm.wm_mutex);
1553         crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1554         g4x_program_watermarks(dev_priv);
1555         mutex_unlock(&dev_priv->wm.wm_mutex);
1556 }
1557
1558 static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1559                                     struct intel_crtc_state *crtc_state)
1560 {
1561         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1562         struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
1563
1564         if (!crtc_state->wm.need_postvbl_update)
1565                 return;
1566
1567         mutex_lock(&dev_priv->wm.wm_mutex);
1568         intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1569         g4x_program_watermarks(dev_priv);
1570         mutex_unlock(&dev_priv->wm.wm_mutex);
1571 }
1572
1573 /* latency must be in 0.1us units. */
1574 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
1575                                    unsigned int htotal,
1576                                    unsigned int width,
1577                                    unsigned int cpp,
1578                                    unsigned int latency)
1579 {
1580         unsigned int ret;
1581
1582         ret = intel_wm_method2(pixel_rate, htotal,
1583                                width, cpp, latency);
1584         ret = DIV_ROUND_UP(ret, 64);
1585
1586         return ret;
1587 }
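
/*
 * Illustrative example (made-up number): the DIV_ROUND_UP(ret, 64) above
 * rescales the result of intel_wm_method2() into 64-byte FIFO cachelines,
 * assuming (as the other callers in this file do) that the helper's result
 * is in bytes. If the helper returned 1000, the watermark used by the
 * VLV/CHV code would be DIV_ROUND_UP(1000, 64) = 16.
 */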
1588
1589 static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
1590 {
1591         /* all latencies in usec */
1592         dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1593
1594         dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1595
1596         if (IS_CHERRYVIEW(dev_priv)) {
1597                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1598                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
1599
1600                 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
1601         }
1602 }
1603
1604 static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1605                                 const struct intel_plane_state *plane_state,
1606                                 int level)
1607 {
1608         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1609         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1610         const struct drm_display_mode *adjusted_mode =
1611                 &crtc_state->base.adjusted_mode;
1612         unsigned int clock, htotal, cpp, width, wm;
1613
1614         if (dev_priv->wm.pri_latency[level] == 0)
1615                 return USHRT_MAX;
1616
1617         if (!intel_wm_plane_visible(crtc_state, plane_state))
1618                 return 0;
1619
1620         cpp = plane_state->base.fb->format->cpp[0];
1621         clock = adjusted_mode->crtc_clock;
1622         htotal = adjusted_mode->crtc_htotal;
1623         width = crtc_state->pipe_src_w;
1624
1625         if (plane->id == PLANE_CURSOR) {
1626                 /*
1627                  * FIXME the formula gives values that are
1628                  * too big for the cursor FIFO, and hence we
1629                  * would never be able to use cursors. For
1630                  * now just hardcode the watermark.
1631                  */
1632                 wm = 63;
1633         } else {
1634                 wm = vlv_wm_method2(clock, htotal, width, cpp,
1635                                     dev_priv->wm.pri_latency[level] * 10);
1636         }
1637
1638         return min_t(unsigned int, wm, USHRT_MAX);
1639 }
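
/*
 * Illustrative note: the VLV/CHV latencies are stored in whole
 * microseconds (see vlv_setup_wm_latency()), while vlv_wm_method2() wants
 * 0.1us units, hence the "* 10" above; e.g. the 3us PM2 latency is passed
 * in as 30.
 */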
1640
1641 static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1642 {
1643         return (active_planes & (BIT(PLANE_SPRITE0) |
1644                                  BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1645 }
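
/*
 * Illustrative truth table for the check above (true only when sprite1 is
 * enabled while sprite0 is not):
 *
 *   sprite0  sprite1  ->  workaround
 *     off      off         no
 *     on       off         no
 *     off      on          yes
 *     on       on          no
 */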
1646
1647 static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1648 {
1649         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1650         const struct g4x_pipe_wm *raw =
1651                 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
1652         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
1653         unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1654         int num_active_planes = hweight32(active_planes);
1655         const int fifo_size = 511;
1656         int fifo_extra, fifo_left = fifo_size;
1657         int sprite0_fifo_extra = 0;
1658         unsigned int total_rate;
1659         enum plane_id plane_id;
1660
1661         /*
1662          * When enabling sprite0 after sprite1 has already been enabled
1663          * we tend to get an underrun unless sprite0 already has some
1664          * FIFO space allocated. Hence we always allocate at least one
1665          * cacheline for sprite0 whenever sprite1 is enabled.
1666          *
1667          * All other plane enable sequences appear immune to this problem.
1668          */
1669         if (vlv_need_sprite0_fifo_workaround(active_planes))
1670                 sprite0_fifo_extra = 1;
1671
1672         total_rate = raw->plane[PLANE_PRIMARY] +
1673                 raw->plane[PLANE_SPRITE0] +
1674                 raw->plane[PLANE_SPRITE1] +
1675                 sprite0_fifo_extra;
1676
1677         if (total_rate > fifo_size)
1678                 return -EINVAL;
1679
1680         if (total_rate == 0)
1681                 total_rate = 1;
1682
1683         for_each_plane_id_on_crtc(crtc, plane_id) {
1684                 unsigned int rate;
1685
1686                 if ((active_planes & BIT(plane_id)) == 0) {
1687                         fifo_state->plane[plane_id] = 0;
1688                         continue;
1689                 }
1690
1691                 rate = raw->plane[plane_id];
1692                 fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1693                 fifo_left -= fifo_state->plane[plane_id];
1694         }
1695
1696         fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1697         fifo_left -= sprite0_fifo_extra;
1698
1699         fifo_state->plane[PLANE_CURSOR] = 63;
1700
1701         fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
1702
1703         /* spread the remainder evenly */
1704         for_each_plane_id_on_crtc(crtc, plane_id) {
1705                 int plane_extra;
1706
1707                 if (fifo_left == 0)
1708                         break;
1709
1710                 if ((active_planes & BIT(plane_id)) == 0)
1711                         continue;
1712
1713                 plane_extra = min(fifo_extra, fifo_left);
1714                 fifo_state->plane[plane_id] += plane_extra;
1715                 fifo_left -= plane_extra;
1716         }
1717
1718         WARN_ON(active_planes != 0 && fifo_left != 0);
1719
1720         /* give it all to the first plane if none are active */
1721         if (active_planes == 0) {
1722                 WARN_ON(fifo_left != fifo_size);
1723                 fifo_state->plane[PLANE_PRIMARY] = fifo_left;
1724         }
1725
1726         return 0;
1727 }
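
/*
 * Worked example (made-up rates): primary = 100, sprite0 = 50, sprite1
 * disabled, no sprite0 workaround. total_rate = 150 <= 511, so the
 * proportional split gives primary 511 * 100 / 150 = 340 and sprite0
 * 511 * 50 / 150 = 170, leaving fifo_left = 1. fifo_extra =
 * DIV_ROUND_UP(1, 2) = 1 and the remainder loop hands that last cacheline
 * to the primary plane: final split 341 / 170 / 0, with the cursor fixed
 * at 63 on top.
 */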
1728
1729 /* mark all levels starting from 'level' as invalid */
1730 static void vlv_invalidate_wms(struct intel_crtc *crtc,
1731                                struct vlv_wm_state *wm_state, int level)
1732 {
1733         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1734
1735         for (; level < intel_wm_num_levels(dev_priv); level++) {
1736                 enum plane_id plane_id;
1737
1738                 for_each_plane_id_on_crtc(crtc, plane_id)
1739                         wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1740
1741                 wm_state->sr[level].cursor = USHRT_MAX;
1742                 wm_state->sr[level].plane = USHRT_MAX;
1743         }
1744 }
1745
1746 static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1747 {
1748         if (wm > fifo_size)
1749                 return USHRT_MAX;
1750         else
1751                 return fifo_size - wm;
1752 }
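
/*
 * Illustrative example: the inversion above turns a computed "FIFO entries
 * needed" number into its headroom form (fifo_size - wm). With a 340-entry
 * FIFO share, a watermark of 100 becomes 240; anything that does not fit
 * (wm > fifo_size) saturates to USHRT_MAX, the same value
 * vlv_invalidate_wms() uses for unusable levels.
 */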
1753
1754 /*
1755  * Starting from 'level' set all higher
1756  * levels to 'value' in the "raw" watermarks.
1757  */
1758 static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1759                                  int level, enum plane_id plane_id, u16 value)
1760 {
1761         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1762         int num_levels = intel_wm_num_levels(dev_priv);
1763         bool dirty = false;
1764
1765         for (; level < num_levels; level++) {
1766                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1767
1768                 dirty |= raw->plane[plane_id] != value;
1769                 raw->plane[plane_id] = value;
1770         }
1771
1772         return dirty;
1773 }
1774
1775 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1776                                      const struct intel_plane_state *plane_state)
1777 {
1778         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1779         enum plane_id plane_id = plane->id;
1780         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1781         int level;
1782         bool dirty = false;
1783
1784         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1785                 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1786                 goto out;
1787         }
1788
1789         for (level = 0; level < num_levels; level++) {
1790                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1791                 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1792                 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
1793
1794                 if (wm > max_wm)
1795                         break;
1796
1797                 dirty |= raw->plane[plane_id] != wm;
1798                 raw->plane[plane_id] = wm;
1799         }
1800
1801         /* mark all higher levels as invalid */
1802         dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1803
1804 out:
1805         if (dirty)
1806                 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1807                               plane->base.name,
1808                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1809                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1810                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1811
1812         return dirty;
1813 }
1814
1815 static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1816                                       enum plane_id plane_id, int level)
1817 {
1818         const struct g4x_pipe_wm *raw =
1819                 &crtc_state->wm.vlv.raw[level];
1820         const struct vlv_fifo_state *fifo_state =
1821                 &crtc_state->wm.vlv.fifo_state;
1822
1823         return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1824 }
1825
1826 static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
1827 {
1828         return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1829                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1830                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1831                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1832 }
1833
1834 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1835 {
1836         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1837         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1838         struct intel_atomic_state *state =
1839                 to_intel_atomic_state(crtc_state->base.state);
1840         struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1841         const struct vlv_fifo_state *fifo_state =
1842                 &crtc_state->wm.vlv.fifo_state;
1843         int num_active_planes = hweight32(crtc_state->active_planes &
1844                                           ~BIT(PLANE_CURSOR));
1845         bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
1846         const struct intel_plane_state *old_plane_state;
1847         const struct intel_plane_state *new_plane_state;
1848         struct intel_plane *plane;
1849         enum plane_id plane_id;
1850         int level, ret, i;
1851         unsigned int dirty = 0;
1852
1853         for_each_oldnew_intel_plane_in_state(state, plane,
1854                                              old_plane_state,
1855                                              new_plane_state, i) {
1856                 if (new_plane_state->base.crtc != &crtc->base &&
1857                     old_plane_state->base.crtc != &crtc->base)
1858                         continue;
1859
1860                 if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
1861                         dirty |= BIT(plane->id);
1862         }
1863
1864         /*
1865          * DSPARB registers may have been reset due to the
1866          * power well being turned off. Make sure we restore
1867          * them to a consistent state even if no primary/sprite
1868          * planes are initially active.
1869          */
1870         if (needs_modeset)
1871                 crtc_state->fifo_changed = true;
1872
1873         if (!dirty)
1874                 return 0;
1875
1876         /* cursor changes don't warrant a FIFO recompute */
1877         if (dirty & ~BIT(PLANE_CURSOR)) {
1878                 const struct intel_crtc_state *old_crtc_state =
1879                         intel_atomic_get_old_crtc_state(state, crtc);
1880                 const struct vlv_fifo_state *old_fifo_state =
1881                         &old_crtc_state->wm.vlv.fifo_state;
1882
1883                 ret = vlv_compute_fifo(crtc_state);
1884                 if (ret)
1885                         return ret;
1886
1887                 if (needs_modeset ||
1888                     memcmp(old_fifo_state, fifo_state,
1889                            sizeof(*fifo_state)) != 0)
1890                         crtc_state->fifo_changed = true;
1891         }
1892
1893         /* initially allow all levels */
1894         wm_state->num_levels = intel_wm_num_levels(dev_priv);
1895         /*
1896          * Note that enabling cxsr with no primary/sprite planes
1897          * enabled can wedge the pipe. Hence we only allow cxsr
1898          * with exactly one enabled primary/sprite plane.
1899          */
1900         wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
1901
1902         for (level = 0; level < wm_state->num_levels; level++) {
1903                 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1904                 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
1905
1906                 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
1907                         break;
1908
1909                 for_each_plane_id_on_crtc(crtc, plane_id) {
1910                         wm_state->wm[level].plane[plane_id] =
1911                                 vlv_invert_wm_value(raw->plane[plane_id],
1912                                                     fifo_state->plane[plane_id]);
1913                 }
1914
1915                 wm_state->sr[level].plane =
1916                         vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
1917                                                  raw->plane[PLANE_SPRITE0],
1918                                                  raw->plane[PLANE_SPRITE1]),
1919                                             sr_fifo_size);
1920
1921                 wm_state->sr[level].cursor =
1922                         vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1923                                             63);
1924         }
1925
1926         if (level == 0)
1927                 return -EINVAL;
1928
1929         /* limit to only levels we can actually handle */
1930         wm_state->num_levels = level;
1931
1932         /* invalidate the higher levels */
1933         vlv_invalidate_wms(crtc, wm_state, level);
1934
1935         return 0;
1936 }
1937
1938 #define VLV_FIFO(plane, value) \
1939         (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1940
1941 static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1942                                    struct intel_crtc_state *crtc_state)
1943 {
1944         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1945         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1946         const struct vlv_fifo_state *fifo_state =
1947                 &crtc_state->wm.vlv.fifo_state;
1948         int sprite0_start, sprite1_start, fifo_size;
1949
1950         if (!crtc_state->fifo_changed)
1951                 return;
1952
1953         sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1954         sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1955         fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
1956
1957         WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
1958         WARN_ON(fifo_size != 511);
1959
1960         trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1961
1962         /*
1963          * uncore.lock serves a double purpose here. It allows us to
1964          * use the less expensive I915_{READ,WRITE}_FW() functions, and
1965          * it protects the DSPARB registers from getting clobbered by
1966          * parallel updates from multiple pipes.
1967          *
1968          * intel_pipe_update_start() has already disabled interrupts
1969          * for us, so a plain spin_lock() is sufficient here.
1970          */
1971         spin_lock(&dev_priv->uncore.lock);
1972
1973         switch (crtc->pipe) {
1974                 u32 dsparb, dsparb2, dsparb3;
1975         case PIPE_A:
1976                 dsparb = I915_READ_FW(DSPARB);
1977                 dsparb2 = I915_READ_FW(DSPARB2);
1978
1979                 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1980                             VLV_FIFO(SPRITEB, 0xff));
1981                 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1982                            VLV_FIFO(SPRITEB, sprite1_start));
1983
1984                 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1985                              VLV_FIFO(SPRITEB_HI, 0x1));
1986                 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1987                            VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1988
1989                 I915_WRITE_FW(DSPARB, dsparb);
1990                 I915_WRITE_FW(DSPARB2, dsparb2);
1991                 break;
1992         case PIPE_B:
1993                 dsparb = I915_READ_FW(DSPARB);
1994                 dsparb2 = I915_READ_FW(DSPARB2);
1995
1996                 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1997                             VLV_FIFO(SPRITED, 0xff));
1998                 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1999                            VLV_FIFO(SPRITED, sprite1_start));
2000
2001                 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
2002                              VLV_FIFO(SPRITED_HI, 0xff));
2003                 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
2004                            VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
2005
2006                 I915_WRITE_FW(DSPARB, dsparb);
2007                 I915_WRITE_FW(DSPARB2, dsparb2);
2008                 break;
2009         case PIPE_C:
2010                 dsparb3 = I915_READ_FW(DSPARB3);
2011                 dsparb2 = I915_READ_FW(DSPARB2);
2012
2013                 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
2014                              VLV_FIFO(SPRITEF, 0xff));
2015                 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
2016                             VLV_FIFO(SPRITEF, sprite1_start));
2017
2018                 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
2019                              VLV_FIFO(SPRITEF_HI, 0xff));
2020                 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
2021                            VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
2022
2023                 I915_WRITE_FW(DSPARB3, dsparb3);
2024                 I915_WRITE_FW(DSPARB2, dsparb2);
2025                 break;
2026         default:
2027                 break;
2028         }
2029
2030         POSTING_READ_FW(DSPARB);
2031
2032         spin_unlock(&dev_priv->uncore.lock);
2033 }
2034
2035 #undef VLV_FIFO
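
/*
 * Illustrative note: the FIFO start offsets computed above can exceed 8
 * bits (they go up to 511), so each one is split across two registers: the
 * low 8 bits land in the per-sprite field of DSPARB/DSPARB3 and the
 * remaining high bit (sprite0_start >> 8, sprite1_start >> 8) in the
 * matching *_HI field of DSPARB2. A hypothetical sprite0_start of 0x1ab
 * would program 0xab into the low field and 1 into the _HI bit.
 */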
2036
2037 static int vlv_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
2038 {
2039         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
2040         struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
2041         const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
2042         struct intel_atomic_state *intel_state =
2043                 to_intel_atomic_state(new_crtc_state->base.state);
2044         const struct intel_crtc_state *old_crtc_state =
2045                 intel_atomic_get_old_crtc_state(intel_state, crtc);
2046         const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
2047         int level;
2048
2049         if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
2050                 *intermediate = *optimal;
2051
2052                 intermediate->cxsr = false;
2053                 goto out;
2054         }
2055
2056         intermediate->num_levels = min(optimal->num_levels, active->num_levels);
2057         intermediate->cxsr = optimal->cxsr && active->cxsr &&
2058                 !new_crtc_state->disable_cxsr;
2059
2060         for (level = 0; level < intermediate->num_levels; level++) {
2061                 enum plane_id plane_id;
2062
2063                 for_each_plane_id_on_crtc(crtc, plane_id) {
2064                         intermediate->wm[level].plane[plane_id] =
2065                                 min(optimal->wm[level].plane[plane_id],
2066                                     active->wm[level].plane[plane_id]);
2067                 }
2068
2069                 intermediate->sr[level].plane = min(optimal->sr[level].plane,
2070                                                     active->sr[level].plane);
2071                 intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
2072                                                      active->sr[level].cursor);
2073         }
2074
2075         vlv_invalidate_wms(crtc, intermediate, level);
2076
2077 out:
2078         /*
2079          * If our intermediate WM are identical to the final WM, then we can
2080          * omit the post-vblank programming; only update if it's different.
2081          */
2082         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
2083                 new_crtc_state->wm.need_postvbl_update = true;
2084
2085         return 0;
2086 }
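
/*
 * Illustrative note: unlike g4x_compute_intermediate_wm(), which merges
 * with max(), the merge here uses min(). The VLV/CHV values have already
 * been through vlv_invert_wm_value(), i.e. they are stored as remaining
 * FIFO headroom rather than entries needed, so the smaller value
 * corresponds to the larger underlying requirement and is the conservative
 * choice while the update is in flight.
 */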
2087
2088 static void vlv_merge_wm(struct drm_i915_private *dev_priv,
2089                          struct vlv_wm_values *wm)
2090 {
2091         struct intel_crtc *crtc;
2092         int num_active_crtcs = 0;
2093
2094         wm->level = dev_priv->wm.max_level;
2095         wm->cxsr = true;
2096
2097         for_each_intel_crtc(&dev_priv->drm, crtc) {
2098                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2099
2100                 if (!crtc->active)
2101                         continue;
2102
2103                 if (!wm_state->cxsr)
2104                         wm->cxsr = false;
2105
2106                 num_active_crtcs++;
2107                 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
2108         }
2109
2110         if (num_active_crtcs != 1)
2111                 wm->cxsr = false;
2112
2113         if (num_active_crtcs > 1)
2114                 wm->level = VLV_WM_LEVEL_PM2;
2115
2116         for_each_intel_crtc(&dev_priv->drm, crtc) {
2117                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2118                 enum pipe pipe = crtc->pipe;
2119
2120                 wm->pipe[pipe] = wm_state->wm[wm->level];
2121                 if (crtc->active && wm->cxsr)
2122                         wm->sr = wm_state->sr[wm->level];
2123
2124                 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
2125                 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
2126                 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
2127                 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
2128         }
2129 }
2130
2131 static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
2132 {
2133         struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
2134         struct vlv_wm_values new_wm = {};
2135
2136         vlv_merge_wm(dev_priv, &new_wm);
2137
2138         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
2139                 return;
2140
2141         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2142                 chv_set_memory_dvfs(dev_priv, false);
2143
2144         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2145                 chv_set_memory_pm5(dev_priv, false);
2146
2147         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
2148                 _intel_set_memory_cxsr(dev_priv, false);
2149
2150         vlv_write_wm_values(dev_priv, &new_wm);
2151
2152         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
2153                 _intel_set_memory_cxsr(dev_priv, true);
2154
2155         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2156                 chv_set_memory_pm5(dev_priv, true);
2157
2158         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2159                 chv_set_memory_dvfs(dev_priv, true);
2160
2161         *old_wm = new_wm;
2162 }
2163
2164 static void vlv_initial_watermarks(struct intel_atomic_state *state,
2165                                    struct intel_crtc_state *crtc_state)
2166 {
2167         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2168         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
2169
2170         mutex_lock(&dev_priv->wm.wm_mutex);
2171         crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
2172         vlv_program_watermarks(dev_priv);
2173         mutex_unlock(&dev_priv->wm.wm_mutex);
2174 }
2175
2176 static void vlv_optimize_watermarks(struct intel_atomic_state *state,
2177                                     struct intel_crtc_state *crtc_state)
2178 {
2179         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2180         struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
2181
2182         if (!crtc_state->wm.need_postvbl_update)
2183                 return;
2184
2185         mutex_lock(&dev_priv->wm.wm_mutex);
2186         intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
2187         vlv_program_watermarks(dev_priv);
2188         mutex_unlock(&dev_priv->wm.wm_mutex);
2189 }
2190
2191 static void i965_update_wm(struct intel_crtc *unused_crtc)
2192 {
2193         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2194         struct intel_crtc *crtc;
2195         int srwm = 1;
2196         int cursor_sr = 16;
2197         bool cxsr_enabled;
2198
2199         /* Calc sr entries for single plane configs */
2200         crtc = single_enabled_crtc(dev_priv);
2201         if (crtc) {
2202                 /* self-refresh has much higher latency */
2203                 static const int sr_latency_ns = 12000;
2204                 const struct drm_display_mode *adjusted_mode =
2205                         &crtc->config->base.adjusted_mode;
2206                 const struct drm_framebuffer *fb =
2207                         crtc->base.primary->state->fb;
2208                 int clock = adjusted_mode->crtc_clock;
2209                 int htotal = adjusted_mode->crtc_htotal;
2210                 int hdisplay = crtc->config->pipe_src_w;
2211                 int cpp = fb->format->cpp[0];
2212                 int entries;
2213
2214                 entries = intel_wm_method2(clock, htotal,
2215                                            hdisplay, cpp, sr_latency_ns / 100);
2216                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
2217                 srwm = I965_FIFO_SIZE - entries;
2218                 if (srwm < 0)
2219                         srwm = 1;
2220                 srwm &= 0x1ff;
2221                 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
2222                               entries, srwm);
2223
2224                 entries = intel_wm_method2(clock, htotal,
2225                                            crtc->base.cursor->state->crtc_w, 4,
2226                                            sr_latency_ns / 100);
2227                 entries = DIV_ROUND_UP(entries,
2228                                        i965_cursor_wm_info.cacheline_size) +
2229                         i965_cursor_wm_info.guard_size;
2230
2231                 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
2232                 if (cursor_sr > i965_cursor_wm_info.max_wm)
2233                         cursor_sr = i965_cursor_wm_info.max_wm;
2234
2235                 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
2236                               "cursor %d\n", srwm, cursor_sr);
2237
2238                 cxsr_enabled = true;
2239         } else {
2240                 cxsr_enabled = false;
2241                 /* Turn off self refresh if both pipes are enabled */
2242                 intel_set_memory_cxsr(dev_priv, false);
2243         }
2244
2245         DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2246                       srwm);
2247
2248         /* 965 has limitations... */
2249         I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
2250                    FW_WM(8, CURSORB) |
2251                    FW_WM(8, PLANEB) |
2252                    FW_WM(8, PLANEA));
2253         I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
2254                    FW_WM(8, PLANEC_OLD));
2255         /* update cursor SR watermark */
2256         I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
2257
2258         if (cxsr_enabled)
2259                 intel_set_memory_cxsr(dev_priv, true);
2260 }
2261
2262 #undef FW_WM
2263
2264 static void i9xx_update_wm(struct intel_crtc *unused_crtc)
2265 {
2266         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2267         const struct intel_watermark_params *wm_info;
2268         u32 fwater_lo;
2269         u32 fwater_hi;
2270         int cwm, srwm = 1;
2271         int fifo_size;
2272         int planea_wm, planeb_wm;
2273         struct intel_crtc *crtc, *enabled = NULL;
2274
2275         if (IS_I945GM(dev_priv))
2276                 wm_info = &i945_wm_info;
2277         else if (!IS_GEN(dev_priv, 2))
2278                 wm_info = &i915_wm_info;
2279         else
2280                 wm_info = &i830_a_wm_info;
2281
2282         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
2283         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
2284         if (intel_crtc_active(crtc)) {
2285                 const struct drm_display_mode *adjusted_mode =
2286                         &crtc->config->base.adjusted_mode;
2287                 const struct drm_framebuffer *fb =
2288                         crtc->base.primary->state->fb;
2289                 int cpp;
2290
2291                 if (IS_GEN(dev_priv, 2))
2292                         cpp = 4;
2293                 else
2294                         cpp = fb->format->cpp[0];
2295
2296                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2297                                                wm_info, fifo_size, cpp,
2298                                                pessimal_latency_ns);
2299                 enabled = crtc;
2300         } else {
2301                 planea_wm = fifo_size - wm_info->guard_size;
2302                 if (planea_wm > (long)wm_info->max_wm)
2303                         planea_wm = wm_info->max_wm;
2304         }
2305
2306         if (IS_GEN(dev_priv, 2))
2307                 wm_info = &i830_bc_wm_info;
2308
2309         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
2310         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
2311         if (intel_crtc_active(crtc)) {
2312                 const struct drm_display_mode *adjusted_mode =
2313                         &crtc->config->base.adjusted_mode;
2314                 const struct drm_framebuffer *fb =
2315                         crtc->base.primary->state->fb;
2316                 int cpp;
2317
2318                 if (IS_GEN(dev_priv, 2))
2319                         cpp = 4;
2320                 else
2321                         cpp = fb->format->cpp[0];
2322
2323                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2324                                                wm_info, fifo_size, cpp,
2325                                                pessimal_latency_ns);
2326                 if (enabled == NULL)
2327                         enabled = crtc;
2328                 else
2329                         enabled = NULL;
2330         } else {
2331                 planeb_wm = fifo_size - wm_info->guard_size;
2332                 if (planeb_wm > (long)wm_info->max_wm)
2333                         planeb_wm = wm_info->max_wm;
2334         }
2335
2336         DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
2337
2338         if (IS_I915GM(dev_priv) && enabled) {
2339                 struct drm_i915_gem_object *obj;
2340
2341                 obj = intel_fb_obj(enabled->base.primary->state->fb);
2342
2343                 /* self-refresh seems busted with untiled */
2344                 if (!i915_gem_object_is_tiled(obj))
2345                         enabled = NULL;
2346         }
2347
2348         /*
2349          * Overlay gets an aggressive default since video jitter is bad.
2350          */
2351         cwm = 2;
2352
2353         /* Play safe and disable self-refresh before adjusting watermarks. */
2354         intel_set_memory_cxsr(dev_priv, false);
2355
2356         /* Calc sr entries for single plane configs */
2357         if (HAS_FW_BLC(dev_priv) && enabled) {
2358                 /* self-refresh has much higher latency */
2359                 static const int sr_latency_ns = 6000;
2360                 const struct drm_display_mode *adjusted_mode =
2361                         &enabled->config->base.adjusted_mode;
2362                 const struct drm_framebuffer *fb =
2363                         enabled->base.primary->state->fb;
2364                 int clock = adjusted_mode->crtc_clock;
2365                 int htotal = adjusted_mode->crtc_htotal;
2366                 int hdisplay = enabled->config->pipe_src_w;
2367                 int cpp;
2368                 int entries;
2369
2370                 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
2371                         cpp = 4;
2372                 else
2373                         cpp = fb->format->cpp[0];
2374
2375                 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
2376                                            sr_latency_ns / 100);
2377                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
2378                 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
2379                 srwm = wm_info->fifo_size - entries;
2380                 if (srwm < 0)
2381                         srwm = 1;
2382
2383                 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
2384                         I915_WRITE(FW_BLC_SELF,
2385                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
2386                 else
2387                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2388         }
2389
2390         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2391                       planea_wm, planeb_wm, cwm, srwm);
2392
2393         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2394         fwater_hi = (cwm & 0x1f);
2395
2396         /* Set request length to 8 cachelines per fetch */
2397         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2398         fwater_hi = fwater_hi | (1 << 8);
2399
2400         I915_WRITE(FW_BLC, fwater_lo);
2401         I915_WRITE(FW_BLC2, fwater_hi);
2402
2403         if (enabled)
2404                 intel_set_memory_cxsr(dev_priv, true);
2405 }
2406
2407 static void i845_update_wm(struct intel_crtc *unused_crtc)
2408 {
2409         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2410         struct intel_crtc *crtc;
2411         const struct drm_display_mode *adjusted_mode;
2412         u32 fwater_lo;
2413         int planea_wm;
2414
2415         crtc = single_enabled_crtc(dev_priv);
2416         if (crtc == NULL)
2417                 return;
2418
2419         adjusted_mode = &crtc->config->base.adjusted_mode;
2420         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2421                                        &i845_wm_info,
2422                                        dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
2423                                        4, pessimal_latency_ns);
2424         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2425         fwater_lo |= (3<<8) | planea_wm;
2426
2427         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
2428
2429         I915_WRITE(FW_BLC, fwater_lo);
2430 }
2431
2432 /* latency must be in 0.1us units. */
2433 static unsigned int ilk_wm_method1(unsigned int pixel_rate,
2434                                    unsigned int cpp,
2435                                    unsigned int latency)
2436 {
2437         unsigned int ret;
2438
2439         ret = intel_wm_method1(pixel_rate, cpp, latency);
2440         ret = DIV_ROUND_UP(ret, 64) + 2;
2441
2442         return ret;
2443 }
2444
2445 /* latency must be in 0.1us units. */
2446 static unsigned int ilk_wm_method2(unsigned int pixel_rate,
2447                                    unsigned int htotal,
2448                                    unsigned int width,
2449                                    unsigned int cpp,
2450                                    unsigned int latency)
2451 {
2452         unsigned int ret;
2453
2454         ret = intel_wm_method2(pixel_rate, htotal,
2455                                width, cpp, latency);
2456         ret = DIV_ROUND_UP(ret, 64) + 2;
2457
2458         return ret;
2459 }
2460
2461 static u32 ilk_wm_fbc(u32 pri_val, u32 horiz_pixels, u8 cpp)
2462 {
2463         /*
2464          * Neither of these should be possible since this function shouldn't be
2465          * called if the CRTC is off or the plane is invisible.  But let's be
2466          * extra paranoid to avoid a potential divide-by-zero if we screw up
2467          * elsewhere in the driver.
2468          */
2469         if (WARN_ON(!cpp))
2470                 return 0;
2471         if (WARN_ON(!horiz_pixels))
2472                 return 0;
2473
2474         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
2475 }
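
/*
 * Worked example (made-up numbers): for pri_val = 100 on a 1920-pixel-wide
 * plane at 4 bytes per pixel, the result above is
 * DIV_ROUND_UP(100 * 64, 1920 * 4) + 2 = DIV_ROUND_UP(6400, 7680) + 2 = 3,
 * i.e. the primary watermark rescaled from 64-byte cachelines into
 * (roughly) lines of the plane, plus a guard of two.
 */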
2476
2477 struct ilk_wm_maximums {
2478         u16 pri;
2479         u16 spr;
2480         u16 cur;
2481         u16 fbc;
2482 };
2483
2484 /*
2485  * For both WM_PIPE and WM_LP.
2486  * mem_value must be in 0.1us units.
2487  */
2488 static u32 ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
2489                               const struct intel_plane_state *pstate,
2490                               u32 mem_value, bool is_lp)
2491 {
2492         u32 method1, method2;
2493         int cpp;
2494
2495         if (mem_value == 0)
2496                 return U32_MAX;
2497
2498         if (!intel_wm_plane_visible(cstate, pstate))
2499                 return 0;
2500
2501         cpp = pstate->base.fb->format->cpp[0];
2502
2503         method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2504
2505         if (!is_lp)
2506                 return method1;
2507
2508         method2 = ilk_wm_method2(cstate->pixel_rate,
2509                                  cstate->base.adjusted_mode.crtc_htotal,
2510                                  drm_rect_width(&pstate->base.dst),
2511                                  cpp, mem_value);
2512
2513         return min(method1, method2);
2514 }
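
/*
 * Illustrative note: for LP watermarks (is_lp) both formulas are evaluated
 * and the smaller result is used; method 1 scales only with pixel rate,
 * cpp and latency, while method 2 also folds in htotal and the plane
 * width, so whichever model predicts the smaller FIFO demand for the given
 * mode wins. Pipe watermarks (!is_lp) use method 1 alone.
 */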
2515
2516 /*
2517  * For both WM_PIPE and WM_LP.
2518  * mem_value must be in 0.1us units.
2519  */
2520 static u32 ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
2521                               const struct intel_plane_state *pstate,
2522                               u32 mem_value)
2523 {
2524         u32 method1, method2;
2525         int cpp;
2526
2527         if (mem_value == 0)
2528                 return U32_MAX;
2529
2530         if (!intel_wm_plane_visible(cstate, pstate))
2531                 return 0;
2532
2533         cpp = pstate->base.fb->format->cpp[0];
2534
2535         method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2536         method2 = ilk_wm_method2(cstate->pixel_rate,
2537                                  cstate->base.adjusted_mode.crtc_htotal,
2538                                  drm_rect_width(&pstate->base.dst),
2539                                  cpp, mem_value);
2540         return min(method1, method2);
2541 }
2542
2543 /*
2544  * For both WM_PIPE and WM_LP.
2545  * mem_value must be in 0.1us units.
2546  */
2547 static u32 ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
2548                               const struct intel_plane_state *pstate,
2549                               u32 mem_value)
2550 {
2551         int cpp;
2552
2553         if (mem_value == 0)
2554                 return U32_MAX;
2555
2556         if (!intel_wm_plane_visible(cstate, pstate))
2557                 return 0;
2558
2559         cpp = pstate->base.fb->format->cpp[0];
2560
2561         return ilk_wm_method2(cstate->pixel_rate,
2562                               cstate->base.adjusted_mode.crtc_htotal,
2563                               pstate->base.crtc_w, cpp, mem_value);
2564 }
2565
2566 /* Only for WM_LP. */
2567 static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
2568                               const struct intel_plane_state *pstate,
2569                               u32 pri_val)
2570 {
2571         int cpp;
2572
2573         if (!intel_wm_plane_visible(cstate, pstate))
2574                 return 0;
2575
2576         cpp = pstate->base.fb->format->cpp[0];
2577
2578         return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
2579 }
2580
2581 static unsigned int
2582 ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
2583 {
2584         if (INTEL_GEN(dev_priv) >= 8)
2585                 return 3072;
2586         else if (INTEL_GEN(dev_priv) >= 7)
2587                 return 768;
2588         else
2589                 return 512;
2590 }
2591
2592 static unsigned int
2593 ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
2594                      int level, bool is_sprite)
2595 {
2596         if (INTEL_GEN(dev_priv) >= 8)
2597                 /* BDW primary/sprite plane watermarks */
2598                 return level == 0 ? 255 : 2047;
2599         else if (INTEL_GEN(dev_priv) >= 7)
2600                 /* IVB/HSW primary/sprite plane watermarks */
2601                 return level == 0 ? 127 : 1023;
2602         else if (!is_sprite)
2603                 /* ILK/SNB primary plane watermarks */
2604                 return level == 0 ? 127 : 511;
2605         else
2606                 /* ILK/SNB sprite plane watermarks */
2607                 return level == 0 ? 63 : 255;
2608 }
2609
2610 static unsigned int
2611 ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
2612 {
2613         if (INTEL_GEN(dev_priv) >= 7)
2614                 return level == 0 ? 63 : 255;
2615         else
2616                 return level == 0 ? 31 : 63;
2617 }
2618
2619 static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
2620 {
2621         if (INTEL_GEN(dev_priv) >= 8)
2622                 return 31;
2623         else
2624                 return 15;
2625 }
2626
2627 /* Calculate the maximum primary/sprite plane watermark */
2628 static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv,
2629                                      int level,
2630                                      const struct intel_wm_config *config,
2631                                      enum intel_ddb_partitioning ddb_partitioning,
2632                                      bool is_sprite)
2633 {
2634         unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
2635
2636         /* if sprites aren't enabled, sprites get nothing */
2637         if (is_sprite && !config->sprites_enabled)
2638                 return 0;
2639
2640         /* HSW allows LP1+ watermarks even with multiple pipes */
2641         if (level == 0 || config->num_pipes_active > 1) {
2642                 fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
2643
2644                 /*
2645                  * For some reason the non self refresh
2646                  * FIFO size is only half of the self
2647                  * refresh FIFO size on ILK/SNB.
2648                  */
2649                 if (INTEL_GEN(dev_priv) <= 6)
2650                         fifo_size /= 2;
2651         }
2652
2653         if (config->sprites_enabled) {
2654                 /* level 0 is always calculated with 1:1 split */
2655                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2656                         if (is_sprite)
2657                                 fifo_size *= 5;
2658                         fifo_size /= 6;
2659                 } else {
2660                         fifo_size /= 2;
2661                 }
2662         }
2663
2664         /* clamp to max that the registers can hold */
2665         return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
2666 }
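
/*
 * Worked example: on a gen7 part (768-entry FIFO) with a single active
 * pipe, sprites enabled and 5/6 DDB partitioning, an LP (level > 0)
 * watermark gets primary 768 / 6 = 128 and sprite 768 * 5 / 6 = 640, both
 * under the 1023-entry register limit. At level 0, or with more than one
 * active pipe, the FIFO is first divided between the pipes and the
 * sprite/primary split falls back to 1:1.
 */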
2667
2668 /* Calculate the maximum cursor plane watermark */
2669 static unsigned int ilk_cursor_wm_max(const struct drm_i915_private *dev_priv,
2670                                       int level,
2671                                       const struct intel_wm_config *config)
2672 {
2673         /* HSW LP1+ watermarks w/ multiple pipes */
2674         if (level > 0 && config->num_pipes_active > 1)
2675                 return 64;
2676
2677         /* otherwise just report max that registers can hold */
2678         return ilk_cursor_wm_reg_max(dev_priv, level);
2679 }
2680
2681 static void ilk_compute_wm_maximums(const struct drm_i915_private *dev_priv,
2682                                     int level,
2683                                     const struct intel_wm_config *config,
2684                                     enum intel_ddb_partitioning ddb_partitioning,
2685                                     struct ilk_wm_maximums *max)
2686 {
2687         max->pri = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, false);
2688         max->spr = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, true);
2689         max->cur = ilk_cursor_wm_max(dev_priv, level, config);
2690         max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2691 }
2692
2693 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
2694                                         int level,
2695                                         struct ilk_wm_maximums *max)
2696 {
2697         max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2698         max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2699         max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2700         max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2701 }
2702
2703 static bool ilk_validate_wm_level(int level,
2704                                   const struct ilk_wm_maximums *max,
2705                                   struct intel_wm_level *result)
2706 {
2707         bool ret;
2708
2709         /* already determined to be invalid? */
2710         if (!result->enable)
2711                 return false;
2712
2713         result->enable = result->pri_val <= max->pri &&
2714                          result->spr_val <= max->spr &&
2715                          result->cur_val <= max->cur;
2716
2717         ret = result->enable;
2718
2719         /*
2720          * HACK until we can pre-compute everything,
2721          * and thus fail gracefully if LP0 watermarks
2722          * are exceeded...
2723          */
2724         if (level == 0 && !result->enable) {
2725                 if (result->pri_val > max->pri)
2726                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2727                                       level, result->pri_val, max->pri);
2728                 if (result->spr_val > max->spr)
2729                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2730                                       level, result->spr_val, max->spr);
2731                 if (result->cur_val > max->cur)
2732                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2733                                       level, result->cur_val, max->cur);
2734
2735                 result->pri_val = min_t(u32, result->pri_val, max->pri);
2736                 result->spr_val = min_t(u32, result->spr_val, max->spr);
2737                 result->cur_val = min_t(u32, result->cur_val, max->cur);
2738                 result->enable = true;
2739         }
2740
2741         return ret;
2742 }
2743
2744 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2745                                  const struct intel_crtc *intel_crtc,
2746                                  int level,
2747                                  struct intel_crtc_state *cstate,
2748                                  const struct intel_plane_state *pristate,
2749                                  const struct intel_plane_state *sprstate,
2750                                  const struct intel_plane_state *curstate,
2751                                  struct intel_wm_level *result)
2752 {
2753         u16 pri_latency = dev_priv->wm.pri_latency[level];
2754         u16 spr_latency = dev_priv->wm.spr_latency[level];
2755         u16 cur_latency = dev_priv->wm.cur_latency[level];
2756
2757         /* WM1+ latency values stored in 0.5us units */
2758         if (level > 0) {
2759                 pri_latency *= 5;
2760                 spr_latency *= 5;
2761                 cur_latency *= 5;
2762         }
2763
2764         if (pristate) {
2765                 result->pri_val = ilk_compute_pri_wm(cstate, pristate,
2766                                                      pri_latency, level);
2767                 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
2768         }
2769
2770         if (sprstate)
2771                 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
2772
2773         if (curstate)
2774                 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
2775
2776         result->enable = true;
2777 }
2778
2779 static u32
2780 hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
2781 {
2782         const struct intel_atomic_state *intel_state =
2783                 to_intel_atomic_state(cstate->base.state);
2784         const struct drm_display_mode *adjusted_mode =
2785                 &cstate->base.adjusted_mode;
2786         u32 linetime, ips_linetime;
2787
2788         if (!cstate->base.active)
2789                 return 0;
2790         if (WARN_ON(adjusted_mode->crtc_clock == 0))
2791                 return 0;
2792         if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
2793                 return 0;
2794
2795         /* The WMs are computed based on how long it takes to fill a single
2796          * row at the given clock rate, multiplied by 8.
2797          */
2798         linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2799                                      adjusted_mode->crtc_clock);
2800         ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2801                                          intel_state->cdclk.logical.cdclk);
2802
2803         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2804                PIPE_WM_LINETIME_TIME(linetime);
2805 }
2806
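/*
 * Worked example for the linetime calculation above, with hypothetical
 * mode numbers: for crtc_htotal = 2200 and crtc_clock = 148500 (kHz),
 * one line takes 2200 * 1000 / 148500 ~= 14.8 us, so linetime is
 * programmed as 2200 * 1000 * 8 / 148500 ~= 119, i.e. the line time in
 * 1/8 us units. ips_linetime is the same calculation evaluated against
 * the logical CDCLK instead of the pixel clock.
 */
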
2807 static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
2808                                   u16 wm[8])
2809 {
2810         if (INTEL_GEN(dev_priv) >= 9) {
2811                 u32 val;
2812                 int ret, i;
2813                 int level, max_level = ilk_wm_max_level(dev_priv);
2814
2815                 /* read the first set of memory latencies[0:3] */
2816                 val = 0; /* data0 to be programmed to 0 for first set */
2817                 mutex_lock(&dev_priv->pcu_lock);
2818                 ret = sandybridge_pcode_read(dev_priv,
2819                                              GEN9_PCODE_READ_MEM_LATENCY,
2820                                              &val);
2821                 mutex_unlock(&dev_priv->pcu_lock);
2822
2823                 if (ret) {
2824                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2825                         return;
2826                 }
2827
2828                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2829                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2830                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2831                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2832                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2833                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2834                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2835
2836                 /* read the second set of memory latencies[4:7] */
2837                 val = 1; /* data0 to be programmed to 1 for second set */
2838                 mutex_lock(&dev_priv->pcu_lock);
2839                 ret = sandybridge_pcode_read(dev_priv,
2840                                              GEN9_PCODE_READ_MEM_LATENCY,
2841                                              &val);
2842                 mutex_unlock(&dev_priv->pcu_lock);
2843                 if (ret) {
2844                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2845                         return;
2846                 }
2847
2848                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2849                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2850                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2851                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2852                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2853                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2854                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2855
2856                 /*
2857                  * If a level n (n >= 1) has a 0us latency, all levels m (m >= n)
2858                  * need to be disabled. We make sure to sanitize the values out
2859                  * of the punit to satisfy this requirement.
2860                  */
2861                 for (level = 1; level <= max_level; level++) {
2862                         if (wm[level] == 0) {
2863                                 for (i = level + 1; i <= max_level; i++)
2864                                         wm[i] = 0;
2865                                 break;
2866                         }
2867                 }
2868
2869                 /*
2870                  * WaWmMemoryReadLatency:skl+,glk
2871                  *
2872                  * punit doesn't take into account the read latency so we need
2873                  * to add 2us to the various latency levels we retrieve from the
2874                  * punit when level 0 response data is 0us.
2875                  */
2876                 if (wm[0] == 0) {
2877                         wm[0] += 2;
2878                         for (level = 1; level <= max_level; level++) {
2879                                 if (wm[level] == 0)
2880                                         break;
2881                                 wm[level] += 2;
2882                         }
2883                 }
2884
2885                 /*
2886                  * WA Level-0 adjustment for 16GB DIMMs: SKL+
2887                  * Bump the level 0 latency when a 16GB DIMM is present. If DIMM
2888                  * info could not be read, a 16GB DIMM is assumed so that the WA
2889                  * still protects against underruns.
2890                  */
2891                 if (dev_priv->dram_info.is_16gb_dimm)
2892                         wm[0] += 1;
2893
2894         } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
2895                 u64 sskpd = I915_READ64(MCH_SSKPD);
2896
2897                 wm[0] = (sskpd >> 56) & 0xFF;
2898                 if (wm[0] == 0)
2899                         wm[0] = sskpd & 0xF;
2900                 wm[1] = (sskpd >> 4) & 0xFF;
2901                 wm[2] = (sskpd >> 12) & 0xFF;
2902                 wm[3] = (sskpd >> 20) & 0x1FF;
2903                 wm[4] = (sskpd >> 32) & 0x1FF;
2904         } else if (INTEL_GEN(dev_priv) >= 6) {
2905                 u32 sskpd = I915_READ(MCH_SSKPD);
2906
2907                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2908                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2909                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2910                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2911         } else if (INTEL_GEN(dev_priv) >= 5) {
2912                 u32 mltr = I915_READ(MLTR_ILK);
2913
2914                 /* ILK primary LP0 latency is 700 ns */
2915                 wm[0] = 7;
2916                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2917                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2918         } else {
2919                 MISSING_CASE(INTEL_DEVID(dev_priv));
2920         }
2921 }
2922
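/*
 * Sketch of how one GEN9 mailbox read above unpacks, assuming the usual
 * 8-bit latency fields at bit offsets 0/8/16/24 (the exact shift/mask
 * definitions live in i915_reg.h) and a hypothetical return value of
 * val = 0x0e0a0602: wm[0] = 2, wm[1] = 6, wm[2] = 10, wm[3] = 14, all
 * in microseconds. The second read with data0 = 1 fills wm[4..7] the
 * same way.
 */
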
2923 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
2924                                        u16 wm[5])
2925 {
2926         /* ILK sprite LP0 latency is 1300 ns */
2927         if (IS_GEN(dev_priv, 5))
2928                 wm[0] = 13;
2929 }
2930
2931 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
2932                                        u16 wm[5])
2933 {
2934         /* ILK cursor LP0 latency is 1300 ns */
2935         if (IS_GEN(dev_priv, 5))
2936                 wm[0] = 13;
2937 }
2938
2939 int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
2940 {
2941         /* how many WM levels are we expecting */
2942         if (INTEL_GEN(dev_priv) >= 9)
2943                 return 7;
2944         else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2945                 return 4;
2946         else if (INTEL_GEN(dev_priv) >= 6)
2947                 return 3;
2948         else
2949                 return 2;
2950 }
2951
2952 static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
2953                                    const char *name,
2954                                    const u16 wm[8])
2955 {
2956         int level, max_level = ilk_wm_max_level(dev_priv);
2957
2958         for (level = 0; level <= max_level; level++) {
2959                 unsigned int latency = wm[level];
2960
2961                 if (latency == 0) {
2962                         DRM_DEBUG_KMS("%s WM%d latency not provided\n",
2963                                       name, level);
2964                         continue;
2965                 }
2966
2967                 /*
2968                  * - latencies are in us on gen9.
2969                  * - before then, WM1+ latency values are in 0.5us units
2970                  */
2971                 if (INTEL_GEN(dev_priv) >= 9)
2972                         latency *= 10;
2973                 else if (level > 0)
2974                         latency *= 5;
2975
2976                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2977                               name, level, wm[level],
2978                               latency / 10, latency % 10);
2979         }
2980 }
2981
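/*
 * Examples of the unit handling above (hypothetical raw values): on gen9
 * a raw latency of 3 is already in us and prints as "3 (3.0 usec)"; on
 * earlier platforms a WM1+ raw value of 3 is in 0.5us units and prints
 * as "3 (1.5 usec)", while a WM0 raw value of 7 is in 0.1us units and
 * prints as "7 (0.7 usec)".
 */
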
2982 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2983                                     u16 wm[5], u16 min)
2984 {
2985         int level, max_level = ilk_wm_max_level(dev_priv);
2986
2987         if (wm[0] >= min)
2988                 return false;
2989
2990         wm[0] = max(wm[0], min);
2991         for (level = 1; level <= max_level; level++)
2992                 wm[level] = max_t(u16, wm[level], DIV_ROUND_UP(min, 5));
2993
2994         return true;
2995 }
2996
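/*
 * Example of the bump above with the min = 12 value used by
 * snb_wm_latency_quirk() below (hypothetical BIOS values): a WM0 latency
 * of 5 (0.5 us in 0.1us units) becomes 12 (1.2 us), and every WM1+
 * latency below DIV_ROUND_UP(12, 5) = 3 (1.5 us in 0.5us units) is
 * raised to 3. If WM0 already meets the minimum, nothing changes.
 */
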
2997 static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
2998 {
2999         bool changed;
3000
3001         /*
3002          * The BIOS-provided WM memory latency values are often
3003          * inadequate for high resolution displays. Adjust them.
3004          */
3005         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
3006                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
3007                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
3008
3009         if (!changed)
3010                 return;
3011
3012         DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
3013         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3014         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3015         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3016 }
3017
3018 static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
3019 {
3020         /*
3021          * On some SNB machines (Thinkpad X220 Tablet at least)
3022          * LP3 usage can cause vblank interrupts to be lost.
3023          * The DEIIR bit will go high but it looks like the CPU
3024          * never gets interrupted.
3025          *
3026          * It's not clear whether other interrupt sources could
3027          * be affected or if this is somehow limited to vblank
3028          * interrupts only. To play it safe we disable LP3
3029          * watermarks entirely.
3030          */
3031         if (dev_priv->wm.pri_latency[3] == 0 &&
3032             dev_priv->wm.spr_latency[3] == 0 &&
3033             dev_priv->wm.cur_latency[3] == 0)
3034                 return;
3035
3036         dev_priv->wm.pri_latency[3] = 0;
3037         dev_priv->wm.spr_latency[3] = 0;
3038         dev_priv->wm.cur_latency[3] = 0;
3039
3040         DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
3041         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3042         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3043         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3044 }
3045
3046 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
3047 {
3048         intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
3049
3050         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
3051                sizeof(dev_priv->wm.pri_latency));
3052         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
3053                sizeof(dev_priv->wm.pri_latency));
3054
3055         intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
3056         intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
3057
3058         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3059         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3060         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3061
3062         if (IS_GEN(dev_priv, 6)) {
3063                 snb_wm_latency_quirk(dev_priv);
3064                 snb_wm_lp3_irq_quirk(dev_priv);
3065         }
3066 }
3067
3068 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
3069 {
3070         intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
3071         intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
3072 }
3073
3074 static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv,
3075                                  struct intel_pipe_wm *pipe_wm)
3076 {
3077         /* LP0 watermark maximums depend on this pipe alone */
3078         const struct intel_wm_config config = {
3079                 .num_pipes_active = 1,
3080                 .sprites_enabled = pipe_wm->sprites_enabled,
3081                 .sprites_scaled = pipe_wm->sprites_scaled,
3082         };
3083         struct ilk_wm_maximums max;
3084
3085         /* LP0 watermarks always use 1/2 DDB partitioning */
3086         ilk_compute_wm_maximums(dev_priv, 0, &config, INTEL_DDB_PART_1_2, &max);
3087
3088         /* At least LP0 must be valid */
3089         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
3090                 DRM_DEBUG_KMS("LP0 watermark invalid\n");
3091                 return false;
3092         }
3093
3094         return true;
3095 }
3096
3097 /* Compute new watermarks for the pipe */
3098 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
3099 {
3100         struct drm_atomic_state *state = cstate->base.state;
3101         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3102         struct intel_pipe_wm *pipe_wm;
3103         struct drm_device *dev = state->dev;
3104         const struct drm_i915_private *dev_priv = to_i915(dev);
3105         struct drm_plane *plane;
3106         const struct drm_plane_state *plane_state;
3107         const struct intel_plane_state *pristate = NULL;
3108         const struct intel_plane_state *sprstate = NULL;
3109         const struct intel_plane_state *curstate = NULL;
3110         int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
3111         struct ilk_wm_maximums max;
3112
3113         pipe_wm = &cstate->wm.ilk.optimal;
3114
3115         drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) {
3116                 const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
3117
3118                 if (plane->type == DRM_PLANE_TYPE_PRIMARY)
3119                         pristate = ps;
3120                 else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
3121                         sprstate = ps;
3122                 else if (plane->type == DRM_PLANE_TYPE_CURSOR)
3123                         curstate = ps;
3124         }
3125
3126         pipe_wm->pipe_enabled = cstate->base.active;
3127         if (sprstate) {
3128                 pipe_wm->sprites_enabled = sprstate->base.visible;
3129                 pipe_wm->sprites_scaled = sprstate->base.visible &&
3130                         (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
3131                          drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
3132         }
3133
3134         usable_level = max_level;
3135
3136         /* ILK/SNB: LP2+ watermarks only w/o sprites */
3137         if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
3138                 usable_level = 1;
3139
3140         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
3141         if (pipe_wm->sprites_scaled)
3142                 usable_level = 0;
3143
3144         memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
3145         ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
3146                              pristate, sprstate, curstate, &pipe_wm->wm[0]);
3147
3148         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3149                 pipe_wm->linetime = hsw_compute_linetime_wm(cstate);
3150
3151         if (!ilk_validate_pipe_wm(dev_priv, pipe_wm))
3152                 return -EINVAL;
3153
3154         ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
3155
3156         for (level = 1; level <= usable_level; level++) {
3157                 struct intel_wm_level *wm = &pipe_wm->wm[level];
3158
3159                 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
3160                                      pristate, sprstate, curstate, wm);
3161
3162                 /*
3163                  * Disable any watermark level that exceeds the
3164                  * register maximums since such watermarks are
3165                  * always invalid.
3166                  */
3167                 if (!ilk_validate_wm_level(level, &max, wm)) {
3168                         memset(wm, 0, sizeof(*wm));
3169                         break;
3170                 }
3171         }
3172
3173         return 0;
3174 }
3175
3176 /*
3177  * Build a set of 'intermediate' watermark values that satisfy both the old
3178  * state and the new state.  These can be programmed to the hardware
3179  * immediately.
3180  */
3181 static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate)
3182 {
3183         struct intel_crtc *intel_crtc = to_intel_crtc(newstate->base.crtc);
3184         struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
3185         struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
3186         struct intel_atomic_state *intel_state =
3187                 to_intel_atomic_state(newstate->base.state);
3188         const struct intel_crtc_state *oldstate =
3189                 intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
3190         const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
3191         int level, max_level = ilk_wm_max_level(dev_priv);
3192
3193         /*
3194          * Start with the final, target watermarks, then combine with the
3195          * currently active watermarks to get values that are safe both before
3196          * and after the vblank.
3197          */
3198         *a = newstate->wm.ilk.optimal;
3199         if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base) ||
3200             intel_state->skip_intermediate_wm)
3201                 return 0;
3202
3203         a->pipe_enabled |= b->pipe_enabled;
3204         a->sprites_enabled |= b->sprites_enabled;
3205         a->sprites_scaled |= b->sprites_scaled;
3206
3207         for (level = 0; level <= max_level; level++) {
3208                 struct intel_wm_level *a_wm = &a->wm[level];
3209                 const struct intel_wm_level *b_wm = &b->wm[level];
3210
3211                 a_wm->enable &= b_wm->enable;
3212                 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
3213                 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
3214                 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
3215                 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
3216         }
3217
3218         /*
3219          * We need to make sure that these merged watermark values are
3220          * actually a valid configuration themselves.  If they're not,
3221          * there's no safe way to transition from the old state to
3222          * the new state, so we need to fail the atomic transaction.
3223          */
3224         if (!ilk_validate_pipe_wm(dev_priv, a))
3225                 return -EINVAL;
3226
3227         /*
3228          * If our intermediate WMs are identical to the final WMs, then we can
3229          * omit the post-vblank programming; only update if it's different.
3230          */
3231         if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
3232                 newstate->wm.need_postvbl_update = true;
3233
3234         return 0;
3235 }
3236
3237 /*
3238  * Merge the watermarks from all active pipes for a specific level.
3239  */
3240 static void ilk_merge_wm_level(struct drm_i915_private *dev_priv,
3241                                int level,
3242                                struct intel_wm_level *ret_wm)
3243 {
3244         const struct intel_crtc *intel_crtc;
3245
3246         ret_wm->enable = true;
3247
3248         for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
3249                 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
3250                 const struct intel_wm_level *wm = &active->wm[level];
3251
3252                 if (!active->pipe_enabled)
3253                         continue;
3254
3255                 /*
3256                  * The watermark values may have been used in the past,
3257                  * so we must maintain them in the registers for some
3258                  * time even if the level is now disabled.
3259                  */
3260                 if (!wm->enable)
3261                         ret_wm->enable = false;
3262
3263                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
3264                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
3265                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
3266                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
3267         }
3268 }
3269
3270 /*
3271  * Merge all low power watermarks for all active pipes.
3272  */
3273 static void ilk_wm_merge(struct drm_i915_private *dev_priv,
3274                          const struct intel_wm_config *config,
3275                          const struct ilk_wm_maximums *max,
3276                          struct intel_pipe_wm *merged)
3277 {
3278         int level, max_level = ilk_wm_max_level(dev_priv);
3279         int last_enabled_level = max_level;
3280
3281         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
3282         if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
3283             config->num_pipes_active > 1)
3284                 last_enabled_level = 0;
3285
3286         /* ILK: FBC WM must be disabled always */
3287         merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
3288
3289         /* merge each WM1+ level */
3290         for (level = 1; level <= max_level; level++) {
3291                 struct intel_wm_level *wm = &merged->wm[level];
3292
3293                 ilk_merge_wm_level(dev_priv, level, wm);
3294
3295                 if (level > last_enabled_level)
3296                         wm->enable = false;
3297                 else if (!ilk_validate_wm_level(level, max, wm))
3298                         /* make sure all following levels get disabled */
3299                         last_enabled_level = level - 1;
3300
3301                 /*
3302                  * The spec says it is preferred to disable
3303                  * FBC WMs instead of disabling a WM level.
3304                  */
3305                 if (wm->fbc_val > max->fbc) {
3306                         if (wm->enable)
3307                                 merged->fbc_wm_enabled = false;
3308                         wm->fbc_val = 0;
3309                 }
3310         }
3311
3312         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3313         /*
3314          * FIXME this is racy. FBC might get enabled later.
3315          * What we should check here is whether FBC can be
3316          * enabled sometime later.
3317          */
3318         if (IS_GEN(dev_priv, 5) && !merged->fbc_wm_enabled &&
3319             intel_fbc_is_active(dev_priv)) {
3320                 for (level = 2; level <= max_level; level++) {
3321                         struct intel_wm_level *wm = &merged->wm[level];
3322
3323                         wm->enable = false;
3324                 }
3325         }
3326 }
3327
3328 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
3329 {
3330         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
3331         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
3332 }
3333
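/*
 * Example of the mapping above: when wm[4] is enabled (e.g. HSW/BDW,
 * where ilk_wm_max_level() is 4) LP1/LP2/LP3 select levels 1/3/4;
 * otherwise they select levels 1/2/3.
 */
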
3334 /* The value we need to program into the WM_LPx latency field */
3335 static unsigned int ilk_wm_lp_latency(struct drm_i915_private *dev_priv,
3336                                       int level)
3337 {
3338         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3339                 return 2 * level;
3340         else
3341                 return dev_priv->wm.pri_latency[level];
3342 }
3343
3344 static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
3345                                    const struct intel_pipe_wm *merged,
3346                                    enum intel_ddb_partitioning partitioning,
3347                                    struct ilk_wm_values *results)
3348 {
3349         struct intel_crtc *intel_crtc;
3350         int level, wm_lp;
3351
3352         results->enable_fbc_wm = merged->fbc_wm_enabled;
3353         results->partitioning = partitioning;
3354
3355         /* LP1+ register values */
3356         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3357                 const struct intel_wm_level *r;
3358
3359                 level = ilk_wm_lp_to_level(wm_lp, merged);
3360
3361                 r = &merged->wm[level];
3362
3363                 /*
3364                  * Maintain the watermark values even if the level is
3365                  * disabled. Doing otherwise could cause underruns.
3366                  */
3367                 results->wm_lp[wm_lp - 1] =
3368                         (ilk_wm_lp_latency(dev_priv, level) << WM1_LP_LATENCY_SHIFT) |
3369                         (r->pri_val << WM1_LP_SR_SHIFT) |
3370                         r->cur_val;
3371
3372                 if (r->enable)
3373                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
3374
3375                 if (INTEL_GEN(dev_priv) >= 8)
3376                         results->wm_lp[wm_lp - 1] |=
3377                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
3378                 else
3379                         results->wm_lp[wm_lp - 1] |=
3380                                 r->fbc_val << WM1_LP_FBC_SHIFT;
3381
3382                 /*
3383                  * Always set WM1S_LP_EN when spr_val != 0, even if the
3384                  * level is disabled. Doing otherwise could cause underruns.
3385                  */
3386                 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
3387                         WARN_ON(wm_lp != 1);
3388                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
3389                 } else
3390                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
3391         }
3392
3393         /* LP0 register values */
3394         for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
3395                 enum pipe pipe = intel_crtc->pipe;
3396                 const struct intel_wm_level *r =
3397                         &intel_crtc->wm.active.ilk.wm[0];
3398
3399                 if (WARN_ON(!r->enable))
3400                         continue;
3401
3402                 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
3403
3404                 results->wm_pipe[pipe] =
3405                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
3406                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
3407                         r->cur_val;
3408         }
3409 }
3410
3411 /* Find the result with the highest level enabled. If both have the same highest
3412  * level, break the tie with enable_fbc_wm, preferring r1 when still equal. */
3413 static struct intel_pipe_wm *
3414 ilk_find_best_result(struct drm_i915_private *dev_priv,
3415                      struct intel_pipe_wm *r1,
3416                      struct intel_pipe_wm *r2)
3417 {
3418         int level, max_level = ilk_wm_max_level(dev_priv);
3419         int level1 = 0, level2 = 0;
3420
3421         for (level = 1; level <= max_level; level++) {
3422                 if (r1->wm[level].enable)
3423                         level1 = level;
3424                 if (r2->wm[level].enable)
3425                         level2 = level;
3426         }
3427
3428         if (level1 == level2) {
3429                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
3430                         return r2;
3431                 else
3432                         return r1;
3433         } else if (level1 > level2) {
3434                 return r1;
3435         } else {
3436                 return r2;
3437         }
3438 }
3439
3440 /* dirty bits used to track which watermarks need changes */
3441 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
3442 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
3443 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
3444 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
3445 #define WM_DIRTY_FBC (1 << 24)
3446 #define WM_DIRTY_DDB (1 << 25)
3447
3448 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
3449                                          const struct ilk_wm_values *old,
3450                                          const struct ilk_wm_values *new)
3451 {
3452         unsigned int dirty = 0;
3453         enum pipe pipe;
3454         int wm_lp;
3455
3456         for_each_pipe(dev_priv, pipe) {
3457                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
3458                         dirty |= WM_DIRTY_LINETIME(pipe);
3459                         /* Must disable LP1+ watermarks too */
3460                         dirty |= WM_DIRTY_LP_ALL;
3461                 }
3462
3463                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3464                         dirty |= WM_DIRTY_PIPE(pipe);
3465                         /* Must disable LP1+ watermarks too */
3466                         dirty |= WM_DIRTY_LP_ALL;
3467                 }
3468         }
3469
3470         if (old->enable_fbc_wm != new->enable_fbc_wm) {
3471                 dirty |= WM_DIRTY_FBC;
3472                 /* Must disable LP1+ watermarks too */
3473                 dirty |= WM_DIRTY_LP_ALL;
3474         }
3475
3476         if (old->partitioning != new->partitioning) {
3477                 dirty |= WM_DIRTY_DDB;
3478                 /* Must disable LP1+ watermarks too */
3479                 dirty |= WM_DIRTY_LP_ALL;
3480         }
3481
3482         /* LP1+ watermarks already deemed dirty, no need to continue */
3483         if (dirty & WM_DIRTY_LP_ALL)
3484                 return dirty;
3485
3486         /* Find the lowest numbered LP1+ watermark in need of an update... */
3487         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3488                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3489                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3490                         break;
3491         }
3492
3493         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3494         for (; wm_lp <= 3; wm_lp++)
3495                 dirty |= WM_DIRTY_LP(wm_lp);
3496
3497         return dirty;
3498 }
3499
3500 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3501                                unsigned int dirty)
3502 {
3503         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3504         bool changed = false;
3505
3506         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3507                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3508                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
3509                 changed = true;
3510         }
3511         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3512                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3513                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
3514                 changed = true;
3515         }
3516         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3517                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3518                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
3519                 changed = true;
3520         }
3521
3522         /*
3523          * Don't touch WM1S_LP_EN here.
3524          * Doing so could cause underruns.
3525          */
3526
3527         return changed;
3528 }
3529
3530 /*
3531  * The spec says we shouldn't write when we don't need to, because every write
3532  * causes WMs to be re-evaluated, expending some power.
3533  */
3534 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
3535                                 struct ilk_wm_values *results)
3536 {
3537         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3538         unsigned int dirty;
3539         u32 val;
3540
3541         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
3542         if (!dirty)
3543                 return;
3544
3545         _ilk_disable_lp_wm(dev_priv, dirty);
3546
3547         if (dirty & WM_DIRTY_PIPE(PIPE_A))
3548                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
3549         if (dirty & WM_DIRTY_PIPE(PIPE_B))
3550                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
3551         if (dirty & WM_DIRTY_PIPE(PIPE_C))
3552                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
3553
3554         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
3555                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
3556         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
3557                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
3558         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
3559                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
3560
3561         if (dirty & WM_DIRTY_DDB) {
3562                 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
3563                         val = I915_READ(WM_MISC);
3564                         if (results->partitioning == INTEL_DDB_PART_1_2)
3565                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
3566                         else
3567                                 val |= WM_MISC_DATA_PARTITION_5_6;
3568                         I915_WRITE(WM_MISC, val);
3569                 } else {
3570                         val = I915_READ(DISP_ARB_CTL2);
3571                         if (results->partitioning == INTEL_DDB_PART_1_2)
3572                                 val &= ~DISP_DATA_PARTITION_5_6;
3573                         else
3574                                 val |= DISP_DATA_PARTITION_5_6;
3575                         I915_WRITE(DISP_ARB_CTL2, val);
3576                 }
3577         }
3578
3579         if (dirty & WM_DIRTY_FBC) {
3580                 val = I915_READ(DISP_ARB_CTL);
3581                 if (results->enable_fbc_wm)
3582                         val &= ~DISP_FBC_WM_DIS;
3583                 else
3584                         val |= DISP_FBC_WM_DIS;
3585                 I915_WRITE(DISP_ARB_CTL, val);
3586         }
3587
3588         if (dirty & WM_DIRTY_LP(1) &&
3589             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
3590                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
3591
3592         if (INTEL_GEN(dev_priv) >= 7) {
3593                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
3594                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
3595                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
3596                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
3597         }
3598
3599         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
3600                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
3601         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
3602                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
3603         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
3604                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
3605
3606         dev_priv->wm.hw = *results;
3607 }
3608
3609 bool ilk_disable_lp_wm(struct drm_device *dev)
3610 {
3611         struct drm_i915_private *dev_priv = to_i915(dev);
3612
3613         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3614 }
3615
3616 static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
3617 {
3618         u8 enabled_slices;
3619
3620         /* Slice 1 will always be enabled */
3621         enabled_slices = 1;
3622
3623         /* Gens prior to GEN11 have only one DBuf slice */
3624         if (INTEL_GEN(dev_priv) < 11)
3625                 return enabled_slices;
3626
3627         if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
3628                 enabled_slices++;
3629
3630         return enabled_slices;
3631 }
3632
3633 /*
3634  * FIXME: We still don't have the proper code to detect if we need to apply the WA,
3635  * so assume we'll always need it in order to avoid underruns.
3636  */
3637 static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)
3638 {
3639         return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv);
3640 }
3641
3642 static bool
3643 intel_has_sagv(struct drm_i915_private *dev_priv)
3644 {
3645         return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) &&
3646                 dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED;
3647 }
3648
3649 /*
3650  * SAGV dynamically adjusts the system agent voltage and clock frequencies
3651  * depending on power and performance requirements. The display engine access
3652  * to system memory is blocked during the adjustment time. Because of the
3653  * blocking time, having this enabled can cause full system hangs and/or pipe
3654  * underruns if we don't meet all of the following requirements:
3655  *
3656  *  - <= 1 pipe enabled
3657  *  - All planes can enable watermarks for latencies >= SAGV engine block time
3658  *  - We're not using an interlaced display configuration
3659  */
3660 int
3661 intel_enable_sagv(struct drm_i915_private *dev_priv)
3662 {
3663         int ret;
3664
3665         if (!intel_has_sagv(dev_priv))
3666                 return 0;
3667
3668         if (dev_priv->sagv_status == I915_SAGV_ENABLED)
3669                 return 0;
3670
3671         DRM_DEBUG_KMS("Enabling SAGV\n");
3672         mutex_lock(&dev_priv->pcu_lock);
3673
3674         ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3675                                       GEN9_SAGV_ENABLE);
3676
3677         /* We don't need to wait for SAGV when enabling */
3678         mutex_unlock(&dev_priv->pcu_lock);
3679
3680         /*
3681          * Some skl systems, pre-release machines in particular,
3682          * don't actually have SAGV.
3683          */
3684         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3685                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3686                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3687                 return 0;
3688         } else if (ret < 0) {
3689                 DRM_ERROR("Failed to enable SAGV\n");
3690                 return ret;
3691         }
3692
3693         dev_priv->sagv_status = I915_SAGV_ENABLED;
3694         return 0;
3695 }
3696
3697 int
3698 intel_disable_sagv(struct drm_i915_private *dev_priv)
3699 {
3700         int ret;
3701
3702         if (!intel_has_sagv(dev_priv))
3703                 return 0;
3704
3705         if (dev_priv->sagv_status == I915_SAGV_DISABLED)
3706                 return 0;
3707
3708         DRM_DEBUG_KMS("Disabling SAGV\n");
3709         mutex_lock(&dev_priv->pcu_lock);
3710
3711         /* bspec says to keep retrying for at least 1 ms */
3712         ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3713                                 GEN9_SAGV_DISABLE,
3714                                 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3715                                 1);
3716         mutex_unlock(&dev_priv->pcu_lock);
3717
3718         /*
3719          * Some skl systems, pre-release machines in particular,
3720          * don't actually have SAGV.
3721          */
3722         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3723                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3724                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3725                 return 0;
3726         } else if (ret < 0) {
3727                 DRM_ERROR("Failed to disable SAGV (%d)\n", ret);
3728                 return ret;
3729         }
3730
3731         dev_priv->sagv_status = I915_SAGV_DISABLED;
3732         return 0;
3733 }
3734
3735 bool intel_can_enable_sagv(struct drm_atomic_state *state)
3736 {
3737         struct drm_device *dev = state->dev;
3738         struct drm_i915_private *dev_priv = to_i915(dev);
3739         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3740         struct intel_crtc *crtc;
3741         struct intel_plane *plane;
3742         struct intel_crtc_state *cstate;
3743         enum pipe pipe;
3744         int level, latency;
3745         int sagv_block_time_us;
3746
3747         if (!intel_has_sagv(dev_priv))
3748                 return false;
3749
3750         if (IS_GEN(dev_priv, 9))
3751                 sagv_block_time_us = 30;
3752         else if (IS_GEN(dev_priv, 10))
3753                 sagv_block_time_us = 20;
3754         else
3755                 sagv_block_time_us = 10;
3756
3757         /*
3758          * SKL+ workaround: bspec recommends we disable SAGV when we have
3759          * more than one pipe enabled.
3760          *
3761          * If there are no active CRTCs, no additional checks need to be performed.
3762          */
3763         if (hweight32(intel_state->active_crtcs) == 0)
3764                 return true;
3765         else if (hweight32(intel_state->active_crtcs) > 1)
3766                 return false;
3767
3768         /* Since we're now guaranteed to only have one active CRTC... */
3769         pipe = ffs(intel_state->active_crtcs) - 1;
3770         crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
3771         cstate = to_intel_crtc_state(crtc->base.state);
3772
3773         if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
3774                 return false;
3775
3776         for_each_intel_plane_on_crtc(dev, crtc, plane) {
3777                 struct skl_plane_wm *wm =
3778                         &cstate->wm.skl.optimal.planes[plane->id];
3779
3780                 /* Skip this plane if it's not enabled */
3781                 if (!wm->wm[0].plane_en)
3782                         continue;
3783
3784                 /* Find the highest enabled wm level for this plane */
3785                 for (level = ilk_wm_max_level(dev_priv);
3786                      !wm->wm[level].plane_en; --level)
3787                      { }
3788
3789                 latency = dev_priv->wm.skl_latency[level];
3790
3791                 if (skl_needs_memory_bw_wa(dev_priv) &&
3792                     plane->base.state->fb->modifier ==
3793                     I915_FORMAT_MOD_X_TILED)
3794                         latency += 15;
3795
3796                 /*
3797                  * If any plane on this pipe cannot enable a wm level that
3798                  * covers a memory latency of at least sagv_block_time_us,
3799                  * we can't enable SAGV.
3800                  */
3801                 if (latency < sagv_block_time_us)
3802                         return false;
3803         }
3804
3805         return true;
3806 }
3807
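/*
 * Example of the latency check above, with hypothetical plane latencies:
 * on gen9 (sagv_block_time_us = 30) a plane whose deepest enabled WM
 * level only covers 20 us of latency forces SAGV off, while one covering
 * 35 us (or 20 us plus the 15 us X-tiled adjustment when the memory BW
 * WA applies) allows it.
 */
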
3808 static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
3809                               const struct intel_crtc_state *cstate,
3810                               const u64 total_data_rate,
3811                               const int num_active,
3812                               struct skl_ddb_allocation *ddb)
3813 {
3814         const struct drm_display_mode *adjusted_mode;
3815         u64 total_data_bw;
3816         u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3817
3818         WARN_ON(ddb_size == 0);
3819
3820         if (INTEL_GEN(dev_priv) < 11)
3821                 return ddb_size - 4; /* 4 blocks for bypass path allocation */
3822
3823         adjusted_mode = &cstate->base.adjusted_mode;
3824         total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);
3825
3826         /*
3827          * 12GB/s is the maximum BW supported by a single DBuf slice.
3828          *
3829          * FIXME dbuf slice code is broken:
3830          * - must wait for planes to stop using the slice before powering it off
3831          * - plane straddling both slices is illegal in multi-pipe scenarios
3832          * - should validate we stay within the hw bandwidth limits
3833          */
3834         if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
3835                 ddb->enabled_slices = 2;
3836         } else {
3837                 ddb->enabled_slices = 1;
3838                 ddb_size /= 2;
3839         }
3840
3841         return ddb_size;
3842 }
3843
3844 static void
3845 skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
3846                                    const struct intel_crtc_state *cstate,
3847                                    const u64 total_data_rate,
3848                                    struct skl_ddb_allocation *ddb,
3849                                    struct skl_ddb_entry *alloc, /* out */
3850                                    int *num_active /* out */)
3851 {
3852         struct drm_atomic_state *state = cstate->base.state;
3853         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3854         struct drm_crtc *for_crtc = cstate->base.crtc;
3855         const struct drm_crtc_state *crtc_state;
3856         const struct drm_crtc *crtc;
3857         u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
3858         enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
3859         u16 ddb_size;
3860         u32 i;
3861
3862         if (WARN_ON(!state) || !cstate->base.active) {
3863                 alloc->start = 0;
3864                 alloc->end = 0;
3865                 *num_active = hweight32(dev_priv->active_crtcs);
3866                 return;
3867         }
3868
3869         if (intel_state->active_pipe_changes)
3870                 *num_active = hweight32(intel_state->active_crtcs);
3871         else
3872                 *num_active = hweight32(dev_priv->active_crtcs);
3873
3874         ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
3875                                       *num_active, ddb);
3876
3877         /*
3878          * If the state doesn't change the active CRTCs or there is no
3879          * modeset request, then there's no need to recalculate;
3880          * the existing pipe allocation limits should remain unchanged.
3881          * Note that we're safe from racing commits since any racing commit
3882          * that changes the active CRTC list or does a modeset would need to
3883          * grab _all_ crtc locks, including the one we currently hold.
3884          */
3885         if (!intel_state->active_pipe_changes && !intel_state->modeset) {
3886                 /*
3887                  * alloc may be cleared by clear_intel_crtc_state,
3888                  * copy from old state to be sure
3889                  */
3890                 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
3891                 return;
3892         }
3893
3894         /*
3895          * The watermark/ddb requirement depends heavily on the width of the
3896          * framebuffer, so instead of allocating DDB equally among pipes,
3897          * distribute DDB based on the resolution/width of each display.
3898          */
3899         for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
3900                 const struct drm_display_mode *adjusted_mode;
3901                 int hdisplay, vdisplay;
3902                 enum pipe pipe;
3903
3904                 if (!crtc_state->enable)
3905                         continue;
3906
3907                 pipe = to_intel_crtc(crtc)->pipe;
3908                 adjusted_mode = &crtc_state->adjusted_mode;
3909                 drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
3910                 total_width += hdisplay;
3911
3912                 if (pipe < for_pipe)
3913                         width_before_pipe += hdisplay;
3914                 else if (pipe == for_pipe)
3915                         pipe_width = hdisplay;
3916         }
3917
3918         alloc->start = ddb_size * width_before_pipe / total_width;
3919         alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
3920 }
3921
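/*
 * Sketch of the width-proportional split above with hypothetical
 * numbers: ddb_size = 512 blocks and two enabled pipes 1920 and 3840
 * pixels wide (total_width = 5760), with the 1920-wide display on the
 * lower-numbered pipe. That pipe gets [0, 512 * 1920 / 5760) = [0, 170)
 * and the 3840-wide pipe gets [170, 512).
 */
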
3922 static unsigned int skl_cursor_allocation(int num_active)
3923 {
3924         if (num_active == 1)
3925                 return 32;
3926
3927         return 8;
3928 }
3929
3930 static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
3931                                        struct skl_ddb_entry *entry, u32 reg)
3932 {
3933
3934         entry->start = reg & DDB_ENTRY_MASK;
3935         entry->end = (reg >> DDB_ENTRY_END_SHIFT) & DDB_ENTRY_MASK;
3936
3937         if (entry->end)
3938                 entry->end += 1;
3939 }
3940
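/*
 * Sketch of the decode above with a hypothetical register value whose
 * start field is 0 and whose end field is 479: the entry becomes
 * [start, end) = [0, 480), i.e. the hardware's inclusive end block is
 * converted to an exclusive one. An all-zero register leaves the entry
 * empty (end stays 0).
 */
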
3941 static void
3942 skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
3943                            const enum pipe pipe,
3944                            const enum plane_id plane_id,
3945                            struct skl_ddb_entry *ddb_y,
3946                            struct skl_ddb_entry *ddb_uv)
3947 {
3948         u32 val, val2;
3949         u32 fourcc = 0;
3950
3951         /* Cursor doesn't support NV12/planar, so no extra calculation needed */
3952         if (plane_id == PLANE_CURSOR) {
3953                 val = I915_READ(CUR_BUF_CFG(pipe));
3954                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
3955                 return;
3956         }
3957
3958         val = I915_READ(PLANE_CTL(pipe, plane_id));
3959
3960         /* No DDB allocated for disabled planes */
3961         if (val & PLANE_CTL_ENABLE)
3962                 fourcc = skl_format_to_fourcc(val & PLANE_CTL_FORMAT_MASK,
3963                                               val & PLANE_CTL_ORDER_RGBX,
3964                                               val & PLANE_CTL_ALPHA_MASK);
3965
3966         if (INTEL_GEN(dev_priv) >= 11) {
3967                 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
3968                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
3969         } else {
3970                 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
3971                 val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
3972
3973                 if (fourcc == DRM_FORMAT_NV12)
3974                         swap(val, val2);
3975
3976                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
3977                 skl_ddb_entry_init_from_hw(dev_priv, ddb_uv, val2);
3978         }
3979 }
3980
3981 void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
3982                                struct skl_ddb_entry *ddb_y,
3983                                struct skl_ddb_entry *ddb_uv)
3984 {
3985         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
3986         enum intel_display_power_domain power_domain;
3987         enum pipe pipe = crtc->pipe;
3988         intel_wakeref_t wakeref;
3989         enum plane_id plane_id;
3990
3991         power_domain = POWER_DOMAIN_PIPE(pipe);
3992         wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
3993         if (!wakeref)
3994                 return;
3995
3996         for_each_plane_id_on_crtc(crtc, plane_id)
3997                 skl_ddb_get_hw_plane_state(dev_priv, pipe,
3998                                            plane_id,
3999                                            &ddb_y[plane_id],
4000                                            &ddb_uv[plane_id]);
4001
4002         intel_display_power_put(dev_priv, power_domain, wakeref);
4003 }
4004
4005 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
4006                           struct skl_ddb_allocation *ddb /* out */)
4007 {
4008         ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
4009 }
4010
4011 /*
4012  * Determines the downscale amount of a plane for the purposes of watermark calculations.
4013  * The bspec defines downscale amount as:
4014  *
4015  * """
4016  * Horizontal down scale amount = maximum[1, Horizontal source size /
4017  *                                           Horizontal destination size]
4018  * Vertical down scale amount = maximum[1, Vertical source size /
4019  *                                         Vertical destination size]
4020  * Total down scale amount = Horizontal down scale amount *
4021  *                           Vertical down scale amount
4022  * """
4023  *
4024  * Return value is provided in 16.16 fixed point form to retain fractional part.
4025  * Caller should take care of dividing & rounding off the value.
4026  */
4027 static uint_fixed_16_16_t
4028 skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
4029                            const struct intel_plane_state *pstate)
4030 {
4031         struct intel_plane *plane = to_intel_plane(pstate->base.plane);
4032         u32 src_w, src_h, dst_w, dst_h;
4033         uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4034         uint_fixed_16_16_t downscale_h, downscale_w;
4035
4036         if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4037                 return u32_to_fixed16(0);
4038
4039         /* n.b., src is 16.16 fixed point, dst is whole integer */
4040         if (plane->id == PLANE_CURSOR) {
4041                 /*
4042                  * Cursors only support 0/180 degree rotation,
4043                  * hence no need to account for rotation here.
4044                  */
4045                 src_w = pstate->base.src_w >> 16;
4046                 src_h = pstate->base.src_h >> 16;
4047                 dst_w = pstate->base.crtc_w;
4048                 dst_h = pstate->base.crtc_h;
4049         } else {
4050                 /*
4051                  * Src coordinates are already rotated by 270 degrees for
4052                  * the 90/270 degree plane rotation cases (to match the
4053                  * GTT mapping), hence no need to account for rotation here.
4054                  */
4055                 src_w = drm_rect_width(&pstate->base.src) >> 16;
4056                 src_h = drm_rect_height(&pstate->base.src) >> 16;
4057                 dst_w = drm_rect_width(&pstate->base.dst);
4058                 dst_h = drm_rect_height(&pstate->base.dst);
4059         }
4060
4061         fp_w_ratio = div_fixed16(src_w, dst_w);
4062         fp_h_ratio = div_fixed16(src_h, dst_h);
4063         downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4064         downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4065
4066         return mul_fixed16(downscale_w, downscale_h);
4067 }
4068
4069 static uint_fixed_16_16_t
4070 skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
4071 {
4072         uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
4073
4074         if (!crtc_state->base.enable)
4075                 return pipe_downscale;
4076
4077         if (crtc_state->pch_pfit.enabled) {
4078                 u32 src_w, src_h, dst_w, dst_h;
4079                 u32 pfit_size = crtc_state->pch_pfit.size;
4080                 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4081                 uint_fixed_16_16_t downscale_h, downscale_w;
4082
4083                 src_w = crtc_state->pipe_src_w;
4084                 src_h = crtc_state->pipe_src_h;
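                     /*
                      * pch_pfit.size packs the panel fitter window width in
                      * the high 16 bits and the height in the low 16 bits.
                      */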
4085                 dst_w = pfit_size >> 16;
4086                 dst_h = pfit_size & 0xffff;
4087
4088                 if (!dst_w || !dst_h)
4089                         return pipe_downscale;
4090
4091                 fp_w_ratio = div_fixed16(src_w, dst_w);
4092                 fp_h_ratio = div_fixed16(src_h, dst_h);
4093                 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4094                 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4095
4096                 pipe_downscale = mul_fixed16(downscale_w, downscale_h);
4097         }
4098
4099         return pipe_downscale;
4100 }
4101
4102 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
4103                                   struct intel_crtc_state *cstate)
4104 {
4105         struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
4106         struct drm_crtc_state *crtc_state = &cstate->base;
4107         struct drm_atomic_state *state = crtc_state->state;
4108         struct drm_plane *plane;
4109         const struct drm_plane_state *pstate;
4110         struct intel_plane_state *intel_pstate;
4111         int crtc_clock, dotclk;
4112         u32 pipe_max_pixel_rate;
4113         uint_fixed_16_16_t pipe_downscale;
4114         uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
4115
4116         if (!cstate->base.enable)
4117                 return 0;
4118
4119         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4120                 uint_fixed_16_16_t plane_downscale;
4121                 uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
4122                 int bpp;
4123
4124                 if (!intel_wm_plane_visible(cstate,
4125                                             to_intel_plane_state(pstate)))
4126                         continue;
4127
4128                 if (WARN_ON(!pstate->fb))
4129                         return -EINVAL;
4130
4131                 intel_pstate = to_intel_plane_state(pstate);
4132                 plane_downscale = skl_plane_downscale_amount(cstate,
4133                                                              intel_pstate);
4134                 bpp = pstate->fb->format->cpp[0] * 8;
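                     /* 64 bpp planes are subject to an extra 9/8 downscale factor */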
4135                 if (bpp == 64)
4136                         plane_downscale = mul_fixed16(plane_downscale,
4137                                                       fp_9_div_8);
4138
4139                 max_downscale = max_fixed16(plane_downscale, max_downscale);
4140         }
4141         pipe_downscale = skl_pipe_downscale_amount(cstate);
4142
4143         pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
4144
4145         crtc_clock = crtc_state->adjusted_mode.crtc_clock;
4146         dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
4147
4148         if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
4149                 dotclk *= 2;
4150
4151         pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
4152
4153         if (pipe_max_pixel_rate < crtc_clock) {
4154                 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
4155                 return -EINVAL;
4156         }
4157
4158         return 0;
4159 }
4160
4161 static u64
4162 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
4163                              const struct intel_plane_state *intel_pstate,
4164                              const int plane)
4165 {
4166         struct intel_plane *intel_plane =
4167                 to_intel_plane(intel_pstate->base.plane);
4168         u32 data_rate;
4169         u32 width = 0, height = 0;
4170         struct drm_framebuffer *fb;
4171         u32 format;
4172         uint_fixed_16_16_t down_scale_amount;
4173         u64 rate;
4174
4175         if (!intel_pstate->base.visible)
4176                 return 0;
4177
4178         fb = intel_pstate->base.fb;
4179         format = fb->format->format;
4180
4181         if (intel_plane->id == PLANE_CURSOR)
4182                 return 0;
4183         if (plane == 1 && format != DRM_FORMAT_NV12)
4184                 return 0;
4185
4186         /*
4187          * Src coordinates are already rotated by 270 degrees for
4188          * the 90/270 degree plane rotation cases (to match the
4189          * GTT mapping), hence no need to account for rotation here.
4190          */
4191         width = drm_rect_width(&intel_pstate->base.src) >> 16;
4192         height = drm_rect_height(&intel_pstate->base.src) >> 16;
4193
4194         /* UV plane does 1/2 pixel sub-sampling */
4195         if (plane == 1 && format == DRM_FORMAT_NV12) {
4196                 width /= 2;
4197                 height /= 2;
4198         }
4199
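             /* Relative data rate: downscaled source area times bytes per pixel */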
4200         data_rate = width * height;
4201
4202         down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
4203
4204         rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4205
4206         rate *= fb->format->cpp[plane];
4207         return rate;
4208 }
4209
4210 static u64
4211 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4212                                  u64 *plane_data_rate,
4213                                  u64 *uv_plane_data_rate)
4214 {
4215         struct drm_crtc_state *cstate = &intel_cstate->base;
4216         struct drm_atomic_state *state = cstate->state;
4217         struct drm_plane *plane;
4218         const struct drm_plane_state *pstate;
4219         u64 total_data_rate = 0;
4220
4221         if (WARN_ON(!state))
4222                 return 0;
4223
4224         /* Calculate and cache data rate for each plane */
4225         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4226                 enum plane_id plane_id = to_intel_plane(plane)->id;
4227                 u64 rate;
4228                 const struct intel_plane_state *intel_pstate =
4229                         to_intel_plane_state(pstate);
4230
4231                 /* packed/y */
4232                 rate = skl_plane_relative_data_rate(intel_cstate,
4233                                                     intel_pstate, 0);
4234                 plane_data_rate[plane_id] = rate;
4235                 total_data_rate += rate;
4236
4237                 /* uv-plane */
4238                 rate = skl_plane_relative_data_rate(intel_cstate,
4239                                                     intel_pstate, 1);
4240                 uv_plane_data_rate[plane_id] = rate;
4241                 total_data_rate += rate;
4242         }
4243
4244         return total_data_rate;
4245 }
4246
4247 static u64
4248 icl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4249                                  u64 *plane_data_rate)
4250 {
4251         struct drm_crtc_state *cstate = &intel_cstate->base;
4252         struct drm_atomic_state *state = cstate->state;
4253         struct drm_plane *plane;
4254         const struct drm_plane_state *pstate;
4255         u64 total_data_rate = 0;
4256
4257         if (WARN_ON(!state))
4258                 return 0;
4259
4260         /* Calculate and cache data rate for each plane */
4261         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4262                 const struct intel_plane_state *intel_pstate =
4263                         to_intel_plane_state(pstate);
4264                 enum plane_id plane_id = to_intel_plane(plane)->id;
4265                 u64 rate;
4266
4267                 if (!intel_pstate->linked_plane) {
4268                         rate = skl_plane_relative_data_rate(intel_cstate,
4269                                                             intel_pstate, 0);
4270                         plane_data_rate[plane_id] = rate;
4271                         total_data_rate += rate;
4272                 } else {
4273                         enum plane_id y_plane_id;
4274
4275                         /*
4276                          * The slave plane might not be iterated over by
4277                          * drm_atomic_crtc_state_for_each_plane_state(),
4278                          * and it needs the master plane state, which may be
4279                          * NULL if we try get_new_plane_state(), so we
4280                          * always calculate from the master.
4281                          */
4282                         if (intel_pstate->slave)
4283                                 continue;
4284
4285                         /* Y plane rate is calculated on the slave */
4286                         rate = skl_plane_relative_data_rate(intel_cstate,
4287                                                             intel_pstate, 0);
4288                         y_plane_id = intel_pstate->linked_plane->id;
4289                         plane_data_rate[y_plane_id] = rate;
4290                         total_data_rate += rate;
4291
4292                         rate = skl_plane_relative_data_rate(intel_cstate,
4293                                                             intel_pstate, 1);
4294                         plane_data_rate[plane_id] = rate;
4295                         total_data_rate += rate;
4296                 }
4297         }
4298
4299         return total_data_rate;
4300 }
4301
4302 static int
4303 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
4304                       struct skl_ddb_allocation *ddb /* out */)
4305 {
4306         struct drm_atomic_state *state = cstate->base.state;
4307         struct drm_crtc *crtc = cstate->base.crtc;
4308         struct drm_i915_private *dev_priv = to_i915(crtc->dev);
4309         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4310         struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
4311         struct skl_plane_wm *wm;
4312         u16 alloc_size, start = 0;
4313         u16 total[I915_MAX_PLANES] = {};
4314         u16 uv_total[I915_MAX_PLANES] = {};
4315         u64 total_data_rate;
4316         enum plane_id plane_id;
4317         int num_active;
4318         u64 plane_data_rate[I915_MAX_PLANES] = {};
4319         u64 uv_plane_data_rate[I915_MAX_PLANES] = {};
4320         u32 blocks;
4321         int level;
4322
4323         /* Clear the partitioning for disabled planes. */
4324         memset(cstate->wm.skl.plane_ddb_y, 0, sizeof(cstate->wm.skl.plane_ddb_y));
4325         memset(cstate->wm.skl.plane_ddb_uv, 0, sizeof(cstate->wm.skl.plane_ddb_uv));
4326
4327         if (WARN_ON(!state))
4328                 return 0;
4329
4330         if (!cstate->base.active) {
4331                 alloc->start = alloc->end = 0;
4332                 return 0;
4333         }
4334
4335         if (INTEL_GEN(dev_priv) < 11)
4336                 total_data_rate =
4337                         skl_get_total_relative_data_rate(cstate,
4338                                                          plane_data_rate,
4339                                                          uv_plane_data_rate);
4340         else
4341                 total_data_rate =
4342                         icl_get_total_relative_data_rate(cstate,
4343                                                          plane_data_rate);
4344
4345         skl_ddb_get_pipe_allocation_limits(dev_priv, cstate, total_data_rate,
4346                                            ddb, alloc, &num_active);
4347         alloc_size = skl_ddb_entry_size(alloc);
4348         if (alloc_size == 0)
4349                 return 0;
4350
4351         /* Allocate a fixed number of blocks for the cursor. */
4352         total[PLANE_CURSOR] = skl_cursor_allocation(num_active);
4353         alloc_size -= total[PLANE_CURSOR];
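             /* The cursor allocation always sits at the very end of the pipe's DDB */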
4354         cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].start =
4355                 alloc->end - total[PLANE_CURSOR];
4356         cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end;
4357
4358         if (total_data_rate == 0)
4359                 return 0;
4360
4361         /*
4362          * Find the highest watermark level for which we can satisfy the block
4363          * requirement of active planes.
4364          */
4365         for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) {
4366                 blocks = 0;
4367                 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4368                         if (plane_id == PLANE_CURSOR)
4369                                 continue;
4370
4371                         wm = &cstate->wm.skl.optimal.planes[plane_id];
4372                         blocks += wm->wm[level].min_ddb_alloc;
4373                         blocks += wm->uv_wm[level].min_ddb_alloc;
4374                 }
4375
4376                 if (blocks < alloc_size) {
4377                         alloc_size -= blocks;
4378                         break;
4379                 }
4380         }
4381
4382         if (level < 0) {
4383                 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations\n");
4384                 DRM_DEBUG_KMS("minimum required %d/%d\n", blocks,
4385                               alloc_size);
4386                 return -EINVAL;
4387         }
4388
4389         /*
4390          * Grant each plane the blocks it requires at the highest achievable
4391          * watermark level, plus an extra share of the leftover blocks
4392          * proportional to its relative data rate.
4393          */
4394         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4395                 u64 rate;
4396                 u16 extra;
4397
4398                 if (plane_id == PLANE_CURSOR)
4399                         continue;
4400
4401                 /*
4402                  * We've accounted for all active planes; remaining planes are
4403                  * all disabled.
4404                  */
4405                 if (total_data_rate == 0)
4406                         break;
4407
4408                 wm = &cstate->wm.skl.optimal.planes[plane_id];
4409
4410                 rate = plane_data_rate[plane_id];
4411                 extra = min_t(u16, alloc_size,
4412                               DIV64_U64_ROUND_UP(alloc_size * rate,
4413                                                  total_data_rate));
4414                 total[plane_id] = wm->wm[level].min_ddb_alloc + extra;
4415                 alloc_size -= extra;
4416                 total_data_rate -= rate;
4417
4418                 if (total_data_rate == 0)
4419                         break;
4420
4421                 rate = uv_plane_data_rate[plane_id];
4422                 extra = min_t(u16, alloc_size,
4423                               DIV64_U64_ROUND_UP(alloc_size * rate,
4424                                                  total_data_rate));
4425                 uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra;
4426                 alloc_size -= extra;
4427                 total_data_rate -= rate;
4428         }
4429         WARN_ON(alloc_size != 0 || total_data_rate != 0);
4430
4431         /* Set the actual DDB start/end points for each plane */
4432         start = alloc->start;
4433         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4434                 struct skl_ddb_entry *plane_alloc, *uv_plane_alloc;
4435
4436                 if (plane_id == PLANE_CURSOR)
4437                         continue;
4438
4439                 plane_alloc = &cstate->wm.skl.plane_ddb_y[plane_id];
4440                 uv_plane_alloc = &cstate->wm.skl.plane_ddb_uv[plane_id];
4441
4442                 /* Gen11+ uses a separate plane for UV watermarks */
4443                 WARN_ON(INTEL_GEN(dev_priv) >= 11 && uv_total[plane_id]);
4444
4445                 /* Leave disabled planes at (0,0) */
4446                 if (total[plane_id]) {
4447                         plane_alloc->start = start;
4448                         start += total[plane_id];
4449                         plane_alloc->end = start;
4450                 }
4451
4452                 if (uv_total[plane_id]) {
4453                         uv_plane_alloc->start = start;
4454                         start += uv_total[plane_id];
4455                         uv_plane_alloc->end = start;
4456                 }
4457         }
4458
4459         /*
4460          * When we calculated watermark values we didn't know how high
4461          * of a level we'd actually be able to hit, so we just marked
4462          * all levels as "enabled."  Go back now and disable the ones
4463          * that aren't actually possible.
4464          */
4465         for (level++; level <= ilk_wm_max_level(dev_priv); level++) {
4466                 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4467                         wm = &cstate->wm.skl.optimal.planes[plane_id];
4468                         memset(&wm->wm[level], 0, sizeof(wm->wm[level]));
4469
4470                         /*
4471                          * Wa_1408961008:icl
4472                          * Underruns with WM1+ disabled
4473                          */
4474                         if (IS_ICELAKE(dev_priv) &&
4475                             level == 1 && wm->wm[0].plane_en) {
4476                                 wm->wm[level].plane_res_b = wm->wm[0].plane_res_b;
4477                                 wm->wm[level].plane_res_l = wm->wm[0].plane_res_l;
4478                                 wm->wm[level].ignore_lines = wm->wm[0].ignore_lines;
4479                         }
4480                 }
4481         }
4482
4483         /*
4484          * Go back and disable the transition watermark if it turns out we
4485          * don't have enough DDB blocks for it.
4486          */
4487         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4488                 wm = &cstate->wm.skl.optimal.planes[plane_id];
4489                 if (wm->trans_wm.plane_res_b >= total[plane_id])
4490                         memset(&wm->trans_wm, 0, sizeof(wm->trans_wm));
4491         }
4492
4493         return 0;
4494 }
4495
4496 /*
4497  * The max latency should be 257 (max the punit can code is 255 and we add 2us
4498  * for the read latency) and cpp should always be <= 8, so that
4499  * should allow pixel_rate up to ~2 GHz which seems sufficient since max
4500  * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4501  */
4502 static uint_fixed_16_16_t
4503 skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate,
4504                u8 cpp, u32 latency, u32 dbuf_block_size)
4505 {
4506         u32 wm_intermediate_val;
4507         uint_fixed_16_16_t ret;
4508
4509         if (latency == 0)
4510                 return FP_16_16_MAX;
4511
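             /*
              * latency is in us and pixel_rate in kHz, so
              * latency * pixel_rate * cpp / 1000 is the number of bytes
              * fetched during the latency window; dividing by the DDB
              * block size converts that to blocks.
              */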
4512         wm_intermediate_val = latency * pixel_rate * cpp;
4513         ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
4514
4515         if (INTEL_GEN(dev_priv) >= 10)
4516                 ret = add_fixed16_u32(ret, 1);
4517
4518         return ret;
4519 }
4520
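     /*
      * Method 2: the latency expressed in whole lines (rounded up),
      * multiplied by the number of DDB blocks needed per line.
      */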
4521 static uint_fixed_16_16_t
4522 skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency,
4523                uint_fixed_16_16_t plane_blocks_per_line)
4524 {
4525         u32 wm_intermediate_val;
4526         uint_fixed_16_16_t ret;
4527
4528         if (latency == 0)
4529                 return FP_16_16_MAX;
4530
4531         wm_intermediate_val = latency * pixel_rate;
4532         wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4533                                            pipe_htotal * 1000);
4534         ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
4535         return ret;
4536 }
4537
4538 static uint_fixed_16_16_t
4539 intel_get_linetime_us(const struct intel_crtc_state *cstate)
4540 {
4541         u32 pixel_rate;
4542         u32 crtc_htotal;
4543         uint_fixed_16_16_t linetime_us;
4544
4545         if (!cstate->base.active)
4546                 return u32_to_fixed16(0);
4547
4548         pixel_rate = cstate->pixel_rate;
4549
4550         if (WARN_ON(pixel_rate == 0))
4551                 return u32_to_fixed16(0);
4552
4553         crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
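             /* pixel_rate is in kHz, so htotal * 1000 / pixel_rate is the line time in us */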
4554         linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
4555
4556         return linetime_us;
4557 }
4558
4559 static u32
4560 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4561                               const struct intel_plane_state *pstate)
4562 {
4563         u64 adjusted_pixel_rate;
4564         uint_fixed_16_16_t downscale_amount;
4565
4566         /* Shouldn't reach here on disabled planes... */
4567         if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4568                 return 0;
4569
4570         /*
4571          * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4572          * with additional adjustments for plane-specific scaling.
4573          */
4574         adjusted_pixel_rate = cstate->pixel_rate;
4575         downscale_amount = skl_plane_downscale_amount(cstate, pstate);
4576
4577         return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4578                                             downscale_amount);
4579 }
4580
4581 static int
4582 skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,
4583                             const struct intel_plane_state *intel_pstate,
4584                             struct skl_wm_params *wp, int color_plane)
4585 {
4586         struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
4587         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
4588         const struct drm_plane_state *pstate = &intel_pstate->base;
4589         const struct drm_framebuffer *fb = pstate->fb;
4590         u32 interm_pbpl;
4591
4592         /* Only the NV12 format has two planes */
4593         if (color_plane == 1 && fb->format->format != DRM_FORMAT_NV12) {
4594                 DRM_DEBUG_KMS("Non-NV12 formats have a single plane\n");
4595                 return -EINVAL;
4596         }
4597
4598         wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
4599                       fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
4600                       fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4601                       fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4602         wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
4603         wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4604                          fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4605         wp->is_planar = fb->format->format == DRM_FORMAT_NV12;
4606
4607         if (plane->id == PLANE_CURSOR) {
4608                 wp->width = intel_pstate->base.crtc_w;
4609         } else {
4610                 /*
4611                  * Src coordinates are already rotated by 270 degrees for
4612                  * the 90/270 degree plane rotation cases (to match the
4613                  * GTT mapping), hence no need to account for rotation here.
4614                  */
4615                 wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
4616         }
4617
4618         if (color_plane == 1 && wp->is_planar)
4619                 wp->width /= 2;
4620
4621         wp->cpp = fb->format->cpp[color_plane];
4622         wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
4623                                                              intel_pstate);
4624
4625         if (INTEL_GEN(dev_priv) >= 11 &&
4626             fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 1)
4627                 wp->dbuf_block_size = 256;
4628         else
4629                 wp->dbuf_block_size = 512;
4630
4631         if (drm_rotation_90_or_270(pstate->rotation)) {
4632
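                     /* Minimum scanline count for 90/270 rotation depends on bytes per pixel */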
4633                 switch (wp->cpp) {
4634                 case 1:
4635                         wp->y_min_scanlines = 16;
4636                         break;
4637                 case 2:
4638                         wp->y_min_scanlines = 8;
4639                         break;
4640                 case 4:
4641                         wp->y_min_scanlines = 4;
4642                         break;
4643                 default:
4644                         MISSING_CASE(wp->cpp);
4645                         return -EINVAL;
4646                 }
4647         } else {
4648                 wp->y_min_scanlines = 4;
4649         }
4650
4651         if (skl_needs_memory_bw_wa(dev_priv))
4652                 wp->y_min_scanlines *= 2;
4653
4654         wp->plane_bytes_per_line = wp->width * wp->cpp;
4655         if (wp->y_tiled) {
4656                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
4657                                            wp->y_min_scanlines,
4658                                            wp->dbuf_block_size);
4659
4660                 if (INTEL_GEN(dev_priv) >= 10)
4661                         interm_pbpl++;
4662
4663                 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4664                                                         wp->y_min_scanlines);
4665         } else if (wp->x_tiled && IS_GEN(dev_priv, 9)) {
4666                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4667                                            wp->dbuf_block_size);
4668                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4669         } else {
4670                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4671                                            wp->dbuf_block_size) + 1;
4672                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4673         }
4674
4675         wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4676                                              wp->plane_blocks_per_line);
4677         wp->linetime_us = fixed16_to_u32_round_up(
4678                                         intel_get_linetime_us(cstate));
4679
4680         return 0;
4681 }
4682
4683 static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level)
4684 {
4685         if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
4686                 return true;
4687
4688         /* The number of lines is ignored for the level 0 watermark. */
4689         return level > 0;
4690 }
4691
4692 static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
4693                                  const struct intel_plane_state *intel_pstate,
4694                                  int level,
4695                                  const struct skl_wm_params *wp,
4696                                  const struct skl_wm_level *result_prev,
4697                                  struct skl_wm_level *result /* out */)
4698 {
4699         struct drm_i915_private *dev_priv =
4700                 to_i915(intel_pstate->base.plane->dev);
4701         u32 latency = dev_priv->wm.skl_latency[level];
4702         uint_fixed_16_16_t method1, method2;
4703         uint_fixed_16_16_t selected_result;
4704         u32 res_blocks, res_lines, min_ddb_alloc = 0;
4705
4706         if (latency == 0) {
4707                 /* reject it */
4708                 result->min_ddb_alloc = U16_MAX;
4709                 return;
4710         }
4711
4712         /* Display WA #1141: kbl,cfl */
4713         if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
4714             IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&
4715             dev_priv->ipc_enabled)
4716                 latency += 4;
4717
4718         if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled)
4719                 latency += 15;
4720
4721         method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
4722                                  wp->cpp, latency, wp->dbuf_block_size);
4723         method2 = skl_wm_method2(wp->plane_pixel_rate,
4724                                  cstate->base.adjusted_mode.crtc_htotal,
4725                                  latency,
4726                                  wp->plane_blocks_per_line);
4727
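             /*
              * Y-tiled surfaces must never fall below the blocks needed for
              * y_min_scanlines worth of data; for other tilings pick between
              * the latency based (method1) and line based (method2) results.
              */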
4728         if (wp->y_tiled) {
4729                 selected_result = max_fixed16(method2, wp->y_tile_minimum);
4730         } else {
4731                 if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
4732                      wp->dbuf_block_size < 1) &&
4733                      (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) {
4734                         selected_result = method2;
4735                 } else if (latency >= wp->linetime_us) {
4736                         if (IS_GEN(dev_priv, 9) &&
4737                             !IS_GEMINILAKE(dev_priv))
4738                                 selected_result = min_fixed16(method1, method2);
4739                         else
4740                                 selected_result = method2;
4741                 } else {
4742                         selected_result = method1;
4743                 }
4744         }
4745
4746         res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
4747         res_lines = div_round_up_fixed16(selected_result,
4748                                          wp->plane_blocks_per_line);
4749
4750         if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) {
4751                 /* Display WA #1125: skl,bxt,kbl */
4752                 if (level == 0 && wp->rc_surface)
4753                         res_blocks +=
4754                                 fixed16_to_u32_round_up(wp->y_tile_minimum);
4755
4756                 /* Display WA #1126: skl,bxt,kbl */
4757                 if (level >= 1 && level <= 7) {
4758                         if (wp->y_tiled) {
4759                                 res_blocks +=
4760                                     fixed16_to_u32_round_up(wp->y_tile_minimum);
4761                                 res_lines += wp->y_min_scanlines;
4762                         } else {
4763                                 res_blocks++;
4764                         }
4765
4766                         /*
4767                          * Make sure the result blocks for higher latency levels
4768                          * are at least as high as those of the level below the
4769                          * current one. The DDB allocation optimization assumes
4770                          * this for special cases. Also covers Display WA #1125 for RC.
4771                          */
4772                         if (result_prev->plane_res_b > res_blocks)
4773                                 res_blocks = result_prev->plane_res_b;
4774                 }
4775         }
4776
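             /*
              * Gen11+ requires extra headroom in the minimum DDB allocation:
              * Y-tiled surfaces are padded past the next y_min_scanlines
              * boundary, others get roughly 10% extra blocks.
              */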
4777         if (INTEL_GEN(dev_priv) >= 11) {
4778                 if (wp->y_tiled) {
4779                         int extra_lines;
4780
4781                         if (res_lines % wp->y_min_scanlines == 0)
4782                                 extra_lines = wp->y_min_scanlines;
4783                         else
4784                                 extra_lines = wp->y_min_scanlines * 2 -
4785                                         res_lines % wp->y_min_scanlines;
4786
4787                         min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines,
4788                                                                  wp->plane_blocks_per_line);
4789                 } else {
4790                         min_ddb_alloc = res_blocks +
4791                                 DIV_ROUND_UP(res_blocks, 10);
4792                 }
4793         }
4794
4795         if (!skl_wm_has_lines(dev_priv, level))
4796                 res_lines = 0;
4797
4798         if (res_lines > 31) {
4799                 /* reject it */
4800                 result->min_ddb_alloc = U16_MAX;
4801                 return;
4802         }
4803
4804         /*
4805          * If res_lines is valid, assume we can use this watermark level
4806          * for now.  We'll come back and disable it after we calculate the
4807          * DDB allocation if it turns out we don't actually have enough
4808          * blocks to satisfy it.
4809          */
4810         result->plane_res_b = res_blocks;
4811         result->plane_res_l = res_lines;
4812         /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */
4813         result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1;
4814         result->plane_en = true;
4815 }
4816
4817 static void
4818 skl_compute_wm_levels(const struct intel_crtc_state *cstate,
4819                       const struct intel_plane_state *intel_pstate,
4820                       const struct skl_wm_params *wm_params,
4821                       struct skl_wm_level *levels)
4822 {
4823         struct drm_i915_private *dev_priv =
4824                 to_i915(intel_pstate->base.plane->dev);
4825         int level, max_level = ilk_wm_max_level(dev_priv);
4826         struct skl_wm_level *result_prev = &levels[0];
4827
4828         for (level = 0; level <= max_level; level++) {
4829                 struct skl_wm_level *result = &levels[level];
4830
4831                 skl_compute_plane_wm(cstate, intel_pstate, level, wm_params,
4832                                      result_prev, result);
4833
4834                 result_prev = result;
4835         }
4836 }
4837
4838 static u32
4839 skl_compute_linetime_wm(const struct intel_crtc_state *cstate)
4840 {
4841         struct drm_atomic_state *state = cstate->base.state;
4842         struct drm_i915_private *dev_priv = to_i915(state->dev);
4843         uint_fixed_16_16_t linetime_us;
4844         u32 linetime_wm;
4845
4846         linetime_us = intel_get_linetime_us(cstate);
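             /* The hardware linetime field is programmed in 1/8 us units */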
4847         linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
4848
4849         /* Display WA #1135: BXT:ALL GLK:ALL */
4850         if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled)
4851                 linetime_wm /= 2;
4852
4853         return linetime_wm;
4854 }
4855
4856 static void skl_compute_transition_wm(const struct intel_crtc_state *cstate,
4857                                       const struct skl_wm_params *wp,
4858                                       struct skl_plane_wm *wm)
4859 {
4860         struct drm_device *dev = cstate->base.crtc->dev;
4861         const struct drm_i915_private *dev_priv = to_i915(dev);
4862         u16 trans_min, trans_y_tile_min;
4863         const u16 trans_amount = 10; /* This is a configurable amount */
4864         u16 wm0_sel_res_b, trans_offset_b, res_blocks;
4865
4866         /* Transition WMs are not recommended by the HW team for GEN9 */
4867         if (INTEL_GEN(dev_priv) <= 9)
4868                 return;
4869
4870         /* Transition WMs don't make any sense if IPC is disabled */
4871         if (!dev_priv->ipc_enabled)
4872                 return;
4873
4874         trans_min = 14;
4875         if (INTEL_GEN(dev_priv) >= 11)
4876                 trans_min = 4;
4877
4878         trans_offset_b = trans_min + trans_amount;
4879
4880         /*
4881          * The spec asks for Selected Result Blocks for wm0 (the real value),
4882          * not Result Blocks (the integer value). Pay attention to the capital
4883          * letters. The value wm_l0->plane_res_b is actually Result Blocks, but
4884          * since Result Blocks is the ceiling of Selected Result Blocks plus 1,
4885          * and since we will later have to take the ceiling of the sum in the
4886          * transition watermarks calculation, we can just pretend Selected
4887          * Result Blocks is Result Blocks minus 1 and it should work for the
4888          * current platforms.
4889          */
4890         wm0_sel_res_b = wm->wm[0].plane_res_b - 1;
4891
4892         if (wp->y_tiled) {
4893                 trans_y_tile_min =
4894                         (u16)mul_round_up_u32_fixed16(2, wp->y_tile_minimum);
4895                 res_blocks = max(wm0_sel_res_b, trans_y_tile_min) +
4896                                 trans_offset_b;
4897         } else {
4898                 res_blocks = wm0_sel_res_b + trans_offset_b;
4899
4900                 /* WA BUG:1938466: add one block for non-Y-tiled planes */
4901                 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
4902                         res_blocks += 1;
4903
4904         }
4905
4906         /*
4907          * Just assume we can enable the transition watermark.  After
4908          * computing the DDB we'll come back and disable it if that
4909          * assumption turns out to be false.
4910          */
4911         wm->trans_wm.plane_res_b = res_blocks + 1;
4912         wm->trans_wm.plane_en = true;
4913 }
4914
4915 static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
4916                                      const struct intel_plane_state *plane_state,
4917                                      enum plane_id plane_id, int color_plane)
4918 {
4919         struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
4920         struct skl_wm_params wm_params;
4921         int ret;
4922
4923         ret = skl_compute_plane_wm_params(crtc_state, plane_state,
4924                                           &wm_params, color_plane);
4925         if (ret)
4926                 return ret;
4927
4928         skl_compute_wm_levels(crtc_state, plane_state, &wm_params, wm->wm);
4929         skl_compute_transition_wm(crtc_state, &wm_params, wm);
4930
4931         return 0;
4932 }
4933
4934 static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state,
4935                                  const struct intel_plane_state *plane_state,
4936                                  enum plane_id plane_id)
4937 {
4938         struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
4939         struct skl_wm_params wm_params;
4940         int ret;
4941
4942         wm->is_planar = true;
4943
4944         /* uv plane watermarks must also be validated for NV12/Planar */
4945         ret = skl_compute_plane_wm_params(crtc_state, plane_state,
4946                                           &wm_params, 1);
4947         if (ret)
4948                 return ret;
4949
4950         skl_compute_wm_levels(crtc_state, plane_state, &wm_params, wm->uv_wm);
4951
4952         return 0;
4953 }
4954
4955 static int skl_build_plane_wm(struct skl_pipe_wm *pipe_wm,
4956                               struct intel_crtc_state *crtc_state,
4957                               const struct intel_plane_state *plane_state)
4958 {
4959         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
4960         const struct drm_framebuffer *fb = plane_state->base.fb;
4961         enum plane_id plane_id = plane->id;
4962         int ret;
4963
4964         if (!intel_wm_plane_visible(crtc_state, plane_state))
4965                 return 0;
4966
4967         ret = skl_build_plane_wm_single(crtc_state, plane_state,
4968                                         plane_id, 0);
4969         if (ret)
4970                 return ret;
4971
4972         if (fb->format->is_yuv && fb->format->num_planes > 1) {
4973                 ret = skl_build_plane_wm_uv(crtc_state, plane_state,
4974                                             plane_id);
4975                 if (ret)
4976                         return ret;
4977         }
4978
4979         return 0;
4980 }
4981
4982 static int icl_build_plane_wm(struct skl_pipe_wm *pipe_wm,
4983                               struct intel_crtc_state *crtc_state,
4984                               const struct intel_plane_state *plane_state)
4985 {
4986         enum plane_id plane_id = to_intel_plane(plane_state->base.plane)->id;
4987         int ret;
4988
4989         /* Watermarks calculated in master */
4990         if (plane_state->slave)
4991                 return 0;
4992
4993         if (plane_state->linked_plane) {
4994                 const struct drm_framebuffer *fb = plane_state->base.fb;
4995                 enum plane_id y_plane_id = plane_state->linked_plane->id;
4996
4997                 WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state));
4998                 WARN_ON(!fb->format->is_yuv ||
4999                         fb->format->num_planes == 1);
5000
5001                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5002                                                 y_plane_id, 0);
5003                 if (ret)
5004                         return ret;
5005
5006                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5007                                                 plane_id, 1);
5008                 if (ret)
5009                         return ret;
5010         } else if (intel_wm_plane_visible(crtc_state, plane_state)) {
5011                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5012                                                 plane_id, 0);
5013                 if (ret)
5014                         return ret;
5015         }
5016
5017         return 0;
5018 }
5019
5020 static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
5021                              struct skl_pipe_wm *pipe_wm)
5022 {
5023         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5024         struct drm_crtc_state *crtc_state = &cstate->base;
5025         struct drm_plane *plane;
5026         const struct drm_plane_state *pstate;
5027         int ret;
5028
5029         /*
5030          * We'll only calculate watermarks for planes that are actually
5031          * enabled, so make sure all other planes are set as disabled.
5032          */
5033         memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
5034
5035         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
5036                 const struct intel_plane_state *intel_pstate =
5037                                                 to_intel_plane_state(pstate);
5038
5039                 if (INTEL_GEN(dev_priv) >= 11)
5040                         ret = icl_build_plane_wm(pipe_wm,
5041                                                  cstate, intel_pstate);
5042                 else
5043                         ret = skl_build_plane_wm(pipe_wm,
5044                                                  cstate, intel_pstate);
5045                 if (ret)
5046                         return ret;
5047         }
5048
5049         pipe_wm->linetime = skl_compute_linetime_wm(cstate);
5050
5051         return 0;
5052 }
5053
5054 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
5055                                 i915_reg_t reg,
5056                                 const struct skl_ddb_entry *entry)
5057 {
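             /* The hardware stores an inclusive end block, hence the end - 1 */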
5058         if (entry->end)
5059                 I915_WRITE_FW(reg, (entry->end - 1) << 16 | entry->start);
5060         else
5061                 I915_WRITE_FW(reg, 0);
5062 }
5063
5064 static void skl_write_wm_level(struct drm_i915_private *dev_priv,
5065                                i915_reg_t reg,
5066                                const struct skl_wm_level *level)
5067 {
5068         u32 val = 0;
5069
5070         if (level->plane_en)
5071                 val |= PLANE_WM_EN;
5072         if (level->ignore_lines)
5073                 val |= PLANE_WM_IGNORE_LINES;
5074         val |= level->plane_res_b;
5075         val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
5076
5077         I915_WRITE_FW(reg, val);
5078 }
5079
5080 void skl_write_plane_wm(struct intel_plane *plane,
5081                         const struct intel_crtc_state *crtc_state)
5082 {
5083         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5084         int level, max_level = ilk_wm_max_level(dev_priv);
5085         enum plane_id plane_id = plane->id;
5086         enum pipe pipe = plane->pipe;
5087         const struct skl_plane_wm *wm =
5088                 &crtc_state->wm.skl.optimal.planes[plane_id];
5089         const struct skl_ddb_entry *ddb_y =
5090                 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5091         const struct skl_ddb_entry *ddb_uv =
5092                 &crtc_state->wm.skl.plane_ddb_uv[plane_id];
5093
5094         for (level = 0; level <= max_level; level++) {
5095                 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
5096                                    &wm->wm[level]);
5097         }
5098         skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
5099                            &wm->trans_wm);
5100
5101         if (INTEL_GEN(dev_priv) >= 11) {
5102                 skl_ddb_entry_write(dev_priv,
5103                                     PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5104                 return;
5105         }
5106
5107         if (wm->is_planar)
5108                 swap(ddb_y, ddb_uv);
5109
5110         skl_ddb_entry_write(dev_priv,
5111                             PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5112         skl_ddb_entry_write(dev_priv,
5113                             PLANE_NV12_BUF_CFG(pipe, plane_id), ddb_uv);
5114 }
5115
5116 void skl_write_cursor_wm(struct intel_plane *plane,
5117                          const struct intel_crtc_state *crtc_state)
5118 {
5119         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5120         int level, max_level = ilk_wm_max_level(dev_priv);
5121         enum plane_id plane_id = plane->id;
5122         enum pipe pipe = plane->pipe;
5123         const struct skl_plane_wm *wm =
5124                 &crtc_state->wm.skl.optimal.planes[plane_id];
5125         const struct skl_ddb_entry *ddb =
5126                 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5127
5128         for (level = 0; level <= max_level; level++) {
5129                 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
5130                                    &wm->wm[level]);
5131         }
5132         skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
5133
5134         skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), ddb);
5135 }
5136
5137 bool skl_wm_level_equals(const struct skl_wm_level *l1,
5138                          const struct skl_wm_level *l2)
5139 {
5140         return l1->plane_en == l2->plane_en &&
5141                 l1->ignore_lines == l2->ignore_lines &&
5142                 l1->plane_res_l == l2->plane_res_l &&
5143                 l1->plane_res_b == l2->plane_res_b;
5144 }
5145
5146 static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,
5147                                 const struct skl_plane_wm *wm1,
5148                                 const struct skl_plane_wm *wm2)
5149 {
5150         int level, max_level = ilk_wm_max_level(dev_priv);
5151
5152         for (level = 0; level <= max_level; level++) {
5153                 if (!skl_wm_level_equals(&wm1->wm[level], &wm2->wm[level]) ||
5154                     !skl_wm_level_equals(&wm1->uv_wm[level], &wm2->uv_wm[level]))
5155                         return false;
5156         }
5157
5158         return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);
5159 }
5160
5161 static bool skl_pipe_wm_equals(struct intel_crtc *crtc,
5162                                const struct skl_pipe_wm *wm1,
5163                                const struct skl_pipe_wm *wm2)
5164 {
5165         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5166         enum plane_id plane_id;
5167
5168         for_each_plane_id_on_crtc(crtc, plane_id) {
5169                 if (!skl_plane_wm_equals(dev_priv,
5170                                          &wm1->planes[plane_id],
5171                                          &wm2->planes[plane_id]))
5172                         return false;
5173         }
5174
5175         return wm1->linetime == wm2->linetime;
5176 }
5177
5178 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
5179                                            const struct skl_ddb_entry *b)
5180 {
5181         return a->start < b->end && b->start < a->end;
5182 }
5183
5184 bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
5185                                  const struct skl_ddb_entry entries[],
5186                                  int num_entries, int ignore_idx)
5187 {
5188         int i;
5189
5190         for (i = 0; i < num_entries; i++) {
5191                 if (i != ignore_idx &&
5192                     skl_ddb_entries_overlap(ddb, &entries[i]))
5193                         return true;
5194         }
5195
5196         return false;
5197 }
5198
5199 static int skl_update_pipe_wm(struct intel_crtc_state *cstate,
5200                               const struct skl_pipe_wm *old_pipe_wm,
5201                               struct skl_pipe_wm *pipe_wm, /* out */
5202                               bool *changed /* out */)
5203 {
5204         struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
5205         int ret;
5206
5207         ret = skl_build_pipe_wm(cstate, pipe_wm);
5208         if (ret)
5209                 return ret;
5210
5211         *changed = !skl_pipe_wm_equals(crtc, old_pipe_wm, pipe_wm);
5212
5213         return 0;
5214 }
5215
5216 static u32
5217 pipes_modified(struct intel_atomic_state *state)
5218 {
5219         struct intel_crtc *crtc;
5220         struct intel_crtc_state *cstate;
5221         u32 i, ret = 0;
5222
5223         for_each_new_intel_crtc_in_state(state, crtc, cstate, i)
5224                 ret |= drm_crtc_mask(&crtc->base);
5225
5226         return ret;
5227 }
5228
5229 static int
5230 skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state,
5231                             struct intel_crtc_state *new_crtc_state)
5232 {
5233         struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->base.state);
5234         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
5235         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5236         struct intel_plane *plane;
5237
5238         for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5239                 struct intel_plane_state *plane_state;
5240                 enum plane_id plane_id = plane->id;
5241
5242                 if (skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_y[plane_id],
5243                                         &new_crtc_state->wm.skl.plane_ddb_y[plane_id]) &&
5244                     skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_uv[plane_id],
5245                                         &new_crtc_state->wm.skl.plane_ddb_uv[plane_id]))
5246                         continue;
5247
5248                 plane_state = intel_atomic_get_plane_state(state, plane);
5249                 if (IS_ERR(plane_state))
5250                         return PTR_ERR(plane_state);
5251
5252                 new_crtc_state->update_planes |= BIT(plane_id);
5253         }
5254
5255         return 0;
5256 }
5257
5258 static int
5259 skl_compute_ddb(struct intel_atomic_state *state)
5260 {
5261         const struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5262         struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
5263         struct intel_crtc_state *old_crtc_state;
5264         struct intel_crtc_state *new_crtc_state;
5265         struct intel_crtc *crtc;
5266         int ret, i;
5267
5268         memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
5269
5270         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5271                                             new_crtc_state, i) {
5272                 ret = skl_allocate_pipe_ddb(new_crtc_state, ddb);
5273                 if (ret)
5274                         return ret;
5275
5276                 ret = skl_ddb_add_affected_planes(old_crtc_state,
5277                                                   new_crtc_state);
5278                 if (ret)
5279                         return ret;
5280         }
5281
5282         return 0;
5283 }
5284
5285 static char enast(bool enable)
5286 {
5287         return enable ? '*' : ' ';
5288 }
5289
5290 static void
5291 skl_print_wm_changes(struct intel_atomic_state *state)
5292 {
5293         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5294         const struct intel_crtc_state *old_crtc_state;
5295         const struct intel_crtc_state *new_crtc_state;
5296         struct intel_plane *plane;
5297         struct intel_crtc *crtc;
5298         int i;
5299
5300         if ((drm_debug & DRM_UT_KMS) == 0)
5301                 return;
5302
5303         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5304                                             new_crtc_state, i) {
5305                 const struct skl_pipe_wm *old_pipe_wm, *new_pipe_wm;
5306
5307                 old_pipe_wm = &old_crtc_state->wm.skl.optimal;
5308                 new_pipe_wm = &new_crtc_state->wm.skl.optimal;
5309
5310                 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5311                         enum plane_id plane_id = plane->id;
5312                         const struct skl_ddb_entry *old, *new;
5313
5314                         old = &old_crtc_state->wm.skl.plane_ddb_y[plane_id];
5315                         new = &new_crtc_state->wm.skl.plane_ddb_y[plane_id];
5316
5317                         if (skl_ddb_entry_equal(old, new))
5318                                 continue;
5319
5320                         DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n",
5321                                       plane->base.base.id, plane->base.name,
5322                                       old->start, old->end, new->start, new->end,
5323                                       skl_ddb_entry_size(old), skl_ddb_entry_size(new));
5324                 }
5325
5326                 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5327                         enum plane_id plane_id = plane->id;
5328                         const struct skl_plane_wm *old_wm, *new_wm;
5329
5330                         old_wm = &old_pipe_wm->planes[plane_id];
5331                         new_wm = &new_pipe_wm->planes[plane_id];
5332
5333                         if (skl_plane_wm_equals(dev_priv, old_wm, new_wm))
5334                                 continue;
5335
5336                         DRM_DEBUG_KMS("[PLANE:%d:%s]   level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
5337                                       " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
5338                                       plane->base.base.id, plane->base.name,
5339                                       enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
5340                                       enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
5341                                       enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
5342                                       enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
5343                                       enast(old_wm->trans_wm.plane_en),
5344                                       enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
5345                                       enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
5346                                       enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
5347                                       enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
5348                                       enast(new_wm->trans_wm.plane_en));
5349
5350                         DRM_DEBUG_KMS("[PLANE:%d:%s]   lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
5351                                       " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
5352                                       plane->base.base.id, plane->base.name,
5353                                       enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
5354                                       enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
5355                                       enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l,
5356                                       enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l,
5357                                       enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l,
5358                                       enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l,
5359                                       enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
5360                                       enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
5361                                       enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
5362
5363                                       enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
5364                                       enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
5365                                       enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l,
5366                                       enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l,
5367                                       enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l,
5368                                       enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
5369                                       enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
5370                                       enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
5371                                       enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
5372
5373                         DRM_DEBUG_KMS("[PLANE:%d:%s]  blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5374                                       " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
5375                                       plane->base.base.id, plane->base.name,
5376                                       old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
5377                                       old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
5378                                       old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
5379                                       old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
5380                                       old_wm->trans_wm.plane_res_b,
5381                                       new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
5382                                       new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
5383                                       new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
5384                                       new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
5385                                       new_wm->trans_wm.plane_res_b);
5386
5387                         DRM_DEBUG_KMS("[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5388                                       " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
5389                                       plane->base.base.id, plane->base.name,
5390                                       old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
5391                                       old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
5392                                       old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
5393                                       old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
5394                                       old_wm->trans_wm.min_ddb_alloc,
5395                                       new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
5396                                       new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
5397                                       new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
5398                                       new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
5399                                       new_wm->trans_wm.min_ddb_alloc);
5400                 }
5401         }
5402 }
5403
5404 static int
5405 skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
5406 {
5407         struct drm_device *dev = state->base.dev;
5408         const struct drm_i915_private *dev_priv = to_i915(dev);
5409         struct intel_crtc *crtc;
5410         struct intel_crtc_state *crtc_state;
5411         u32 realloc_pipes = pipes_modified(state);
5412         int ret, i;
5413
5414         /*
5415          * When we distrust the BIOS watermarks we always need to recompute
5416          * them to set the expected DDB allocations for each CRTC.
5417          */
5418         if (dev_priv->wm.distrust_bios_wm)
5419                 (*changed) = true;
5420
5421         /*
5422          * If this transaction isn't actually touching any CRTC's, don't
5423          * bother with watermark calculation.  Note that if we pass this
5424          * test, we're guaranteed to hold at least one CRTC state mutex,
5425          * which means we can safely use values like dev_priv->active_crtcs
5426          * since any racing commits that want to update them would need to
5427          * hold _all_ CRTC state mutexes.
5428          */
5429         for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i)
5430                 (*changed) = true;
5431
5432         if (!*changed)
5433                 return 0;
5434
5435         /*
5436          * If this is our first atomic update following hardware readout,
5437          * we can't trust the DDB that the BIOS programmed for us.  Let's
5438          * pretend that all pipes switched active status so that we'll
5439          * ensure a full DDB recompute.
5440          */
5441         if (dev_priv->wm.distrust_bios_wm) {
5442                 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
5443                                        state->base.acquire_ctx);
5444                 if (ret)
5445                         return ret;
5446
5447                 state->active_pipe_changes = ~0;
5448
5449                 /*
5450                  * We usually only initialize state->active_crtcs if we're
5451                  * doing a modeset; make sure this field is always
5452                  * initialized during the sanitization process that happens
5453                  * on the first commit too.
5454                  */
5455                 if (!state->modeset)
5456                         state->active_crtcs = dev_priv->active_crtcs;
5457         }
5458
5459         /*
5460          * If the modeset changes which CRTC's are active, we need to
5461          * recompute the DDB allocation for *all* active pipes, even
5462          * those that weren't otherwise being modified in any way by this
5463          * atomic commit.  Due to the shrinking of the per-pipe allocations
5464          * when new active CRTC's are added, it's possible for a pipe that
5465          * we were already using and aren't changing at all here to suddenly
5466          * become invalid if its DDB needs exceeds its new allocation.
5467          *
5468          * Note that if we wind up doing a full DDB recompute, we can't let
5469          * any other display updates race with this transaction, so we need
5470          * to grab the lock on *all* CRTC's.
5471          */
5472         if (state->active_pipe_changes || state->modeset) {
5473                 realloc_pipes = ~0;
5474                 state->wm_results.dirty_pipes = ~0;
5475         }
5476
5477         /*
5478          * We're not recomputing for the pipes not included in the commit, so
5479          * make sure we start with the current state.
5480          */
5481         for_each_intel_crtc_mask(dev, crtc, realloc_pipes) {
5482                 crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
5483                 if (IS_ERR(crtc_state))
5484                         return PTR_ERR(crtc_state);
5485         }
5486
5487         return 0;
5488 }
5489
5490 /*
5491  * To make sure the cursor watermark registers are always consistent
5492  * with our computed state the following scenario needs special
5493  * treatment:
5494  *
5495  * 1. enable cursor
5496  * 2. move cursor entirely offscreen
5497  * 3. disable cursor
5498  *
5499  * Step 2. does call .disable_plane() but does not zero the watermarks
5500  * (since we consider an offscreen cursor still active for the purposes
5501  * of watermarks). Step 3. would not normally call .disable_plane()
5502  * because the actual plane visibility isn't changing, and we don't
5503  * deallocate the cursor ddb until the pipe gets disabled. So we must
5504  * force step 3. to call .disable_plane() to update the watermark
5505  * registers properly.
5506  *
5507  * Other planes do not suffer from this issue as their watermarks are
5508  * calculated based on the actual plane visibility. The only time this
5509  * can trigger for the other planes is during the initial readout as the
5510  * default value of the watermarks registers is not zero.
5511  * default value of the watermark registers is not zero.
5512 static int skl_wm_add_affected_planes(struct intel_atomic_state *state,
5513                                       struct intel_crtc *crtc)
5514 {
5515         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5516         const struct intel_crtc_state *old_crtc_state =
5517                 intel_atomic_get_old_crtc_state(state, crtc);
5518         struct intel_crtc_state *new_crtc_state =
5519                 intel_atomic_get_new_crtc_state(state, crtc);
5520         struct intel_plane *plane;
5521
5522         for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5523                 struct intel_plane_state *plane_state;
5524                 enum plane_id plane_id = plane->id;
5525
5526                 /*
5527                  * Force a full wm update for every plane on modeset.
5528                  * Required because the reset value of the wm registers
5529                  * is non-zero, whereas we want all disabled planes to
5530                  * have zero watermarks. So if we turn off the relevant
5531                  * power well the hardware state will go out of sync
5532                  * with the software state.
5533                  */
5534                 if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->base) &&
5535                     skl_plane_wm_equals(dev_priv,
5536                                         &old_crtc_state->wm.skl.optimal.planes[plane_id],
5537                                         &new_crtc_state->wm.skl.optimal.planes[plane_id]))
5538                         continue;
5539
5540                 plane_state = intel_atomic_get_plane_state(state, plane);
5541                 if (IS_ERR(plane_state))
5542                         return PTR_ERR(plane_state);
5543
5544                 new_crtc_state->update_planes |= BIT(plane_id);
5545         }
5546
5547         return 0;
5548 }
5549
5550 static int
5551 skl_compute_wm(struct intel_atomic_state *state)
5552 {
5553         struct intel_crtc *crtc;
5554         struct intel_crtc_state *cstate;
5555         struct intel_crtc_state *old_crtc_state;
5556         struct skl_ddb_values *results = &state->wm_results;
5557         struct skl_pipe_wm *pipe_wm;
5558         bool changed = false;
5559         int ret, i;
5560
5561         /* Clear all dirty flags */
5562         results->dirty_pipes = 0;
5563
5564         ret = skl_ddb_add_affected_pipes(state, &changed);
5565         if (ret || !changed)
5566                 return ret;
5567
5568         /*
5569          * Calculate WM's for all pipes that are part of this transaction.
5570          * Note that skl_ddb_add_affected_pipes may have added more CRTC's that
5571          * weren't otherwise being modified (and set bits in dirty_pipes) if
5572          * pipe allocations had to change.
5573          */
5574         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5575                                             cstate, i) {
5576                 const struct skl_pipe_wm *old_pipe_wm =
5577                         &old_crtc_state->wm.skl.optimal;
5578
5579                 pipe_wm = &cstate->wm.skl.optimal;
5580                 ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm, &changed);
5581                 if (ret)
5582                         return ret;
5583
5584                 ret = skl_wm_add_affected_planes(state, crtc);
5585                 if (ret)
5586                         return ret;
5587
5588                 if (changed)
5589                         results->dirty_pipes |= drm_crtc_mask(&crtc->base);
5590         }
5591
5592         ret = skl_compute_ddb(state);
5593         if (ret)
5594                 return ret;
5595
5596         skl_print_wm_changes(state);
5597
5598         return 0;
5599 }
5600
5601 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
5602                                       struct intel_crtc_state *cstate)
5603 {
5604         struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
5605         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5606         struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5607         enum pipe pipe = crtc->pipe;
5608
5609         if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
5610                 return;
5611
5612         I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
5613 }
5614
5615 static void skl_initial_wm(struct intel_atomic_state *state,
5616                            struct intel_crtc_state *cstate)
5617 {
5618         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5619         struct drm_device *dev = intel_crtc->base.dev;
5620         struct drm_i915_private *dev_priv = to_i915(dev);
5621         struct skl_ddb_values *results = &state->wm_results;
5622
5623         if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
5624                 return;
5625
5626         mutex_lock(&dev_priv->wm.wm_mutex);
5627
5628         if (cstate->base.active_changed)
5629                 skl_atomic_update_crtc_wm(state, cstate);
5630
5631         mutex_unlock(&dev_priv->wm.wm_mutex);
5632 }
5633
5634 static void ilk_compute_wm_config(struct drm_i915_private *dev_priv,
5635                                   struct intel_wm_config *config)
5636 {
5637         struct intel_crtc *crtc;
5638
5639         /* Compute the currently _active_ config */
5640         for_each_intel_crtc(&dev_priv->drm, crtc) {
5641                 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5642
5643                 if (!wm->pipe_enabled)
5644                         continue;
5645
5646                 config->sprites_enabled |= wm->sprites_enabled;
5647                 config->sprites_scaled |= wm->sprites_scaled;
5648                 config->num_pipes_active++;
5649         }
5650 }
5651
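/*
 * Recompute and program the ILK-style LP watermarks for the current global
 * configuration: merge the per-pipe results, try 1/2 DDB partitioning and,
 * on IVB+ single-pipe configs with sprites, also 5/6 partitioning, then
 * write out whichever result is better.
 */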
5652 static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
5653 {
5654         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
5655         struct ilk_wm_maximums max;
5656         struct intel_wm_config config = {};
5657         struct ilk_wm_values results = {};
5658         enum intel_ddb_partitioning partitioning;
5659
5660         ilk_compute_wm_config(dev_priv, &config);
5661
5662         ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_1_2, &max);
5663         ilk_wm_merge(dev_priv, &config, &max, &lp_wm_1_2);
5664
5665         /* 5/6 split only in single pipe config on IVB+ */
5666         if (INTEL_GEN(dev_priv) >= 7 &&
5667             config.num_pipes_active == 1 && config.sprites_enabled) {
5668                 ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_5_6, &max);
5669                 ilk_wm_merge(dev_priv, &config, &max, &lp_wm_5_6);
5670
5671                 best_lp_wm = ilk_find_best_result(dev_priv, &lp_wm_1_2, &lp_wm_5_6);
5672         } else {
5673                 best_lp_wm = &lp_wm_1_2;
5674         }
5675
5676         partitioning = (best_lp_wm == &lp_wm_1_2) ?
5677                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
5678
5679         ilk_compute_wm_results(dev_priv, best_lp_wm, partitioning, &results);
5680
5681         ilk_write_wm_values(dev_priv, &results);
5682 }
5683
5684 static void ilk_initial_watermarks(struct intel_atomic_state *state,
5685                                    struct intel_crtc_state *cstate)
5686 {
5687         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5688         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5689
5690         mutex_lock(&dev_priv->wm.wm_mutex);
5691         intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate;
5692         ilk_program_watermarks(dev_priv);
5693         mutex_unlock(&dev_priv->wm.wm_mutex);
5694 }
5695
5696 static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5697                                     struct intel_crtc_state *cstate)
5698 {
5699         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5700         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5701
5702         mutex_lock(&dev_priv->wm.wm_mutex);
5703         if (cstate->wm.need_postvbl_update) {
5704                 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal;
5705                 ilk_program_watermarks(dev_priv);
5706         }
5707         mutex_unlock(&dev_priv->wm.wm_mutex);
5708 }
5709
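/*
 * Decode a single PLANE_WM/CUR_WM style register value into the software
 * skl_wm_level representation (enable bit, ignore-lines bit, blocks and
 * lines fields).
 */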
5710 static inline void skl_wm_level_from_reg_val(u32 val,
5711                                              struct skl_wm_level *level)
5712 {
5713         level->plane_en = val & PLANE_WM_EN;
5714         level->ignore_lines = val & PLANE_WM_IGNORE_LINES;
5715         level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5716         level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5717                 PLANE_WM_LINES_MASK;
5718 }
5719
5720 void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
5721                               struct skl_pipe_wm *out)
5722 {
5723         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5724         enum pipe pipe = crtc->pipe;
5725         int level, max_level;
5726         enum plane_id plane_id;
5727         u32 val;
5728
5729         max_level = ilk_wm_max_level(dev_priv);
5730
5731         for_each_plane_id_on_crtc(crtc, plane_id) {
5732                 struct skl_plane_wm *wm = &out->planes[plane_id];
5733
5734                 for (level = 0; level <= max_level; level++) {
5735                         if (plane_id != PLANE_CURSOR)
5736                                 val = I915_READ(PLANE_WM(pipe, plane_id, level));
5737                         else
5738                                 val = I915_READ(CUR_WM(pipe, level));
5739
5740                         skl_wm_level_from_reg_val(val, &wm->wm[level]);
5741                 }
5742
5743                 if (plane_id != PLANE_CURSOR)
5744                         val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
5745                 else
5746                         val = I915_READ(CUR_WM_TRANS(pipe));
5747
5748                 skl_wm_level_from_reg_val(val, &wm->trans_wm);
5749         }
5750
5751         if (!crtc->active)
5752                 return;
5753
5754         out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
5755 }
5756
5757 void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
5758 {
5759         struct skl_ddb_values *hw = &dev_priv->wm.skl_hw;
5760         struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
5761         struct intel_crtc *crtc;
5762         struct intel_crtc_state *cstate;
5763
5764         skl_ddb_get_hw_state(dev_priv, ddb);
5765         for_each_intel_crtc(&dev_priv->drm, crtc) {
5766                 cstate = to_intel_crtc_state(crtc->base.state);
5767
5768                 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal);
5769
5770                 if (crtc->active)
5771                         hw->dirty_pipes |= drm_crtc_mask(&crtc->base);
5772         }
5773
5774         if (dev_priv->active_crtcs) {
5775                 /* Fully recompute DDB on first atomic commit */
5776                 dev_priv->wm.distrust_bios_wm = true;
5777         }
5778 }
5779
5780 static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc)
5781 {
5782         struct drm_device *dev = crtc->base.dev;
5783         struct drm_i915_private *dev_priv = to_i915(dev);
5784         struct ilk_wm_values *hw = &dev_priv->wm.hw;
5785         struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->base.state);
5786         struct intel_pipe_wm *active = &cstate->wm.ilk.optimal;
5787         enum pipe pipe = crtc->pipe;
5788         static const i915_reg_t wm0_pipe_reg[] = {
5789                 [PIPE_A] = WM0_PIPEA_ILK,
5790                 [PIPE_B] = WM0_PIPEB_ILK,
5791                 [PIPE_C] = WM0_PIPEC_IVB,
5792         };
5793
5794         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
5795         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5796                 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
5797
5798         memset(active, 0, sizeof(*active));
5799
5800         active->pipe_enabled = crtc->active;
5801
5802         if (active->pipe_enabled) {
5803                 u32 tmp = hw->wm_pipe[pipe];
5804
5805                 /*
5806                  * For active pipes LP0 watermark is marked as
5807                  * enabled, and LP1+ watermarks as disabled since
5808                  * we can't really reverse compute them in case
5809                  * multiple pipes are active.
5810                  */
5811                 active->wm[0].enable = true;
5812                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5813                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5814                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5815                 active->linetime = hw->wm_linetime[pipe];
5816         } else {
5817                 int level, max_level = ilk_wm_max_level(dev_priv);
5818
5819                 /*
5820                  * For inactive pipes, all watermark levels
5821                  * should be marked as enabled but zeroed,
5822                  * which is what we'd compute them to.
5823                  */
5824                 for (level = 0; level <= max_level; level++)
5825                         active->wm[level].enable = true;
5826         }
5827
5828         crtc->wm.active.ilk = *active;
5829 }
5830
5831 #define _FW_WM(value, plane) \
5832         (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
5833 #define _FW_WM_VLV(value, plane) \
5834         (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
5835
5836 static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5837                                struct g4x_wm_values *wm)
5838 {
5839         u32 tmp;
5840
5841         tmp = I915_READ(DSPFW1);
5842         wm->sr.plane = _FW_WM(tmp, SR);
5843         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5844         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5845         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5846
5847         tmp = I915_READ(DSPFW2);
5848         wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5849         wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5850         wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5851         wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5852         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5853         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5854
5855         tmp = I915_READ(DSPFW3);
5856         wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5857         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5858         wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5859         wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5860 }
5861
5862 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5863                                struct vlv_wm_values *wm)
5864 {
5865         enum pipe pipe;
5866         u32 tmp;
5867
5868         for_each_pipe(dev_priv, pipe) {
5869                 tmp = I915_READ(VLV_DDL(pipe));
5870
5871                 wm->ddl[pipe].plane[PLANE_PRIMARY] =
5872                         (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5873                 wm->ddl[pipe].plane[PLANE_CURSOR] =
5874                         (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5875                 wm->ddl[pipe].plane[PLANE_SPRITE0] =
5876                         (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5877                 wm->ddl[pipe].plane[PLANE_SPRITE1] =
5878                         (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5879         }
5880
5881         tmp = I915_READ(DSPFW1);
5882         wm->sr.plane = _FW_WM(tmp, SR);
5883         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5884         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
5885         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
5886
5887         tmp = I915_READ(DSPFW2);
5888         wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
5889         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5890         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
5891
5892         tmp = I915_READ(DSPFW3);
5893         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5894
5895         if (IS_CHERRYVIEW(dev_priv)) {
5896                 tmp = I915_READ(DSPFW7_CHV);
5897                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5898                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5899
5900                 tmp = I915_READ(DSPFW8_CHV);
5901                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
5902                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
5903
5904                 tmp = I915_READ(DSPFW9_CHV);
5905                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
5906                 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
5907
5908                 tmp = I915_READ(DSPHOWM);
5909                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5910                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
5911                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
5912                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
5913                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5914                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5915                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5916                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5917                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5918                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5919         } else {
5920                 tmp = I915_READ(DSPFW7);
5921                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5922                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5923
5924                 tmp = I915_READ(DSPHOWM);
5925                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5926                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5927                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5928                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5929                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5930                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5931                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5932         }
5933 }
5934
5935 #undef _FW_WM
5936 #undef _FW_WM_VLV
5937
5938 void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv)
5939 {
5940         struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5941         struct intel_crtc *crtc;
5942
5943         g4x_read_wm_values(dev_priv, wm);
5944
5945         wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5946
5947         for_each_intel_crtc(&dev_priv->drm, crtc) {
5948                 struct intel_crtc_state *crtc_state =
5949                         to_intel_crtc_state(crtc->base.state);
5950                 struct g4x_wm_state *active = &crtc->wm.active.g4x;
5951                 struct g4x_pipe_wm *raw;
5952                 enum pipe pipe = crtc->pipe;
5953                 enum plane_id plane_id;
5954                 int level, max_level;
5955
5956                 active->cxsr = wm->cxsr;
5957                 active->hpll_en = wm->hpll_en;
5958                 active->fbc_en = wm->fbc_en;
5959
5960                 active->sr = wm->sr;
5961                 active->hpll = wm->hpll;
5962
5963                 for_each_plane_id_on_crtc(crtc, plane_id) {
5964                         active->wm.plane[plane_id] =
5965                                 wm->pipe[pipe].plane[plane_id];
5966                 }
5967
5968                 if (wm->cxsr && wm->hpll_en)
5969                         max_level = G4X_WM_LEVEL_HPLL;
5970                 else if (wm->cxsr)
5971                         max_level = G4X_WM_LEVEL_SR;
5972                 else
5973                         max_level = G4X_WM_LEVEL_NORMAL;
5974
5975                 level = G4X_WM_LEVEL_NORMAL;
5976                 raw = &crtc_state->wm.g4x.raw[level];
5977                 for_each_plane_id_on_crtc(crtc, plane_id)
5978                         raw->plane[plane_id] = active->wm.plane[plane_id];
5979
5980                 if (++level > max_level)
5981                         goto out;
5982
5983                 raw = &crtc_state->wm.g4x.raw[level];
5984                 raw->plane[PLANE_PRIMARY] = active->sr.plane;
5985                 raw->plane[PLANE_CURSOR] = active->sr.cursor;
5986                 raw->plane[PLANE_SPRITE0] = 0;
5987                 raw->fbc = active->sr.fbc;
5988
5989                 if (++level > max_level)
5990                         goto out;
5991
5992                 raw = &crtc_state->wm.g4x.raw[level];
5993                 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
5994                 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
5995                 raw->plane[PLANE_SPRITE0] = 0;
5996                 raw->fbc = active->hpll.fbc;
5997
5998         out:
5999                 for_each_plane_id_on_crtc(crtc, plane_id)
6000                         g4x_raw_plane_wm_set(crtc_state, level,
6001                                              plane_id, USHRT_MAX);
6002                 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
6003
6004                 crtc_state->wm.g4x.optimal = *active;
6005                 crtc_state->wm.g4x.intermediate = *active;
6006
6007                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
6008                               pipe_name(pipe),
6009                               wm->pipe[pipe].plane[PLANE_PRIMARY],
6010                               wm->pipe[pipe].plane[PLANE_CURSOR],
6011                               wm->pipe[pipe].plane[PLANE_SPRITE0]);
6012         }
6013
6014         DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
6015                       wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
6016         DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
6017                       wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
6018         DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
6019                       yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
6020 }
6021
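/*
 * Sanitize the g4x watermark state after hardware readout: planes that are
 * not visible get their raw and optimal watermarks (and, for the primary
 * plane, the FBC watermarks) zeroed, then the result is programmed back so
 * the hardware matches what we would have computed ourselves.
 */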
6022 void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
6023 {
6024         struct intel_plane *plane;
6025         struct intel_crtc *crtc;
6026
6027         mutex_lock(&dev_priv->wm.wm_mutex);
6028
6029         for_each_intel_plane(&dev_priv->drm, plane) {
6030                 struct intel_crtc *crtc =
6031                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6032                 struct intel_crtc_state *crtc_state =
6033                         to_intel_crtc_state(crtc->base.state);
6034                 struct intel_plane_state *plane_state =
6035                         to_intel_plane_state(plane->base.state);
6036                 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
6037                 enum plane_id plane_id = plane->id;
6038                 int level;
6039
6040                 if (plane_state->base.visible)
6041                         continue;
6042
6043                 for (level = 0; level < 3; level++) {
6044                         struct g4x_pipe_wm *raw =
6045                                 &crtc_state->wm.g4x.raw[level];
6046
6047                         raw->plane[plane_id] = 0;
6048                         wm_state->wm.plane[plane_id] = 0;
6049                 }
6050
6051                 if (plane_id == PLANE_PRIMARY) {
6052                         for (level = 0; level < 3; level++) {
6053                                 struct g4x_pipe_wm *raw =
6054                                         &crtc_state->wm.g4x.raw[level];
6055                                 raw->fbc = 0;
6056                         }
6057
6058                         wm_state->sr.fbc = 0;
6059                         wm_state->hpll.fbc = 0;
6060                         wm_state->fbc_en = false;
6061                 }
6062         }
6063
6064         for_each_intel_crtc(&dev_priv->drm, crtc) {
6065                 struct intel_crtc_state *crtc_state =
6066                         to_intel_crtc_state(crtc->base.state);
6067
6068                 crtc_state->wm.g4x.intermediate =
6069                         crtc_state->wm.g4x.optimal;
6070                 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
6071         }
6072
6073         g4x_program_watermarks(dev_priv);
6074
6075         mutex_unlock(&dev_priv->wm.wm_mutex);
6076 }
6077
6078 void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
6079 {
6080         struct vlv_wm_values *wm = &dev_priv->wm.vlv;
6081         struct intel_crtc *crtc;
6082         u32 val;
6083
6084         vlv_read_wm_values(dev_priv, wm);
6085
6086         wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
6087         wm->level = VLV_WM_LEVEL_PM2;
6088
6089         if (IS_CHERRYVIEW(dev_priv)) {
6090                 mutex_lock(&dev_priv->pcu_lock);
6091
6092                 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
6093                 if (val & DSP_MAXFIFO_PM5_ENABLE)
6094                         wm->level = VLV_WM_LEVEL_PM5;
6095
6096                 /*
6097                  * If DDR DVFS is disabled in the BIOS, Punit
6098                  * will never ack the request. So if that happens
6099                  * assume we don't have to enable/disable DDR DVFS
6100                  * dynamically. To test that just set the REQ_ACK
6101                  * bit to poke the Punit, but don't change the
6102                  * HIGH/LOW bits so that we don't actually change
6103                  * the current state.
6104                  */
6105                 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
6106                 val |= FORCE_DDR_FREQ_REQ_ACK;
6107                 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
6108
6109                 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
6110                               FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
6111                         DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
6112                                       "assuming DDR DVFS is disabled\n");
6113                         dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
6114                 } else {
6115                         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
6116                         if ((val & FORCE_DDR_HIGH_FREQ) == 0)
6117                                 wm->level = VLV_WM_LEVEL_DDR_DVFS;
6118                 }
6119
6120                 mutex_unlock(&dev_priv->pcu_lock);
6121         }
6122
6123         for_each_intel_crtc(&dev_priv->drm, crtc) {
6124                 struct intel_crtc_state *crtc_state =
6125                         to_intel_crtc_state(crtc->base.state);
6126                 struct vlv_wm_state *active = &crtc->wm.active.vlv;
6127                 const struct vlv_fifo_state *fifo_state =
6128                         &crtc_state->wm.vlv.fifo_state;
6129                 enum pipe pipe = crtc->pipe;
6130                 enum plane_id plane_id;
6131                 int level;
6132
6133                 vlv_get_fifo_size(crtc_state);
6134
6135                 active->num_levels = wm->level + 1;
6136                 active->cxsr = wm->cxsr;
6137
6138                 for (level = 0; level < active->num_levels; level++) {
6139                         struct g4x_pipe_wm *raw =
6140                                 &crtc_state->wm.vlv.raw[level];
6141
6142                         active->sr[level].plane = wm->sr.plane;
6143                         active->sr[level].cursor = wm->sr.cursor;
6144
6145                         for_each_plane_id_on_crtc(crtc, plane_id) {
6146                                 active->wm[level].plane[plane_id] =
6147                                         wm->pipe[pipe].plane[plane_id];
6148
6149                                 raw->plane[plane_id] =
6150                                         vlv_invert_wm_value(active->wm[level].plane[plane_id],
6151                                                             fifo_state->plane[plane_id]);
6152                         }
6153                 }
6154
6155                 for_each_plane_id_on_crtc(crtc, plane_id)
6156                         vlv_raw_plane_wm_set(crtc_state, level,
6157                                              plane_id, USHRT_MAX);
6158                 vlv_invalidate_wms(crtc, active, level);
6159
6160                 crtc_state->wm.vlv.optimal = *active;
6161                 crtc_state->wm.vlv.intermediate = *active;
6162
6163                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
6164                               pipe_name(pipe),
6165                               wm->pipe[pipe].plane[PLANE_PRIMARY],
6166                               wm->pipe[pipe].plane[PLANE_CURSOR],
6167                               wm->pipe[pipe].plane[PLANE_SPRITE0],
6168                               wm->pipe[pipe].plane[PLANE_SPRITE1]);
6169         }
6170
6171         DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
6172                       wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
6173 }
6174
6175 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
6176 {
6177         struct intel_plane *plane;
6178         struct intel_crtc *crtc;
6179
6180         mutex_lock(&dev_priv->wm.wm_mutex);
6181
6182         for_each_intel_plane(&dev_priv->drm, plane) {
6183                 struct intel_crtc *crtc =
6184                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6185                 struct intel_crtc_state *crtc_state =
6186                         to_intel_crtc_state(crtc->base.state);
6187                 struct intel_plane_state *plane_state =
6188                         to_intel_plane_state(plane->base.state);
6189                 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
6190                 const struct vlv_fifo_state *fifo_state =
6191                         &crtc_state->wm.vlv.fifo_state;
6192                 enum plane_id plane_id = plane->id;
6193                 int level;
6194
6195                 if (plane_state->base.visible)
6196                         continue;
6197
6198                 for (level = 0; level < wm_state->num_levels; level++) {
6199                         struct g4x_pipe_wm *raw =
6200                                 &crtc_state->wm.vlv.raw[level];
6201
6202                         raw->plane[plane_id] = 0;
6203
6204                         wm_state->wm[level].plane[plane_id] =
6205                                 vlv_invert_wm_value(raw->plane[plane_id],
6206                                                     fifo_state->plane[plane_id]);
6207                 }
6208         }
6209
6210         for_each_intel_crtc(&dev_priv->drm, crtc) {
6211                 struct intel_crtc_state *crtc_state =
6212                         to_intel_crtc_state(crtc->base.state);
6213
6214                 crtc_state->wm.vlv.intermediate =
6215                         crtc_state->wm.vlv.optimal;
6216                 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
6217         }
6218
6219         vlv_program_watermarks(dev_priv);
6220
6221         mutex_unlock(&dev_priv->wm.wm_mutex);
6222 }
6223
6224 /*
6225  * FIXME should probably kill this and improve
6226  * the real watermark readout/sanitation instead
6227  */
6228 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
6229 {
6230         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6231         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6232         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6233
6234         /*
6235          * Don't touch WM1S_LP_EN here.
6236          * Doing so could cause underruns.
6237          */
6238 }
6239
6240 void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv)
6241 {
6242         struct ilk_wm_values *hw = &dev_priv->wm.hw;
6243         struct intel_crtc *crtc;
6244
6245         ilk_init_lp_watermarks(dev_priv);
6246
6247         for_each_intel_crtc(&dev_priv->drm, crtc)
6248                 ilk_pipe_wm_get_hw_state(crtc);
6249
6250         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
6251         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
6252         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
6253
6254         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
6255         if (INTEL_GEN(dev_priv) >= 7) {
6256                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
6257                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
6258         }
6259
6260         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6261                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
6262                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6263         else if (IS_IVYBRIDGE(dev_priv))
6264                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
6265                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6266
6267         hw->enable_fbc_wm =
6268                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
6269 }
6270
6271 /**
6272  * intel_update_watermarks - update FIFO watermark values based on current modes
6273  * @crtc: the #intel_crtc on which to compute the WM
6274  *
6275  * Calculate watermark values for the various WM regs based on current mode
6276  * and plane configuration.
6277  *
6278  * There are several cases to deal with here:
6279  *   - normal (i.e. non-self-refresh)
6280  *   - self-refresh (SR) mode
6281  *   - lines are large relative to FIFO size (buffer can hold up to 2)
6282  *   - lines are small relative to FIFO size (buffer can hold more than 2
6283  *     lines), so need to account for TLB latency
6284  *
6285  *   The normal calculation is:
6286  *     watermark = dotclock * bytes per pixel * latency
6287  *   where latency is platform & configuration dependent (we assume pessimal
6288  *   values here).
6289  *
6290  *   The SR calculation is:
6291  *     watermark = (trunc(latency/line time)+1) * surface width *
6292  *       bytes per pixel
6293  *   where
6294  *     line time = htotal / dotclock
6295  *     surface width = hdisplay for normal plane and 64 for cursor
6296  *   and latency is assumed to be high, as above.
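 *
 *   As a purely illustrative example of the normal formula (made-up
 *   numbers, not taken from any particular platform): a 100 MHz dotclock
 *   at 4 bytes per pixel with 10 usec of latency needs
 *     100000000 * 4 * 0.00001 = 4000 bytes
 *   of FIFO headroom, before the rounding and padding described below.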
6297  *
6298  * The final value programmed to the register should always be rounded up,
6299  * and include an extra 2 entries to account for clock crossings.
6300  *
6301  * We don't use the sprite, so we can ignore that.  And on Crestline we have
6302  * to set the non-SR watermarks to 8.
6303  */
6304 void intel_update_watermarks(struct intel_crtc *crtc)
6305 {
6306         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
6307
6308         if (dev_priv->display.update_wm)
6309                 dev_priv->display.update_wm(crtc);
6310 }
6311
6312 void intel_enable_ipc(struct drm_i915_private *dev_priv)
6313 {
6314         u32 val;
6315
6316         if (!HAS_IPC(dev_priv))
6317                 return;
6318
6319         val = I915_READ(DISP_ARB_CTL2);
6320
6321         if (dev_priv->ipc_enabled)
6322                 val |= DISP_IPC_ENABLE;
6323         else
6324                 val &= ~DISP_IPC_ENABLE;
6325
6326         I915_WRITE(DISP_ARB_CTL2, val);
6327 }
6328
6329 void intel_init_ipc(struct drm_i915_private *dev_priv)
6330 {
6331         if (!HAS_IPC(dev_priv))
6332                 return;
6333
6334         /* Display WA #1141: SKL:all KBL:all CFL */
6335         if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv))
6336                 dev_priv->ipc_enabled = dev_priv->dram_info.symmetric_memory;
6337         else
6338                 dev_priv->ipc_enabled = true;
6339
6340         intel_enable_ipc(dev_priv);
6341 }
6342
6343 /*
6344  * Lock protecting IPS related data structures
6345  */
6346 DEFINE_SPINLOCK(mchdev_lock);
6347
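/*
 * Request a new DRPS frequency point by issuing a CHFREQ command for 'val'
 * via MEMSWCTL. Returns false if the hardware is still busy processing a
 * previous command (MEMCTL_CMD_STS set).
 */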
6348 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
6349 {
6350         u16 rgvswctl;
6351
6352         lockdep_assert_held(&mchdev_lock);
6353
6354         rgvswctl = I915_READ16(MEMSWCTL);
6355         if (rgvswctl & MEMCTL_CMD_STS) {
6356                 DRM_DEBUG("gpu busy, RCS change rejected\n");
6357                 return false; /* still busy with another command */
6358         }
6359
6360         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
6361                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
6362         I915_WRITE16(MEMSWCTL, rgvswctl);
6363         POSTING_READ16(MEMSWCTL);
6364
6365         rgvswctl |= MEMCTL_CMD_STS;
6366         I915_WRITE16(MEMSWCTL, rgvswctl);
6367
6368         return true;
6369 }
6370
6371 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
6372 {
6373         u32 rgvmodectl;
6374         u8 fmax, fmin, fstart, vstart;
6375
6376         spin_lock_irq(&mchdev_lock);
6377
6378         rgvmodectl = I915_READ(MEMMODECTL);
6379
6380         /* Enable temp reporting */
6381         I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
6382         I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
6383
6384         /* 100ms RC evaluation intervals */
6385         I915_WRITE(RCUPEI, 100000);
6386         I915_WRITE(RCDNEI, 100000);
6387
6388         /* Set max/min thresholds to 90ms and 80ms respectively */
6389         I915_WRITE(RCBMAXAVG, 90000);
6390         I915_WRITE(RCBMINAVG, 80000);
6391
6392         I915_WRITE(MEMIHYST, 1);
6393
6394         /* Set up min, max, and cur for interrupt handling */
6395         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
6396         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
6397         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
6398                 MEMMODE_FSTART_SHIFT;
6399
6400         vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
6401                 PXVFREQ_PX_SHIFT;
6402
6403         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
6404         dev_priv->ips.fstart = fstart;
6405
6406         dev_priv->ips.max_delay = fstart;
6407         dev_priv->ips.min_delay = fmin;
6408         dev_priv->ips.cur_delay = fstart;
6409
6410         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
6411                          fmax, fmin, fstart);
6412
6413         I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
6414
6415         /*
6416          * Interrupts will be enabled in ironlake_irq_postinstall
6417          */
6418
6419         I915_WRITE(VIDSTART, vstart);
6420         POSTING_READ(VIDSTART);
6421
6422         rgvmodectl |= MEMMODE_SWMODE_EN;
6423         I915_WRITE(MEMMODECTL, rgvmodectl);
6424
6425         if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
6426                 DRM_ERROR("stuck trying to change perf mode\n");
6427         mdelay(1);
6428
6429         ironlake_set_drps(dev_priv, fstart);
6430
6431         dev_priv->ips.last_count1 = I915_READ(DMIEC) +
6432                 I915_READ(DDREC) + I915_READ(CSIEC);
6433         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
6434         dev_priv->ips.last_count2 = I915_READ(GFXEC);
6435         dev_priv->ips.last_time2 = ktime_get_raw_ns();
6436
6437         spin_unlock_irq(&mchdev_lock);
6438 }
6439
6440 static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
6441 {
6442         u16 rgvswctl;
6443
6444         spin_lock_irq(&mchdev_lock);
6445
6446         rgvswctl = I915_READ16(MEMSWCTL);
6447
6448         /* Ack interrupts, disable EFC interrupt */
6449         I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
6450         I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
6451         I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
6452         I915_WRITE(DEIIR, DE_PCU_EVENT);
6453         I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
6454
6455         /* Go back to the starting frequency */
6456         ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
6457         mdelay(1);
6458         rgvswctl |= MEMCTL_CMD_STS;
6459         I915_WRITE(MEMSWCTL, rgvswctl);
6460         mdelay(1);
6461
6462         spin_unlock_irq(&mchdev_lock);
6463 }
6464
6465 /* There's a funny hw issue where the hw returns all 0 when reading from
6466  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
6467  * ourselves, instead of doing a rmw cycle (which might result in us clearing
6468  * all limits and the gpu getting stuck at its current frequency).
6469  */
6470 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
6471 {
6472         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6473         u32 limits;
6474
6475         /* Only set the down limit when we've reached the lowest level to avoid
6476          * getting more interrupts; otherwise leave this clear. This prevents a
6477          * race in the hw when coming out of rc6: There's a tiny window where
6478          * the hw runs at the minimal clock before selecting the desired
6479          * frequency; if the down threshold expires in that window we will not
6480          * receive a down interrupt. */
6481         if (INTEL_GEN(dev_priv) >= 9) {
6482                 limits = (rps->max_freq_softlimit) << 23;
6483                 if (val <= rps->min_freq_softlimit)
6484                         limits |= (rps->min_freq_softlimit) << 14;
6485         } else {
6486                 limits = rps->max_freq_softlimit << 24;
6487                 if (val <= rps->min_freq_softlimit)
6488                         limits |= rps->min_freq_softlimit << 16;
6489         }
6490
6491         return limits;
6492 }
6493
6494 static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
6495 {
6496         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6497         u32 threshold_up = 0, threshold_down = 0; /* in % */
6498         u32 ei_up = 0, ei_down = 0;
6499
6500         lockdep_assert_held(&rps->power.mutex);
6501
6502         if (new_power == rps->power.mode)
6503                 return;
6504
6505         /* Note the units here are not exactly 1us, but 1280ns. */
6506         switch (new_power) {
6507         case LOW_POWER:
6508                 /* Upclock if more than 95% busy over 16ms */
6509                 ei_up = 16000;
6510                 threshold_up = 95;
6511
6512                 /* Downclock if less than 85% busy over 32ms */
6513                 ei_down = 32000;
6514                 threshold_down = 85;
6515                 break;
6516
6517         case BETWEEN:
6518                 /* Upclock if more than 90% busy over 13ms */
6519                 ei_up = 13000;
6520                 threshold_up = 90;
6521
6522                 /* Downclock if less than 75% busy over 32ms */
6523                 ei_down = 32000;
6524                 threshold_down = 75;
6525                 break;
6526
6527         case HIGH_POWER:
6528                 /* Upclock if more than 85% busy over 10ms */
6529                 ei_up = 10000;
6530                 threshold_up = 85;
6531
6532                 /* Downclock if less than 60% busy over 32ms */
6533                 ei_down = 32000;
6534                 threshold_down = 60;
6535                 break;
6536         }
6537
6538         /* Once BYT can survive dynamic sw freq adjustments without
6539          * hanging the system, this restriction can be lifted.
6540          */
6541         if (IS_VALLEYVIEW(dev_priv))
6542                 goto skip_hw_write;
6543
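        /*
         * Illustration of the threshold arithmetic below, using the
         * LOW_POWER numbers as an example: the up threshold works out to
         * 16000 * 95 / 100 = 15200, i.e. 95% of the 16000 up evaluation
         * interval must have been busy before we upclock.
         */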
6544         I915_WRITE(GEN6_RP_UP_EI,
6545                    GT_INTERVAL_FROM_US(dev_priv, ei_up));
6546         I915_WRITE(GEN6_RP_UP_THRESHOLD,
6547                    GT_INTERVAL_FROM_US(dev_priv,
6548                                        ei_up * threshold_up / 100));
6549
6550         I915_WRITE(GEN6_RP_DOWN_EI,
6551                    GT_INTERVAL_FROM_US(dev_priv, ei_down));
6552         I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
6553                    GT_INTERVAL_FROM_US(dev_priv,
6554                                        ei_down * threshold_down / 100));
6555
6556         I915_WRITE(GEN6_RP_CONTROL,
6557                    GEN6_RP_MEDIA_TURBO |
6558                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
6559                    GEN6_RP_MEDIA_IS_GFX |
6560                    GEN6_RP_ENABLE |
6561                    GEN6_RP_UP_BUSY_AVG |
6562                    GEN6_RP_DOWN_IDLE_AVG);
6563
6564 skip_hw_write:
6565         rps->power.mode = new_power;
6566         rps->power.up_threshold = threshold_up;
6567         rps->power.down_threshold = threshold_down;
6568 }
6569
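/*
 * Pick the RPS power mode for the requested frequency with some hysteresis:
 * step up from LOW_POWER to BETWEEN once we're above the efficient
 * frequency, up to HIGH_POWER at RP0, and back down again as the requested
 * frequency drops. The softlimit min/max always force LOW_POWER/HIGH_POWER
 * respectively, and an interactive client forces HIGH_POWER regardless.
 */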
6570 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6571 {
6572         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6573         int new_power;
6574
6575         new_power = rps->power.mode;
6576         switch (rps->power.mode) {
6577         case LOW_POWER:
6578                 if (val > rps->efficient_freq + 1 &&
6579                     val > rps->cur_freq)
6580                         new_power = BETWEEN;
6581                 break;
6582
6583         case BETWEEN:
6584                 if (val <= rps->efficient_freq &&
6585                     val < rps->cur_freq)
6586                         new_power = LOW_POWER;
6587                 else if (val >= rps->rp0_freq &&
6588                          val > rps->cur_freq)
6589                         new_power = HIGH_POWER;
6590                 break;
6591
6592         case HIGH_POWER:
6593                 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6594                     val < rps->cur_freq)
6595                         new_power = BETWEEN;
6596                 break;
6597         }
6598         /* Max/min bins are special */
6599         if (val <= rps->min_freq_softlimit)
6600                 new_power = LOW_POWER;
6601         if (val >= rps->max_freq_softlimit)
6602                 new_power = HIGH_POWER;
6603
6604         mutex_lock(&rps->power.mutex);
6605         if (rps->power.interactive)
6606                 new_power = HIGH_POWER;
6607         rps_set_power(dev_priv, new_power);
6608         mutex_unlock(&rps->power.mutex);
6609 }
6610
6611 void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
6612 {
6613         struct intel_rps *rps = &i915->gt_pm.rps;
6614
6615         if (INTEL_GEN(i915) < 6)
6616                 return;
6617
6618         mutex_lock(&rps->power.mutex);
6619         if (interactive) {
6620                 if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
6621                         rps_set_power(i915, HIGH_POWER);
6622         } else {
6623                 GEM_BUG_ON(!rps->power.interactive);
6624                 rps->power.interactive--;
6625         }
6626         mutex_unlock(&rps->power.mutex);
6627 }
6628
6629 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
6630 {
6631         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6632         u32 mask = 0;
6633
6634         /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
6635         if (val > rps->min_freq_softlimit)
6636                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
6637         if (val < rps->max_freq_softlimit)
6638                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
6639
6640         mask &= dev_priv->pm_rps_events;
6641
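        /*
         * Note: GEN6_PMINTRMSK disables an event when its bit is set, so the
         * set of events we want to keep enabled is inverted before being
         * sanitized for the caller's register write.
         */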
6642         return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
6643 }
6644
6645 /* gen6_set_rps is called to update the frequency request, but should also be
6646  * called when the range (min_delay and max_delay) is modified so that we can
6647  * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
6648 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
6649 {
6650         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6651
6652         /* min/max delay may still have been modified so be sure to
6653          * write the limits value.
6654          */
6655         if (val != rps->cur_freq) {
6656                 gen6_set_rps_thresholds(dev_priv, val);
6657
6658                 if (INTEL_GEN(dev_priv) >= 9)
6659                         I915_WRITE(GEN6_RPNSWREQ,
6660                                    GEN9_FREQUENCY(val));
6661                 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6662                         I915_WRITE(GEN6_RPNSWREQ,
6663                                    HSW_FREQUENCY(val));
6664                 else
6665                         I915_WRITE(GEN6_RPNSWREQ,
6666                                    GEN6_FREQUENCY(val) |
6667                                    GEN6_OFFSET(0) |
6668                                    GEN6_AGGRESSIVE_TURBO);
6669         }
6670
6671         /* Make sure we continue to get interrupts
6672          * until we hit the minimum or maximum frequencies.
6673          */
6674         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
6675         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6676
6677         rps->cur_freq = val;
6678         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6679
6680         return 0;
6681 }
6682
6683 static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
6684 {
6685         int err;
6686
6687         if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
6688                       "Odd GPU freq value\n"))
6689                 val &= ~1;
6690
6691         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6692
6693         if (val != dev_priv->gt_pm.rps.cur_freq) {
6694                 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
6695                 if (err)
6696                         return err;
6697
6698                 gen6_set_rps_thresholds(dev_priv, val);
6699         }
6700
6701         dev_priv->gt_pm.rps.cur_freq = val;
6702         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6703
6704         return 0;
6705 }
6706
6707 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
6708  *
6709  * If Gfx is Idle, then
6710  * 1. Forcewake Media well.
6711  * 2. Request idle freq.
6712  * 3. Release Forcewake of Media well.
6713  */
6714 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
6715 {
6716         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6717         u32 val = rps->idle_freq;
6718         int err;
6719
6720         if (rps->cur_freq <= val)
6721                 return;
6722
6723         /* The punit delays the write of the frequency and voltage until it
6724          * determines the GPU is awake. During normal usage we don't want to
6725          * waste power changing the frequency if the GPU is sleeping (rc6).
6726          * However, the GPU and driver are now idle and we do not want to delay
6727          * switching to minimum voltage (reducing power whilst idle) as we do
6728          * not expect to be woken in the near future and so must flush the
6729          * change by waking the device.
6730          *
6731          * We choose to take the media powerwell (either would do to trick the
6732          * punit into committing the voltage change) as that takes a lot less
6733          * power than the render powerwell.
6734          */
6735         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
6736         err = valleyview_set_rps(dev_priv, val);
6737         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
6738
6739         if (err)
6740                 DRM_ERROR("Failed to set RPS for idle\n");
6741 }
6742
6743 void gen6_rps_busy(struct drm_i915_private *dev_priv)
6744 {
6745         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6746
6747         mutex_lock(&dev_priv->pcu_lock);
6748         if (rps->enabled) {
6749                 u8 freq;
6750
6751                 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
6752                         gen6_rps_reset_ei(dev_priv);
6753                 I915_WRITE(GEN6_PMINTRMSK,
6754                            gen6_rps_pm_mask(dev_priv, rps->cur_freq));
6755
6756                 gen6_enable_rps_interrupts(dev_priv);
6757
6758                 /* Use the user's desired frequency as a guide, but for better
6759                  * performance, jump directly to RPe as our starting frequency.
6760                  */
6761                 freq = max(rps->cur_freq,
6762                            rps->efficient_freq);
6763
6764                 if (intel_set_rps(dev_priv,
6765                                   clamp(freq,
6766                                         rps->min_freq_softlimit,
6767                                         rps->max_freq_softlimit)))
6768                         DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
6769         }
6770         mutex_unlock(&dev_priv->pcu_lock);
6771 }
6772
6773 void gen6_rps_idle(struct drm_i915_private *dev_priv)
6774 {
6775         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6776
6777         /* Flush our bottom-half so that it does not race with us
6778          * setting the idle frequency and so that it is bounded by
6779          * our rpm wakeref. And then disable the interrupts to stop any
6780          * further RPS reclocking whilst we are asleep.
6781          */
6782         gen6_disable_rps_interrupts(dev_priv);
6783
6784         mutex_lock(&dev_priv->pcu_lock);
6785         if (rps->enabled) {
6786                 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6787                         vlv_set_rps_idle(dev_priv);
6788                 else
6789                         gen6_set_rps(dev_priv, rps->idle_freq);
6790                 rps->last_adj = 0;
6791                 I915_WRITE(GEN6_PMINTRMSK,
6792                            gen6_sanitize_rps_pm_mask(dev_priv, ~0));
6793         }
6794         mutex_unlock(&dev_priv->pcu_lock);
6795 }
6796
6797 void gen6_rps_boost(struct i915_request *rq)
6798 {
6799         struct intel_rps *rps = &rq->i915->gt_pm.rps;
6800         unsigned long flags;
6801         bool boost;
6802
6803         /* This is intentionally racy! We peek at the state here, then
6804          * validate inside the RPS worker.
6805          */
6806         if (!rps->enabled)
6807                 return;
6808
6809         if (i915_request_signaled(rq))
6810                 return;
6811
6812         /* Serializes with i915_request_retire() */
6813         boost = false;
6814         spin_lock_irqsave(&rq->lock, flags);
6815         if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
6816                 boost = !atomic_fetch_inc(&rps->num_waiters);
6817                 rq->waitboost = true;
6818         }
6819         spin_unlock_irqrestore(&rq->lock, flags);
6820         if (!boost)
6821                 return;
6822
6823         if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
6824                 schedule_work(&rps->work);
6825
6826         atomic_inc(&rps->boosts);
6827 }
6828
6829 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
6830 {
6831         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6832         int err;
6833
6834         lockdep_assert_held(&dev_priv->pcu_lock);
6835         GEM_BUG_ON(val > rps->max_freq);
6836         GEM_BUG_ON(val < rps->min_freq);
6837
6838         if (!rps->enabled) {
6839                 rps->cur_freq = val;
6840                 return 0;
6841         }
6842
6843         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6844                 err = valleyview_set_rps(dev_priv, val);
6845         else
6846                 err = gen6_set_rps(dev_priv, val);
6847
6848         return err;
6849 }
6850
6851 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
6852 {
6853         I915_WRITE(GEN6_RC_CONTROL, 0);
6854         I915_WRITE(GEN9_PG_ENABLE, 0);
6855 }
6856
6857 static void gen9_disable_rps(struct drm_i915_private *dev_priv)
6858 {
6859         I915_WRITE(GEN6_RP_CONTROL, 0);
6860 }
6861
6862 static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
6863 {
6864         I915_WRITE(GEN6_RC_CONTROL, 0);
6865 }
6866
6867 static void gen6_disable_rps(struct drm_i915_private *dev_priv)
6868 {
6869         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
6870         I915_WRITE(GEN6_RP_CONTROL, 0);
6871 }
6872
6873 static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
6874 {
6875         I915_WRITE(GEN6_RC_CONTROL, 0);
6876 }
6877
6878 static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
6879 {
6880         I915_WRITE(GEN6_RP_CONTROL, 0);
6881 }
6882
6883 static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
6884 {
6885         /* We're doing forcewake before disabling RC6;
6886          * this is what the BIOS expects when going into suspend */
6887         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6888
6889         I915_WRITE(GEN6_RC_CONTROL, 0);
6890
6891         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6892 }
6893
6894 static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
6895 {
6896         I915_WRITE(GEN6_RP_CONTROL, 0);
6897 }
6898
6899 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
6900 {
6901         bool enable_rc6 = true;
6902         unsigned long rc6_ctx_base;
6903         u32 rc_ctl;
6904         int rc_sw_target;
6905
6906         rc_ctl = I915_READ(GEN6_RC_CONTROL);
6907         rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
6908                        RC_SW_TARGET_STATE_SHIFT;
6909         DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6910                          "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6911                          onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
6912                          onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
6913                          rc_sw_target);
6914
6915         if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
6916                 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
6917                 enable_rc6 = false;
6918         }
6919
6920         /*
6921          * The exact context size is not known for BXT, so assume a page size
6922          * for this check.
6923          */
6924         rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
6925         if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
6926               (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
6927                 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
6928                 enable_rc6 = false;
6929         }
6930
6931         if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
6932               ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
6933               ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
6934               ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
6935                 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
6936                 enable_rc6 = false;
6937         }
6938
6939         if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
6940             !I915_READ(GEN8_PUSHBUS_ENABLE) ||
6941             !I915_READ(GEN8_PUSHBUS_SHIFT)) {
6942                 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
6943                 enable_rc6 = false;
6944         }
6945
6946         if (!I915_READ(GEN6_GFXPAUSE)) {
6947                 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
6948                 enable_rc6 = false;
6949         }
6950
6951         if (!I915_READ(GEN8_MISC_CTRL0)) {
6952                 DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
6953                 enable_rc6 = false;
6954         }
6955
6956         return enable_rc6;
6957 }
6958
6959 static bool sanitize_rc6(struct drm_i915_private *i915)
6960 {
6961         struct intel_device_info *info = mkwrite_device_info(i915);
6962
6963         /* Powersaving is controlled by the host when inside a VM */
6964         if (intel_vgpu_active(i915))
6965                 info->has_rc6 = 0;
6966
6967         if (info->has_rc6 &&
6968             IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
6969                 DRM_INFO("RC6 disabled by BIOS\n");
6970                 info->has_rc6 = 0;
6971         }
6972
6973         /*
6974          * We assume that we do not have any deep rc6 levels if we don't have
6975          * have the previous rc6 level supported, i.e. we use HAS_RC6()
6976          * as the initial coarse check for rc6 in general, moving on to
6977          * progressively finer/deeper levels.
6978          */
6979         if (!info->has_rc6 && info->has_rc6p)
6980                 info->has_rc6p = 0;
6981
6982         return info->has_rc6;
6983 }
6984
6985 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
6986 {
6987         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6988
6989         /* All of these values are in units of 50MHz */
6990
6991         /* static values from HW: RP0 > RP1 > RPn (min_freq) */
6992         if (IS_GEN9_LP(dev_priv)) {
6993                 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
6994                 rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
6995                 rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
6996                 rps->min_freq = (rp_state_cap >>  0) & 0xff;
6997         } else {
6998                 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
6999                 rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
7000                 rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
7001                 rps->min_freq = (rp_state_cap >> 16) & 0xff;
7002         }
7003         /* hw_max = RP0 until we check for overclocking */
7004         rps->max_freq = rps->rp0_freq;
7005
7006         rps->efficient_freq = rps->rp1_freq;
7007         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
7008             IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7009                 u32 ddcc_status = 0;
7010
7011                 if (sandybridge_pcode_read(dev_priv,
7012                                            HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
7013                                            &ddcc_status) == 0)
7014                         rps->efficient_freq =
7015                                 clamp_t(u8,
7016                                         ((ddcc_status >> 8) & 0xff),
7017                                         rps->min_freq,
7018                                         rps->max_freq);
7019         }
7020
7021         if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7022                 /* Store the frequency values in 16.66 MHz units, which is
7023                  * the natural hardware unit for SKL
7024                  */
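                /*
                 * For illustration: GEN9_FREQ_SCALER is 3, so e.g. an RP0 of
                 * 18 (18 * 50 MHz = 900 MHz) is stored as 54
                 * (54 * 16.66 MHz ~= 900 MHz).
                 */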
7025                 rps->rp0_freq *= GEN9_FREQ_SCALER;
7026                 rps->rp1_freq *= GEN9_FREQ_SCALER;
7027                 rps->min_freq *= GEN9_FREQ_SCALER;
7028                 rps->max_freq *= GEN9_FREQ_SCALER;
7029                 rps->efficient_freq *= GEN9_FREQ_SCALER;
7030         }
7031 }
7032
7033 static void reset_rps(struct drm_i915_private *dev_priv,
7034                       int (*set)(struct drm_i915_private *, u8))
7035 {
7036         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7037         u8 freq = rps->cur_freq;
7038
7039         /* force a reset */
7040         rps->power.mode = -1;
7041         rps->cur_freq = -1;
7042
7043         if (set(dev_priv, freq))
7044                 DRM_ERROR("Failed to reset RPS to initial values\n");
7045 }
7046
7047 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
7048 static void gen9_enable_rps(struct drm_i915_private *dev_priv)
7049 {
7050         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7051
7052         /* Program defaults and thresholds for RPS */
7053         if (IS_GEN(dev_priv, 9))
7054                 I915_WRITE(GEN6_RC_VIDEO_FREQ,
7055                         GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
7056
7057         /* 1 second timeout*/
7058         I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
7059                 GT_INTERVAL_FROM_US(dev_priv, 1000000));
7060
7061         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
7062
7063         /* Leaning on the below call to gen6_set_rps to program/setup the
7064          * Up/Down EI & threshold registers, as well as the RP_CONTROL,
7065          * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
7066         reset_rps(dev_priv, gen6_set_rps);
7067
7068         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7069 }
7070
7071 static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
7072 {
7073         struct intel_engine_cs *engine;
7074         enum intel_engine_id id;
7075         u32 rc6_mode;
7076
7077         /* 1a: Software RC state - RC0 */
7078         I915_WRITE(GEN6_RC_STATE, 0);
7079
7080         /* 1b: Get forcewake during program sequence. Although the driver
7081          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7082         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7083
7084         /* 2a: Disable RC states. */
7085         I915_WRITE(GEN6_RC_CONTROL, 0);
7086
7087         /* 2b: Program RC6 thresholds.*/
7088         if (INTEL_GEN(dev_priv) >= 10) {
7089                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
7090                 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
7091         } else if (IS_SKYLAKE(dev_priv)) {
7092                 /*
7093                  * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
7094                  * when CPG is enabled
7095                  */
7096                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
7097         } else {
7098                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
7099         }
7100
7101         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7102         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7103         for_each_engine(engine, dev_priv, id)
7104                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7105
7106         if (HAS_GUC(dev_priv))
7107                 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
7108
7109         I915_WRITE(GEN6_RC_SLEEP, 0);
7110
7111         /*
7112          * 2c: Program Coarse Power Gating Policies.
7113          *
7114          * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
7115          * use instead is a more conservative estimate for the maximum time
7116          * it takes us to service a CS interrupt and submit a new ELSP - that
7117          * is the time which the GPU is idle waiting for the CPU to select the
7118          * next request to execute. If the idle hysteresis is less than that
7119          * interrupt service latency, the hardware will automatically gate
7120          * the power well and we will then incur the wake up cost on top of
7121          * the service latency. A similar guide from intel_pstate is that we
7122          * do not want the enable hysteresis to be less than the wakeup latency.
7123          *
7124          * igt/gem_exec_nop/sequential provides a rough estimate for the
7125          * service latency, and puts it around 10us for Broadwell (and other
7126          * big core) and around 40us for Broxton (and other low power cores).
7127          * [Note that for legacy ringbuffer submission, this is less than 1us!]
7128          * However, the wakeup latency on Broxton is closer to 100us. To be
7129          * conservative, we have to factor in a context switch on top (due
7130          * to ksoftirqd).
7131          */
7132         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
7133         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
7134
7135         /* 3a: Enable RC6 */
7136         I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
7137
7138         /* WaRsUseTimeoutMode:cnl (pre-prod) */
7139         if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
7140                 rc6_mode = GEN7_RC_CTL_TO_MODE;
7141         else
7142                 rc6_mode = GEN6_RC_CTL_EI_MODE(1);
7143
7144         I915_WRITE(GEN6_RC_CONTROL,
7145                    GEN6_RC_CTL_HW_ENABLE |
7146                    GEN6_RC_CTL_RC6_ENABLE |
7147                    rc6_mode);
7148
7149         /*
7150          * 3b: Enable Coarse Power Gating only when RC6 is enabled.
7151          * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
7152          */
7153         if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
7154                 I915_WRITE(GEN9_PG_ENABLE, 0);
7155         else
7156                 I915_WRITE(GEN9_PG_ENABLE,
7157                            GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
7158
7159         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7160 }
7161
7162 static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
7163 {
7164         struct intel_engine_cs *engine;
7165         enum intel_engine_id id;
7166
7167         /* 1a: Software RC state - RC0 */
7168         I915_WRITE(GEN6_RC_STATE, 0);
7169
7170         /* 1b: Get forcewake during program sequence. Although the driver
7171          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7172         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7173
7174         /* 2a: Disable RC states. */
7175         I915_WRITE(GEN6_RC_CONTROL, 0);
7176
7177         /* 2b: Program RC6 thresholds.*/
7178         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7179         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7180         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7181         for_each_engine(engine, dev_priv, id)
7182                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7183         I915_WRITE(GEN6_RC_SLEEP, 0);
7184         I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
7185
7186         /* 3: Enable RC6 */
7187
7188         I915_WRITE(GEN6_RC_CONTROL,
7189                    GEN6_RC_CTL_HW_ENABLE |
7190                    GEN7_RC_CTL_TO_MODE |
7191                    GEN6_RC_CTL_RC6_ENABLE);
7192
7193         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7194 }
7195
7196 static void gen8_enable_rps(struct drm_i915_private *dev_priv)
7197 {
7198         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7199
7200         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7201
7202         /* 1 Program defaults and thresholds for RPS*/
7203         I915_WRITE(GEN6_RPNSWREQ,
7204                    HSW_FREQUENCY(rps->rp1_freq));
7205         I915_WRITE(GEN6_RC_VIDEO_FREQ,
7206                    HSW_FREQUENCY(rps->rp1_freq));
7207         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
7208         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
7209
7210         /* Docs recommend 900MHz, and 300 MHz respectively */
7211         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
7212                    rps->max_freq_softlimit << 24 |
7213                    rps->min_freq_softlimit << 16);
7214
7215         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
7216         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
7217         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
7218         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
7219
7220         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7221
7222         /* 2: Enable RPS */
7223         I915_WRITE(GEN6_RP_CONTROL,
7224                    GEN6_RP_MEDIA_TURBO |
7225                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7226                    GEN6_RP_MEDIA_IS_GFX |
7227                    GEN6_RP_ENABLE |
7228                    GEN6_RP_UP_BUSY_AVG |
7229                    GEN6_RP_DOWN_IDLE_AVG);
7230
7231         reset_rps(dev_priv, gen6_set_rps);
7232
7233         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7234 }
7235
7236 static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
7237 {
7238         struct intel_engine_cs *engine;
7239         enum intel_engine_id id;
7240         u32 rc6vids, rc6_mask;
7241         u32 gtfifodbg;
7242         int ret;
7243
7244         I915_WRITE(GEN6_RC_STATE, 0);
7245
7246         /* Clear the DBG now so we aren't confused by earlier errors */
7247         gtfifodbg = I915_READ(GTFIFODBG);
7248         if (gtfifodbg) {
7249                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
7250                 I915_WRITE(GTFIFODBG, gtfifodbg);
7251         }
7252
7253         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7254
7255         /* disable the counters and set deterministic thresholds */
7256         I915_WRITE(GEN6_RC_CONTROL, 0);
7257
7258         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
7259         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
7260         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
7261         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7262         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7263
7264         for_each_engine(engine, dev_priv, id)
7265                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7266
7267         I915_WRITE(GEN6_RC_SLEEP, 0);
7268         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
7269         if (IS_IVYBRIDGE(dev_priv))
7270                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
7271         else
7272                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
7273         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
7274         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
7275
7276         /* We don't use those on Haswell */
7277         rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
7278         if (HAS_RC6p(dev_priv))
7279                 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
7280         if (HAS_RC6pp(dev_priv))
7281                 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
7282         I915_WRITE(GEN6_RC_CONTROL,
7283                    rc6_mask |
7284                    GEN6_RC_CTL_EI_MODE(1) |
7285                    GEN6_RC_CTL_HW_ENABLE);
7286
7287         rc6vids = 0;
7288         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
7289         if (IS_GEN(dev_priv, 6) && ret) {
7290                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
7291         } else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
7292                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
7293                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
7294                 rc6vids &= 0xffff00;
7295                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
7296                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
7297                 if (ret)
7298                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
7299         }
7300
7301         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7302 }
7303
7304 static void gen6_enable_rps(struct drm_i915_private *dev_priv)
7305 {
7306         /* Here begins a magic sequence of register writes to enable
7307          * auto-downclocking.
7308          *
7309          * Perhaps there might be some value in exposing these to
7310          * userspace...
7311          */
7312         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7313
7314         /* Power down if completely idle for over 50ms */
7315         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
7316         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7317
7318         reset_rps(dev_priv, gen6_set_rps);
7319
7320         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7321 }
7322
7323 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
7324 {
7325         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7326         const int min_freq = 15;
7327         const int scaling_factor = 180;
7328         unsigned int gpu_freq;
7329         unsigned int max_ia_freq, min_ring_freq;
7330         unsigned int max_gpu_freq, min_gpu_freq;
7331         struct cpufreq_policy *policy;
7332
7333         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
7334
7335         if (rps->max_freq <= rps->min_freq)
7336                 return;
7337
7338         policy = cpufreq_cpu_get(0);
7339         if (policy) {
7340                 max_ia_freq = policy->cpuinfo.max_freq;
7341                 cpufreq_cpu_put(policy);
7342         } else {
7343                 /*
7344                  * Default to measured freq if none found, PCU will ensure we
7345                  * don't go over
7346                  */
7347                 max_ia_freq = tsc_khz;
7348         }
7349
7350         /* Convert from kHz to MHz */
7351         max_ia_freq /= 1000;
7352
7353         min_ring_freq = I915_READ(DCLK) & 0xf;
7354         /* convert DDR frequency from units of 266.6MHz to bandwidth */
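        /*
         * e.g. a DCLK field of 3 (3 * 266.6 MHz ~= 800 MHz) becomes
         * mult_frac(3, 8, 3) = 8, i.e. roughly 800 MHz expressed in the
         * 100 MHz units compared against below.
         */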
7355         min_ring_freq = mult_frac(min_ring_freq, 8, 3);
7356
7357         min_gpu_freq = rps->min_freq;
7358         max_gpu_freq = rps->max_freq;
7359         if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7360                 /* Convert GT frequency to 50 MHz units */
7361                 min_gpu_freq /= GEN9_FREQ_SCALER;
7362                 max_gpu_freq /= GEN9_FREQ_SCALER;
7363         }
7364
7365         /*
7366          * For each potential GPU frequency, load a ring frequency we'd like
7367          * to use for memory access.  We do this by specifying the IA frequency
7368          * the PCU should use as a reference to determine the ring frequency.
7369          */
7370         for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
7371                 const int diff = max_gpu_freq - gpu_freq;
7372                 unsigned int ia_freq = 0, ring_freq = 0;
7373
7374                 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7375                         /*
7376                          * ring_freq = 2 * GT. ring_freq is in 100MHz units
7377                          * No floor required for ring frequency on SKL.
7378                          */
7379                         ring_freq = gpu_freq;
7380                 } else if (INTEL_GEN(dev_priv) >= 8) {
7381                         /* max(2 * GT, DDR). NB: GT is 50MHz units */
7382                         ring_freq = max(min_ring_freq, gpu_freq);
7383                 } else if (IS_HASWELL(dev_priv)) {
7384                         ring_freq = mult_frac(gpu_freq, 5, 4);
7385                         ring_freq = max(min_ring_freq, ring_freq);
7386                         /* leave ia_freq as the default, chosen by cpufreq */
7387                 } else {
7388                         /* On older processors, there is no separate ring
7389                          * clock domain, so in order to boost the bandwidth
7390                          * of the ring, we need to upclock the CPU (ia_freq).
7391                          *
7392                          * For GPU frequencies less than 750MHz,
7393                          * just use the lowest ring freq.
7394                          */
7395                         if (gpu_freq < min_freq)
7396                                 ia_freq = 800;
7397                         else
7398                                 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
7399                         ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
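                        /*
                         * Illustrative numbers only: with scaling_factor = 180
                         * each GPU step below max lowers the requested IA freq
                         * by 90 MHz, so for, say, max_ia_freq = 3000 MHz and
                         * diff = 4 we get 3000 - (4 * 180) / 2 = 2640 MHz,
                         * then DIV_ROUND_CLOSEST(2640, 100) = 26.
                         */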
7400                 }
7401
7402                 sandybridge_pcode_write(dev_priv,
7403                                         GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
7404                                         ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
7405                                         ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
7406                                         gpu_freq);
7407         }
7408 }
7409
7410 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
7411 {
7412         u32 val, rp0;
7413
7414         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7415
7416         switch (RUNTIME_INFO(dev_priv)->sseu.eu_total) {
7417         case 8:
7418                 /* (2 * 4) config */
7419                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
7420                 break;
7421         case 12:
7422                 /* (2 * 6) config */
7423                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
7424                 break;
7425         case 16:
7426                 /* (2 * 8) config */
7427         default:
7428                 /* Setting (2 * 8) Min RP0 for any other combination */
7429                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
7430                 break;
7431         }
7432
7433         rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
7434
7435         return rp0;
7436 }
7437
7438 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7439 {
7440         u32 val, rpe;
7441
7442         val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
7443         rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
7444
7445         return rpe;
7446 }
7447
7448 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
7449 {
7450         u32 val, rp1;
7451
7452         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7453         rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
7454
7455         return rp1;
7456 }
7457
7458 static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
7459 {
7460         u32 val, rpn;
7461
7462         val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
7463         rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
7464                        FB_GFX_FREQ_FUSE_MASK);
7465
7466         return rpn;
7467 }
7468
7469 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
7470 {
7471         u32 val, rp1;
7472
7473         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7474
7475         rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
7476
7477         return rp1;
7478 }
7479
7480 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
7481 {
7482         u32 val, rp0;
7483
7484         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7485
7486         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
7487         /* Clamp to max */
7488         rp0 = min_t(u32, rp0, 0xea);
7489
7490         return rp0;
7491 }
7492
7493 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7494 {
7495         u32 val, rpe;
7496
7497         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
7498         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
7499         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
7500         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
7501
7502         return rpe;
7503 }
7504
7505 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
7506 {
7507         u32 val;
7508
7509         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
7510         /*
7511          * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7512          * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7513          * a BYT-M B0 the above register contains 0xbf. Moreover when setting
7514          * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
7515          * to make sure it matches what Punit accepts.
7516          */
7517         return max_t(u32, val, 0xc0);
7518 }
7519
7520 /* Check that the pctx buffer wasn't moved under us. */
7521 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
7522 {
7523         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7524
7525         WARN_ON(pctx_addr != dev_priv->dsm.start +
7526                              dev_priv->vlv_pctx->stolen->start);
7527 }
7528
7529
7530 /* Check that the pcbr address is not empty. */
7531 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
7532 {
7533         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7534
7535         WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
7536 }
7537
7538 static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
7539 {
7540         resource_size_t pctx_paddr, paddr;
7541         resource_size_t pctx_size = 32*1024;
7542         u32 pcbr;
7543
7544         pcbr = I915_READ(VLV_PCBR);
7545         if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
7546                 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7547                 paddr = dev_priv->dsm.end + 1 - pctx_size;
7548                 GEM_BUG_ON(paddr > U32_MAX);
7549
7550                 pctx_paddr = (paddr & (~4095));
7551                 I915_WRITE(VLV_PCBR, pctx_paddr);
7552         }
7553
7554         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7555 }
7556
7557 static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
7558 {
7559         struct drm_i915_gem_object *pctx;
7560         resource_size_t pctx_paddr;
7561         resource_size_t pctx_size = 24*1024;
7562         u32 pcbr;
7563
7564         pcbr = I915_READ(VLV_PCBR);
7565         if (pcbr) {
7566                 /* BIOS set it up already, grab the pre-alloc'd space */
7567                 resource_size_t pcbr_offset;
7568
7569                 pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
7570                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
7571                                                                       pcbr_offset,
7572                                                                       I915_GTT_OFFSET_NONE,
7573                                                                       pctx_size);
7574                 goto out;
7575         }
7576
7577         DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7578
7579         /*
7580          * From the Gunit register HAS:
7581          * The Gfx driver is expected to program this register and ensure
7582          * proper allocation within Gfx stolen memory.  For example, this
7583          * register should be programmed such that the PCBR range does not
7584          * overlap with other ranges, such as the frame buffer, protected
7585          * memory, or any other relevant ranges.
7586          */
7587         pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
7588         if (!pctx) {
7589                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
7590                 goto out;
7591         }
7592
7593         GEM_BUG_ON(range_overflows_t(u64,
7594                                      dev_priv->dsm.start,
7595                                      pctx->stolen->start,
7596                                      U32_MAX));
7597         pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
7598         I915_WRITE(VLV_PCBR, pctx_paddr);
7599
7600 out:
7601         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7602         dev_priv->vlv_pctx = pctx;
7603 }
7604
7605 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
7606 {
7607         struct drm_i915_gem_object *pctx;
7608
7609         pctx = fetch_and_zero(&dev_priv->vlv_pctx);
7610         if (pctx)
7611                 i915_gem_object_put(pctx);
7612 }
7613
7614 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
7615 {
7616         dev_priv->gt_pm.rps.gpll_ref_freq =
7617                 vlv_get_cck_clock(dev_priv, "GPLL ref",
7618                                   CCK_GPLL_CLOCK_CONTROL,
7619                                   dev_priv->czclk_freq);
7620
7621         DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
7622                          dev_priv->gt_pm.rps.gpll_ref_freq);
7623 }
7624
7625 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
7626 {
7627         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7628         u32 val;
7629
7630         valleyview_setup_pctx(dev_priv);
7631
7632         vlv_init_gpll_ref_freq(dev_priv);
7633
7634         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7635         switch ((val >> 6) & 3) {
7636         case 0:
7637         case 1:
7638                 dev_priv->mem_freq = 800;
7639                 break;
7640         case 2:
7641                 dev_priv->mem_freq = 1066;
7642                 break;
7643         case 3:
7644                 dev_priv->mem_freq = 1333;
7645                 break;
7646         }
7647         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7648
7649         rps->max_freq = valleyview_rps_max_freq(dev_priv);
7650         rps->rp0_freq = rps->max_freq;
7651         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7652                          intel_gpu_freq(dev_priv, rps->max_freq),
7653                          rps->max_freq);
7654
7655         rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
7656         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7657                          intel_gpu_freq(dev_priv, rps->efficient_freq),
7658                          rps->efficient_freq);
7659
7660         rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
7661         DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
7662                          intel_gpu_freq(dev_priv, rps->rp1_freq),
7663                          rps->rp1_freq);
7664
7665         rps->min_freq = valleyview_rps_min_freq(dev_priv);
7666         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7667                          intel_gpu_freq(dev_priv, rps->min_freq),
7668                          rps->min_freq);
7669 }
7670
7671 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
7672 {
7673         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7674         u32 val;
7675
7676         cherryview_setup_pctx(dev_priv);
7677
7678         vlv_init_gpll_ref_freq(dev_priv);
7679
7680         mutex_lock(&dev_priv->sb_lock);
7681         val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
7682         mutex_unlock(&dev_priv->sb_lock);
7683
7684         switch ((val >> 2) & 0x7) {
7685         case 3:
7686                 dev_priv->mem_freq = 2000;
7687                 break;
7688         default:
7689                 dev_priv->mem_freq = 1600;
7690                 break;
7691         }
7692         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7693
7694         rps->max_freq = cherryview_rps_max_freq(dev_priv);
7695         rps->rp0_freq = rps->max_freq;
7696         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7697                          intel_gpu_freq(dev_priv, rps->max_freq),
7698                          rps->max_freq);
7699
7700         rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
7701         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7702                          intel_gpu_freq(dev_priv, rps->efficient_freq),
7703                          rps->efficient_freq);
7704
7705         rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
7706         DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
7707                          intel_gpu_freq(dev_priv, rps->rp1_freq),
7708                          rps->rp1_freq);
7709
7710         rps->min_freq = cherryview_rps_min_freq(dev_priv);
7711         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7712                          intel_gpu_freq(dev_priv, rps->min_freq),
7713                          rps->min_freq);
7714
7715         WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
7716                    rps->min_freq) & 1,
7717                   "Odd GPU freq values\n");
7718 }
7719
7720 static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
7721 {
7722         valleyview_cleanup_pctx(dev_priv);
7723 }
7724
7725 static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
7726 {
7727         struct intel_engine_cs *engine;
7728         enum intel_engine_id id;
7729         u32 gtfifodbg, rc6_mode, pcbr;
7730
7731         gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
7732                                              GT_FIFO_FREE_ENTRIES_CHV);
7733         if (gtfifodbg) {
7734                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7735                                  gtfifodbg);
7736                 I915_WRITE(GTFIFODBG, gtfifodbg);
7737         }
7738
7739         cherryview_check_pctx(dev_priv);
7740
7741         /* 1a & 1b: Get forcewake during program sequence. Although the driver
7742          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7743         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7744
7745         /*  Disable RC states. */
7746         I915_WRITE(GEN6_RC_CONTROL, 0);
7747
7748         /* 2a: Program RC6 thresholds.*/
7749         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7750         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7751         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7752
7753         for_each_engine(engine, dev_priv, id)
7754                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7755         I915_WRITE(GEN6_RC_SLEEP, 0);
7756
7757         /* TO threshold set to 500 us (0x186 * 1.28 us) */
7758         I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
7759
7760         /* Allows RC6 residency counter to work */
7761         I915_WRITE(VLV_COUNTER_CONTROL,
7762                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7763                                       VLV_MEDIA_RC6_COUNT_EN |
7764                                       VLV_RENDER_RC6_COUNT_EN));
7765
7766         /* For now we assume BIOS is allocating and populating the PCBR  */
7767         pcbr = I915_READ(VLV_PCBR);
7768
7769         /* 3: Enable RC6 */
7770         rc6_mode = 0;
7771         if (pcbr >> VLV_PCBR_ADDR_SHIFT)
7772                 rc6_mode = GEN7_RC_CTL_TO_MODE;
7773         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7774
7775         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7776 }
7777
7778 static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
7779 {
7780         u32 val;
7781
7782         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7783
7784         /* 1: Program defaults and thresholds for RPS*/
7785         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7786         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7787         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7788         I915_WRITE(GEN6_RP_UP_EI, 66000);
7789         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7790
7791         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7792
7793         /* 2: Enable RPS */
7794         I915_WRITE(GEN6_RP_CONTROL,
7795                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7796                    GEN6_RP_MEDIA_IS_GFX |
7797                    GEN6_RP_ENABLE |
7798                    GEN6_RP_UP_BUSY_AVG |
7799                    GEN6_RP_DOWN_IDLE_AVG);
7800
7801         /* Setting Fixed Bias */
7802         val = VLV_OVERRIDE_EN |
7803                   VLV_SOC_TDP_EN |
7804                   CHV_BIAS_CPU_50_SOC_50;
7805         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7806
7807         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7808
7809         /* RPS code assumes GPLL is used */
7810         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7811
7812         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7813         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7814
7815         reset_rps(dev_priv, valleyview_set_rps);
7816
7817         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7818 }
7819
7820 static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
7821 {
7822         struct intel_engine_cs *engine;
7823         enum intel_engine_id id;
7824         u32 gtfifodbg;
7825
7826         valleyview_check_pctx(dev_priv);
7827
7828         gtfifodbg = I915_READ(GTFIFODBG);
7829         if (gtfifodbg) {
7830                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7831                                  gtfifodbg);
7832                 I915_WRITE(GTFIFODBG, gtfifodbg);
7833         }
7834
7835         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7836
7837         /*  Disable RC states. */
7838         I915_WRITE(GEN6_RC_CONTROL, 0);
7839
7840         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
7841         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7842         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7843
7844         for_each_engine(engine, dev_priv, id)
7845                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7846
7847         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
7848
7849         /* Allows RC6 residency counter to work */
7850         I915_WRITE(VLV_COUNTER_CONTROL,
7851                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7852                                       VLV_MEDIA_RC0_COUNT_EN |
7853                                       VLV_RENDER_RC0_COUNT_EN |
7854                                       VLV_MEDIA_RC6_COUNT_EN |
7855                                       VLV_RENDER_RC6_COUNT_EN));
7856
7857         I915_WRITE(GEN6_RC_CONTROL,
7858                    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
7859
7860         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7861 }
7862
7863 static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
7864 {
7865         u32 val;
7866
7867         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7868
7869         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7870         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7871         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7872         I915_WRITE(GEN6_RP_UP_EI, 66000);
7873         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7874
7875         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7876
7877         I915_WRITE(GEN6_RP_CONTROL,
7878                    GEN6_RP_MEDIA_TURBO |
7879                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7880                    GEN6_RP_MEDIA_IS_GFX |
7881                    GEN6_RP_ENABLE |
7882                    GEN6_RP_UP_BUSY_AVG |
7883                    GEN6_RP_DOWN_IDLE_CONT);
7884
7885         /* Setting Fixed Bias */
7886         val = VLV_OVERRIDE_EN |
7887                   VLV_SOC_TDP_EN |
7888                   VLV_BIAS_CPU_125_SOC_875;
7889         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7890
7891         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7892
7893         /* RPS code assumes GPLL is used */
7894         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7895
7896         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7897         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7898
7899         reset_rps(dev_priv, valleyview_set_rps);
7900
7901         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7902 }
7903
7904 static unsigned long intel_pxfreq(u32 vidfreq)
7905 {
7906         unsigned long freq;
7907         int div = (vidfreq & 0x3f0000) >> 16;
7908         int post = (vidfreq & 0x3000) >> 12;
7909         int pre = (vidfreq & 0x7);
7910
7911         if (!pre)
7912                 return 0;
7913
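        /*
         * Purely illustrative field values: div = 32, post = 1, pre = 2
         * gives 32 * 133333 / ((1 << 1) * 2) = 1066664.
         */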
7914         freq = ((div * 133333) / ((1<<post) * pre));
7915
7916         return freq;
7917 }
7918
7919 static const struct cparams {
7920         u16 i;
7921         u16 t;
7922         u16 m;
7923         u16 c;
7924 } cparams[] = {
7925         { 1, 1333, 301, 28664 },
7926         { 1, 1066, 294, 24460 },
7927         { 1, 800, 294, 25192 },
7928         { 0, 1333, 276, 27605 },
7929         { 0, 1066, 276, 27605 },
7930         { 0, 800, 231, 23784 },
7931 };
7932
7933 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
7934 {
7935         u64 total_count, diff, ret;
7936         u32 count1, count2, count3, m = 0, c = 0;
7937         unsigned long now = jiffies_to_msecs(jiffies), diff1;
7938         int i;
7939
7940         lockdep_assert_held(&mchdev_lock);
7941
7942         diff1 = now - dev_priv->ips.last_time1;
7943
7944         /* Prevent division-by-zero if we are asking too fast.
7945          * Also, we don't get interesting results if we are polling
7946          * faster than once in 10ms, so just return the saved value
7947          * in such cases.
7948          */
7949         if (diff1 <= 10)
7950                 return dev_priv->ips.chipset_power;
7951
7952         count1 = I915_READ(DMIEC);
7953         count2 = I915_READ(DDREC);
7954         count3 = I915_READ(CSIEC);
7955
7956         total_count = count1 + count2 + count3;
7957
7958         /* FIXME: handle per-counter overflow */
7959         if (total_count < dev_priv->ips.last_count1) {
7960                 diff = ~0UL - dev_priv->ips.last_count1;
7961                 diff += total_count;
7962         } else {
7963                 diff = total_count - dev_priv->ips.last_count1;
7964         }
7965
7966         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
7967                 if (cparams[i].i == dev_priv->ips.c_m &&
7968                     cparams[i].t == dev_priv->ips.r_t) {
7969                         m = cparams[i].m;
7970                         c = cparams[i].c;
7971                         break;
7972                 }
7973         }
7974
7975         diff = div_u64(diff, diff1);
7976         ret = ((m * diff) + c);
7977         ret = div_u64(ret, 10);
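        /*
         * e.g. with the cparams entry { 1, 1333, 301, 28664 } selected above
         * and diff = 100, this yields (301 * 100 + 28664) / 10 = 5876.
         */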
7978
7979         dev_priv->ips.last_count1 = total_count;
7980         dev_priv->ips.last_time1 = now;
7981
7982         dev_priv->ips.chipset_power = ret;
7983
7984         return ret;
7985 }
7986
7987 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
7988 {
7989         intel_wakeref_t wakeref;
7990         unsigned long val = 0;
7991
7992         if (!IS_GEN(dev_priv, 5))
7993                 return 0;
7994
7995         with_intel_runtime_pm(dev_priv, wakeref) {
7996                 spin_lock_irq(&mchdev_lock);
7997                 val = __i915_chipset_val(dev_priv);
7998                 spin_unlock_irq(&mchdev_lock);
7999         }
8000
8001         return val;
8002 }
8003
8004 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
8005 {
8006         unsigned long m, x, b;
8007         u32 tsfs;
8008
8009         tsfs = I915_READ(TSFS);
8010
8011         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
8012         x = I915_READ8(TR1);
8013
8014         b = tsfs & TSFS_INTR_MASK;
8015
8016         return ((m * x) / 127) - b;
8017 }
8018
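/*
 * Illustrative mapping from the formula below: a PXVID of 47 yields
 * (47 + 2) * 125 = 6125, while raw values 8..30 are first clamped to 31
 * and so all map to (31 + 2) * 125 = 4125.
 */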
8019 static int _pxvid_to_vd(u8 pxvid)
8020 {
8021         if (pxvid == 0)
8022                 return 0;
8023
8024         if (pxvid >= 8 && pxvid < 31)
8025                 pxvid = 31;
8026
8027         return (pxvid + 2) * 125;
8028 }
8029
8030 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
8031 {
8032         const int vd = _pxvid_to_vd(pxvid);
8033         const int vm = vd - 1125;
8034
8035         if (INTEL_INFO(dev_priv)->is_mobile)
8036                 return vm > 0 ? vm : 0;
8037
8038         return vd;
8039 }
8040
8041 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
8042 {
8043         u64 now, diff, diffms;
8044         u32 count;
8045
8046         lockdep_assert_held(&mchdev_lock);
8047
8048         now = ktime_get_raw_ns();
8049         diffms = now - dev_priv->ips.last_time2;
8050         do_div(diffms, NSEC_PER_MSEC);
8051
8052         /* Don't divide by 0 */
8053         if (!diffms)
8054                 return;
8055
8056         count = I915_READ(GFXEC);
8057
8058         if (count < dev_priv->ips.last_count2) {
8059                 diff = ~0UL - dev_priv->ips.last_count2;
8060                 diff += count;
8061         } else {
8062                 diff = count - dev_priv->ips.last_count2;
8063         }
8064
8065         dev_priv->ips.last_count2 = count;
8066         dev_priv->ips.last_time2 = now;
8067
8068         /* More magic constants... */
8069         diff = diff * 1181;
8070         diff = div_u64(diff, diffms * 10);
8071         dev_priv->ips.gfx_power = diff;
8072 }
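/*
 * Illustrative note: the update above keeps a running "graphics power" rate,
 * gfx_power = delta(GFXEC) * 1181 / (delta_ms * 10). With assumed numbers, a
 * counter increase of 1000 over 50ms gives 1000 * 1181 / 500 = 2362. The
 * 1181 factor is one of the undocumented IPS constants.
 */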
8073
8074 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
8075 {
8076         intel_wakeref_t wakeref;
8077
8078         if (!IS_GEN(dev_priv, 5))
8079                 return;
8080
8081         with_intel_runtime_pm(dev_priv, wakeref) {
8082                 spin_lock_irq(&mchdev_lock);
8083                 __i915_update_gfx_val(dev_priv);
8084                 spin_unlock_irq(&mchdev_lock);
8085         }
8086 }
8087
8088 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
8089 {
8090         unsigned long t, corr, state1, corr2, state2;
8091         u32 pxvid, ext_v;
8092
8093         lockdep_assert_held(&mchdev_lock);
8094
8095         pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
8096         pxvid = (pxvid >> 24) & 0x7f;
8097         ext_v = pvid_to_extvid(dev_priv, pxvid);
8098
8099         state1 = ext_v;
8100
8101         t = i915_mch_val(dev_priv);
8102
8103         /* Revel in the empirically derived constants */
8104
8105         /* Correction factor in 1/100000 units */
8106         if (t > 80)
8107                 corr = ((t * 2349) + 135940);
8108         else if (t >= 50)
8109                 corr = ((t * 964) + 29317);
8110         else /* < 50 */
8111                 corr = ((t * 301) + 1004);
8112
8113         corr = corr * ((150142 * state1) / 10000 - 78642);
8114         corr /= 100000;
8115         corr2 = (corr * dev_priv->ips.corr);
8116
8117         state2 = (corr2 * state1) / 10000;
8118         state2 /= 100; /* convert to mW */
8119
8120         __i915_update_gfx_val(dev_priv);
8121
8122         return dev_priv->ips.gfx_power + state2;
8123 }
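/*
 * Summary of the calculation above (no new facts, just the flow): read the
 * VID for the current RPS frequency and turn it into a voltage-like value
 * (state1), pick a correction slope/intercept from the temperature band
 * reported by i915_mch_val() (< 50, 50..80, > 80), scale that by a
 * voltage-dependent term and by the LCFUSE calibration captured in
 * intel_init_emon(), convert to mW, and add the result to the running
 * average maintained by __i915_update_gfx_val().
 */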
8124
8125 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
8126 {
8127         intel_wakeref_t wakeref;
8128         unsigned long val = 0;
8129
8130         if (!IS_GEN(dev_priv, 5))
8131                 return 0;
8132
8133         with_intel_runtime_pm(dev_priv, wakeref) {
8134                 spin_lock_irq(&mchdev_lock);
8135                 val = __i915_gfx_val(dev_priv);
8136                 spin_unlock_irq(&mchdev_lock);
8137         }
8138
8139         return val;
8140 }
8141
8142 static struct drm_i915_private *i915_mch_dev;
8143
8144 static struct drm_i915_private *mchdev_get(void)
8145 {
8146         struct drm_i915_private *i915;
8147
8148         rcu_read_lock();
8149         i915 = i915_mch_dev;
8150         if (!i915 || !kref_get_unless_zero(&i915->drm.ref))
8151                 i915 = NULL;
8152         rcu_read_unlock();
8153
8154         return i915;
8155 }
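/*
 * Note on the pattern above: i915_mch_dev is published with
 * rcu_assign_pointer() in intel_gpu_ips_init() and cleared again in
 * intel_gpu_ips_teardown(). Readers hold rcu_read_lock() while they try to
 * take a reference on the drm device with kref_get_unless_zero(); if the
 * pointer is already NULL or the last reference is gone, the exported IPS
 * entry points below simply bail out. Every successful mchdev_get() must be
 * balanced with drm_dev_put(), as the callers below do.
 */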
8156
8157 /**
8158  * i915_read_mch_val - return value for IPS use
8159  *
8160  * Calculate and return a value for the IPS driver to use when deciding whether
8161  * we have thermal and power headroom to increase CPU or GPU power budget.
8162  */
8163 unsigned long i915_read_mch_val(void)
8164 {
8165         struct drm_i915_private *i915;
8166         unsigned long chipset_val = 0;
8167         unsigned long graphics_val = 0;
8168         intel_wakeref_t wakeref;
8169
8170         i915 = mchdev_get();
8171         if (!i915)
8172                 return 0;
8173
8174         with_intel_runtime_pm(i915, wakeref) {
8175                 spin_lock_irq(&mchdev_lock);
8176                 chipset_val = __i915_chipset_val(i915);
8177                 graphics_val = __i915_gfx_val(i915);
8178                 spin_unlock_irq(&mchdev_lock);
8179         }
8180
8181         drm_dev_put(&i915->drm);
8182         return chipset_val + graphics_val;
8183 }
8184 EXPORT_SYMBOL_GPL(i915_read_mch_val);
8185
8186 /**
8187  * i915_gpu_raise - raise GPU frequency limit
8188  *
8189  * Raise the limit; IPS indicates we have thermal headroom. (Decrementing
 * moves ips.max_delay toward ips.fmax, i.e. it allows a higher frequency.)
8190  */
8191 bool i915_gpu_raise(void)
8192 {
8193         struct drm_i915_private *i915;
8194
8195         i915 = mchdev_get();
8196         if (!i915)
8197                 return false;
8198
8199         spin_lock_irq(&mchdev_lock);
8200         if (i915->ips.max_delay > i915->ips.fmax)
8201                 i915->ips.max_delay--;
8202         spin_unlock_irq(&mchdev_lock);
8203
8204         drm_dev_put(&i915->drm);
8205         return true;
8206 }
8207 EXPORT_SYMBOL_GPL(i915_gpu_raise);
8208
8209 /**
8210  * i915_gpu_lower - lower GPU frequency limit
8211  *
8212  * IPS indicates we're close to a thermal limit, so throttle back the GPU
8213  * frequency maximum.
8214  */
8215 bool i915_gpu_lower(void)
8216 {
8217         struct drm_i915_private *i915;
8218
8219         i915 = mchdev_get();
8220         if (!i915)
8221                 return false;
8222
8223         spin_lock_irq(&mchdev_lock);
8224         if (i915->ips.max_delay < i915->ips.min_delay)
8225                 i915->ips.max_delay++;
8226         spin_unlock_irq(&mchdev_lock);
8227
8228         drm_dev_put(&i915->drm);
8229         return true;
8230 }
8231 EXPORT_SYMBOL_GPL(i915_gpu_lower);
8232
8233 /**
8234  * i915_gpu_busy - indicate GPU busyness to IPS
8235  *
8236  * Tell the IPS driver whether or not the GPU is busy.
8237  */
8238 bool i915_gpu_busy(void)
8239 {
8240         struct drm_i915_private *i915;
8241         bool ret;
8242
8243         i915 = mchdev_get();
8244         if (!i915)
8245                 return false;
8246
8247         ret = i915->gt.awake;
8248
8249         drm_dev_put(&i915->drm);
8250         return ret;
8251 }
8252 EXPORT_SYMBOL_GPL(i915_gpu_busy);
8253
8254 /**
8255  * i915_gpu_turbo_disable - disable graphics turbo
8256  *
8257  * Disable graphics turbo by resetting the max frequency and setting the
8258  * current frequency to the default.
8259  */
8260 bool i915_gpu_turbo_disable(void)
8261 {
8262         struct drm_i915_private *i915;
8263         bool ret;
8264
8265         i915 = mchdev_get();
8266         if (!i915)
8267                 return false;
8268
8269         spin_lock_irq(&mchdev_lock);
8270         i915->ips.max_delay = i915->ips.fstart;
8271         ret = ironlake_set_drps(i915, i915->ips.fstart);
8272         spin_unlock_irq(&mchdev_lock);
8273
8274         drm_dev_put(&i915->drm);
8275         return ret;
8276 }
8277 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
8278
8279 /**
8280  * Tells the intel_ips driver that the i915 driver is now loaded, if
8281  * IPS got loaded first.
8282  *
8283  * This awkward dance is so that neither module has to depend on the
8284  * other in order for IPS to do the appropriate communication of
8285  * GPU turbo limits to i915.
8286  */
8287 static void
8288 ips_ping_for_i915_load(void)
8289 {
8290         void (*link)(void);
8291
8292         link = symbol_get(ips_link_to_i915_driver);
8293         if (link) {
8294                 link();
8295                 symbol_put(ips_link_to_i915_driver);
8296         }
8297 }
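/*
 * The intel_ips side is expected to do the mirror-image lookup, resolving
 * i915_read_mch_val() and the i915_gpu_*() helpers with symbol_get() at
 * runtime, which is why they are all EXPORT_SYMBOL_GPL() above rather than
 * being linked against directly.
 */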
8298
8299 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
8300 {
8301         /* We only register the i915 ips part with intel-ips once everything is
8302          * set up, to avoid intel-ips sneaking in and reading bogus values. */
8303         rcu_assign_pointer(i915_mch_dev, dev_priv);
8304
8305         ips_ping_for_i915_load();
8306 }
8307
8308 void intel_gpu_ips_teardown(void)
8309 {
8310         rcu_assign_pointer(i915_mch_dev, NULL);
8311 }
8312
8313 static void intel_init_emon(struct drm_i915_private *dev_priv)
8314 {
8315         u32 lcfuse;
8316         u8 pxw[16];
8317         int i;
8318
8319         /* Disable energy monitoring while we program the event weights */
8320         I915_WRITE(ECR, 0);
8321         POSTING_READ(ECR);
8322
8323         /* Program energy weights for various events */
8324         I915_WRITE(SDEW, 0x15040d00);
8325         I915_WRITE(CSIEW0, 0x007f0000);
8326         I915_WRITE(CSIEW1, 0x1e220004);
8327         I915_WRITE(CSIEW2, 0x04000004);
8328
8329         for (i = 0; i < 5; i++)
8330                 I915_WRITE(PEW(i), 0);
8331         for (i = 0; i < 3; i++)
8332                 I915_WRITE(DEW(i), 0);
8333
8334         /* Program P-state weights to account for frequency power adjustment */
8335         for (i = 0; i < 16; i++) {
8336                 u32 pxvidfreq = I915_READ(PXVFREQ(i));
8337                 unsigned long freq = intel_pxfreq(pxvidfreq);
8338                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
8339                         PXVFREQ_PX_SHIFT;
8340                 unsigned long val;
8341
8342                 val = vid * vid;
8343                 val *= (freq / 1000);
8344                 val *= 255;
8345                 val /= (127*127*900);
8346                 if (val > 0xff)
8347                         DRM_ERROR("bad pxval: %ld\n", val);
8348                 pxw[i] = val;
8349         }
8350         /* Render standby states get 0 weight */
8351         pxw[14] = 0;
8352         pxw[15] = 0;
8353
8354         for (i = 0; i < 4; i++) {
8355                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
8356                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
8357                 I915_WRITE(PXW(i), val);
8358         }
8359
8360         /* Adjust magic regs to magic values (more experimental results) */
8361         I915_WRITE(OGW0, 0);
8362         I915_WRITE(OGW1, 0);
8363         I915_WRITE(EG0, 0x00007f00);
8364         I915_WRITE(EG1, 0x0000000e);
8365         I915_WRITE(EG2, 0x000e0000);
8366         I915_WRITE(EG3, 0x68000300);
8367         I915_WRITE(EG4, 0x42000000);
8368         I915_WRITE(EG5, 0x00140031);
8369         I915_WRITE(EG6, 0);
8370         I915_WRITE(EG7, 0);
8371
8372         for (i = 0; i < 8; i++)
8373                 I915_WRITE(PXWL(i), 0);
8374
8375         /* Enable PMON + select events */
8376         I915_WRITE(ECR, 0x80000019);
8377
8378         lcfuse = I915_READ(LCFUSE02);
8379
8380         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
8381 }
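/*
 * Illustrative note on the P-state weight formula above:
 * pxw[i] = vid^2 * (freq / 1000) * 255 / (127 * 127 * 900). With assumed
 * inputs vid = 100 and freq / 1000 = 400, that is
 * 100 * 100 * 400 * 255 / 14516100 ~= 70, comfortably below the 0xff limit
 * that triggers the "bad pxval" error. The numbers are made up; only the
 * formula comes from the loop above.
 */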
8382
8383 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
8384 {
8385         struct intel_rps *rps = &dev_priv->gt_pm.rps;
8386
8387         /*
8388          * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
8389          * requirement.
8390          */
8391         if (!sanitize_rc6(dev_priv)) {
8392                 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
8393                 pm_runtime_get(&dev_priv->drm.pdev->dev);
8394         }
8395
8396         mutex_lock(&dev_priv->pcu_lock);
8397
8398         /* Initialize RPS limits (for userspace) */
8399         if (IS_CHERRYVIEW(dev_priv))
8400                 cherryview_init_gt_powersave(dev_priv);
8401         else if (IS_VALLEYVIEW(dev_priv))
8402                 valleyview_init_gt_powersave(dev_priv);
8403         else if (INTEL_GEN(dev_priv) >= 6)
8404                 gen6_init_rps_frequencies(dev_priv);
8405
8406         /* Derive initial user preferences/limits from the hardware limits */
8407         rps->idle_freq = rps->min_freq;
8408         rps->cur_freq = rps->idle_freq;
8409
8410         rps->max_freq_softlimit = rps->max_freq;
8411         rps->min_freq_softlimit = rps->min_freq;
8412
8413         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
8414                 rps->min_freq_softlimit =
8415                         max_t(int,
8416                               rps->efficient_freq,
8417                               intel_freq_opcode(dev_priv, 450));
8418
8419         /* After setting max-softlimit, find the overclock max freq */
8420         if (IS_GEN(dev_priv, 6) ||
8421             IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
8422                 u32 params = 0;
8423
8424                 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
8425                 if (params & BIT(31)) { /* OC supported */
8426                         DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
8427                                          (rps->max_freq & 0xff) * 50,
8428                                          (params & 0xff) * 50);
8429                         rps->max_freq = params & 0xff;
8430                 }
8431         }
8432
8433         /* Finally allow us to boost to max by default */
8434         rps->boost_freq = rps->max_freq;
8435
8436         mutex_unlock(&dev_priv->pcu_lock);
8437 }
8438
8439 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
8440 {
8441         if (IS_VALLEYVIEW(dev_priv))
8442                 valleyview_cleanup_gt_powersave(dev_priv);
8443
8444         if (!HAS_RC6(dev_priv))
8445                 pm_runtime_put(&dev_priv->drm.pdev->dev);
8446 }
8447
8448 /**
8449  * intel_suspend_gt_powersave - suspend PM work and helper threads
8450  * @dev_priv: i915 device
8451  *
8452  * We don't want to disable RC6 or other features here, we just want
8453  * to make sure any work we've queued has finished and won't bother
8454  * us while we're suspended.
8455  */
8456 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
8457 {
8458         if (INTEL_GEN(dev_priv) < 6)
8459                 return;
8460
8461         /* gen6_rps_idle() will be called later to disable interrupts */
8462 }
8463
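/*
 * Note: intel_disable_rc6()/intel_disable_rps() early-return when their
 * 'enabled' flags are already clear, so the sanitize path below forces the
 * flags to true first. That guarantees intel_disable_gt_powersave() really
 * walks the disable sequences even though the software state after a reset
 * no longer matches the hardware.
 */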
8464 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
8465 {
8466         dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
8467         dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
8468         intel_disable_gt_powersave(dev_priv);
8469
8470         if (INTEL_GEN(dev_priv) >= 11)
8471                 gen11_reset_rps_interrupts(dev_priv);
8472         else if (INTEL_GEN(dev_priv) >= 6)
8473                 gen6_reset_rps_interrupts(dev_priv);
8474 }
8475
8476 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
8477 {
8478         lockdep_assert_held(&i915->pcu_lock);
8479
8480         if (!i915->gt_pm.llc_pstate.enabled)
8481                 return;
8482
8483         /* Currently there is no HW configuration to be done to disable. */
8484
8485         i915->gt_pm.llc_pstate.enabled = false;
8486 }
8487
8488 static void intel_disable_rc6(struct drm_i915_private *dev_priv)
8489 {
8490         lockdep_assert_held(&dev_priv->pcu_lock);
8491
8492         if (!dev_priv->gt_pm.rc6.enabled)
8493                 return;
8494
8495         if (INTEL_GEN(dev_priv) >= 9)
8496                 gen9_disable_rc6(dev_priv);
8497         else if (IS_CHERRYVIEW(dev_priv))
8498                 cherryview_disable_rc6(dev_priv);
8499         else if (IS_VALLEYVIEW(dev_priv))
8500                 valleyview_disable_rc6(dev_priv);
8501         else if (INTEL_GEN(dev_priv) >= 6)
8502                 gen6_disable_rc6(dev_priv);
8503
8504         dev_priv->gt_pm.rc6.enabled = false;
8505 }
8506
8507 static void intel_disable_rps(struct drm_i915_private *dev_priv)
8508 {
8509         lockdep_assert_held(&dev_priv->pcu_lock);
8510
8511         if (!dev_priv->gt_pm.rps.enabled)
8512                 return;
8513
8514         if (INTEL_GEN(dev_priv) >= 9)
8515                 gen9_disable_rps(dev_priv);
8516         else if (IS_CHERRYVIEW(dev_priv))
8517                 cherryview_disable_rps(dev_priv);
8518         else if (IS_VALLEYVIEW(dev_priv))
8519                 valleyview_disable_rps(dev_priv);
8520         else if (INTEL_GEN(dev_priv) >= 6)
8521                 gen6_disable_rps(dev_priv);
8522         else if (IS_IRONLAKE_M(dev_priv))
8523                 ironlake_disable_drps(dev_priv);
8524
8525         dev_priv->gt_pm.rps.enabled = false;
8526 }
8527
8528 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
8529 {
8530         mutex_lock(&dev_priv->pcu_lock);
8531
8532         intel_disable_rc6(dev_priv);
8533         intel_disable_rps(dev_priv);
8534         if (HAS_LLC(dev_priv))
8535                 intel_disable_llc_pstate(dev_priv);
8536
8537         mutex_unlock(&dev_priv->pcu_lock);
8538 }
8539
8540 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
8541 {
8542         lockdep_assert_held(&i915->pcu_lock);
8543
8544         if (i915->gt_pm.llc_pstate.enabled)
8545                 return;
8546
8547         gen6_update_ring_freq(i915);
8548
8549         i915->gt_pm.llc_pstate.enabled = true;
8550 }
8551
8552 static void intel_enable_rc6(struct drm_i915_private *dev_priv)
8553 {
8554         lockdep_assert_held(&dev_priv->pcu_lock);
8555
8556         if (dev_priv->gt_pm.rc6.enabled)
8557                 return;
8558
8559         if (IS_CHERRYVIEW(dev_priv))
8560                 cherryview_enable_rc6(dev_priv);
8561         else if (IS_VALLEYVIEW(dev_priv))
8562                 valleyview_enable_rc6(dev_priv);
8563         else if (INTEL_GEN(dev_priv) >= 9)
8564                 gen9_enable_rc6(dev_priv);
8565         else if (IS_BROADWELL(dev_priv))
8566                 gen8_enable_rc6(dev_priv);
8567         else if (INTEL_GEN(dev_priv) >= 6)
8568                 gen6_enable_rc6(dev_priv);
8569
8570         dev_priv->gt_pm.rc6.enabled = true;
8571 }
8572
8573 static void intel_enable_rps(struct drm_i915_private *dev_priv)
8574 {
8575         struct intel_rps *rps = &dev_priv->gt_pm.rps;
8576
8577         lockdep_assert_held(&dev_priv->pcu_lock);
8578
8579         if (rps->enabled)
8580                 return;
8581
8582         if (IS_CHERRYVIEW(dev_priv)) {
8583                 cherryview_enable_rps(dev_priv);
8584         } else if (IS_VALLEYVIEW(dev_priv)) {
8585                 valleyview_enable_rps(dev_priv);
8586         } else if (INTEL_GEN(dev_priv) >= 9) {
8587                 gen9_enable_rps(dev_priv);
8588         } else if (IS_BROADWELL(dev_priv)) {
8589                 gen8_enable_rps(dev_priv);
8590         } else if (INTEL_GEN(dev_priv) >= 6) {
8591                 gen6_enable_rps(dev_priv);
8592         } else if (IS_IRONLAKE_M(dev_priv)) {
8593                 ironlake_enable_drps(dev_priv);
8594                 intel_init_emon(dev_priv);
8595         }
8596
8597         WARN_ON(rps->max_freq < rps->min_freq);
8598         WARN_ON(rps->idle_freq > rps->max_freq);
8599
8600         WARN_ON(rps->efficient_freq < rps->min_freq);
8601         WARN_ON(rps->efficient_freq > rps->max_freq);
8602
8603         rps->enabled = true;
8604 }
8605
8606 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
8607 {
8608         /* Powersaving is controlled by the host when inside a VM */
8609         if (intel_vgpu_active(dev_priv))
8610                 return;
8611
8612         mutex_lock(&dev_priv->pcu_lock);
8613
8614         if (HAS_RC6(dev_priv))
8615                 intel_enable_rc6(dev_priv);
8616         intel_enable_rps(dev_priv);
8617         if (HAS_LLC(dev_priv))
8618                 intel_enable_llc_pstate(dev_priv);
8619
8620         mutex_unlock(&dev_priv->pcu_lock);
8621 }
8622
8623 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
8624 {
8625         /*
8626          * On Ibex Peak and Cougar Point, we need to disable clock
8627          * gating for the panel power sequencer or it will fail to
8628          * start up when no ports are active.
8629          */
8630         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
8631 }
8632
8633 static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
8634 {
8635         enum pipe pipe;
8636
8637         for_each_pipe(dev_priv, pipe) {
8638                 I915_WRITE(DSPCNTR(pipe),
8639                            I915_READ(DSPCNTR(pipe)) |
8640                            DISPPLANE_TRICKLE_FEED_DISABLE);
8641
8642                 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
8643                 POSTING_READ(DSPSURF(pipe));
8644         }
8645 }
8646
8647 static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
8648 {
8649         u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8650
8651         /*
8652          * Required for FBC
8653          * WaFbcDisableDpfcClockGating:ilk
8654          */
8655         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
8656                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
8657                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
8658
8659         I915_WRITE(PCH_3DCGDIS0,
8660                    MARIUNIT_CLOCK_GATE_DISABLE |
8661                    SVSMUNIT_CLOCK_GATE_DISABLE);
8662         I915_WRITE(PCH_3DCGDIS1,
8663                    VFMUNIT_CLOCK_GATE_DISABLE);
8664
8665         /*
8666          * According to the spec the following bits should be set in
8667          * order to enable memory self-refresh:
8668          * The bit 22/21 of 0x42004
8669          * The bit 5 of 0x42020
8670          * The bit 15 of 0x45000
8671          */
8672         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8673                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
8674                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
8675         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
8676         I915_WRITE(DISP_ARB_CTL,
8677                    (I915_READ(DISP_ARB_CTL) |
8678                     DISP_FBC_WM_DIS));
8679
8680         /*
8681          * Based on the document from hardware guys the following bits
8682          * should be set unconditionally in order to enable FBC.
8683          * The bit 22 of 0x42000
8684          * The bit 22 of 0x42004
8685          * The bit 7,8,9 of 0x42020.
8686          */
8687         if (IS_IRONLAKE_M(dev_priv)) {
8688                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
8689                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8690                            I915_READ(ILK_DISPLAY_CHICKEN1) |
8691                            ILK_FBCQ_DIS);
8692                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8693                            I915_READ(ILK_DISPLAY_CHICKEN2) |
8694                            ILK_DPARB_GATE);
8695         }
8696
8697         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8698
8699         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8700                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8701                    ILK_ELPIN_409_SELECT);
8702         I915_WRITE(_3D_CHICKEN2,
8703                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
8704                    _3D_CHICKEN2_WM_READ_PIPELINED);
8705
8706         /* WaDisableRenderCachePipelinedFlush:ilk */
8707         I915_WRITE(CACHE_MODE_0,
8708                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
8709
8710         /* WaDisable_RenderCache_OperationalFlush:ilk */
8711         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8712
8713         g4x_disable_trickle_feed(dev_priv);
8714
8715         ibx_init_clock_gating(dev_priv);
8716 }
8717
8718 static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
8719 {
8720         int pipe;
8721         u32 val;
8722
8723         /*
8724          * On Ibex Peak and Cougar Point, we need to disable clock
8725          * gating for the panel power sequencer or it will fail to
8726          * start up when no ports are active.
8727          */
8728         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
8729                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
8730                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
8731         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
8732                    DPLS_EDP_PPS_FIX_DIS);
8733         /* The loop below fixes a weird display corruption (a few pixels shifted
8734          * downward) seen only on the LVDS panels of some HP Ivy Bridge laptops.
8735          */
8736         for_each_pipe(dev_priv, pipe) {
8737                 val = I915_READ(TRANS_CHICKEN2(pipe));
8738                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
8739                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8740                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
8741                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8742                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
8743                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
8744                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
8745                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
8746         }
8747         /* WADP0ClockGatingDisable */
8748         for_each_pipe(dev_priv, pipe) {
8749                 I915_WRITE(TRANS_CHICKEN1(pipe),
8750                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8751         }
8752 }
8753
8754 static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
8755 {
8756         u32 tmp;
8757
8758         tmp = I915_READ(MCH_SSKPD);
8759         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
8760                 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x, this can cause underruns.\n",
8761                               tmp);
8762 }
8763
8764 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
8765 {
8766         u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8767
8768         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8769
8770         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8771                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8772                    ILK_ELPIN_409_SELECT);
8773
8774         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
8775         I915_WRITE(_3D_CHICKEN,
8776                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
8777
8778         /* WaDisable_RenderCache_OperationalFlush:snb */
8779         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8780
8781         /*
8782          * BSpec recommends 8x4 when MSAA is used,
8783          * however in practice 16x4 seems fastest.
8784          *
8785          * Note that PS/WM thread counts depend on the WIZ hashing
8786          * disable bit, which we don't touch here, but it's good
8787          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8788          */
8789         I915_WRITE(GEN6_GT_MODE,
8790                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8791
8792         I915_WRITE(CACHE_MODE_0,
8793                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
8794
8795         I915_WRITE(GEN6_UCGCTL1,
8796                    I915_READ(GEN6_UCGCTL1) |
8797                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
8798                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8799
8800         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8801          * gating disable must be set.  Failure to set it results in
8802          * flickering pixels due to Z write ordering failures after
8803          * some amount of runtime in the Mesa "fire" demo, and Unigine
8804          * Sanctuary and Tropics, and apparently anything else with
8805          * alpha test or pixel discard.
8806          *
8807          * According to the spec, bit 11 (RCCUNIT) must also be set,
8808          * but we haven't debugged actual test cases to confirm that.
8809          *
8810          * WaDisableRCCUnitClockGating:snb
8811          * WaDisableRCPBUnitClockGating:snb
8812          */
8813         I915_WRITE(GEN6_UCGCTL2,
8814                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
8815                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
8816
8817         /* WaStripsFansDisableFastClipPerformanceFix:snb */
8818         I915_WRITE(_3D_CHICKEN3,
8819                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
8820
8821         /*
8822          * Bspec says:
8823          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
8824          * 3DSTATE_SF number of SF output attributes is more than 16."
8825          */
8826         I915_WRITE(_3D_CHICKEN3,
8827                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
8828
8829         /*
8830          * According to the spec the following bits should be
8831          * set in order to enable memory self-refresh and fbc:
8832          * The bit21 and bit22 of 0x42000
8833          * The bit21 and bit22 of 0x42004
8834          * The bit5 and bit7 of 0x42020
8835          * The bit14 of 0x70180
8836          * The bit14 of 0x71180
8837          *
8838          * WaFbcAsynchFlipDisableFbcQueue:snb
8839          */
8840         I915_WRITE(ILK_DISPLAY_CHICKEN1,
8841                    I915_READ(ILK_DISPLAY_CHICKEN1) |
8842                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
8843         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8844                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8845                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
8846         I915_WRITE(ILK_DSPCLK_GATE_D,
8847                    I915_READ(ILK_DSPCLK_GATE_D) |
8848                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
8849                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
8850
8851         g4x_disable_trickle_feed(dev_priv);
8852
8853         cpt_init_clock_gating(dev_priv);
8854
8855         gen6_check_mch_setup(dev_priv);
8856 }
8857
8858 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
8859 {
8860         u32 reg = I915_READ(GEN7_FF_THREAD_MODE);
8861
8862         /*
8863          * WaVSThreadDispatchOverride:ivb,vlv
8864          *
8865          * This actually overrides the dispatch
8866          * mode for all thread types.
8867          */
8868         reg &= ~GEN7_FF_SCHED_MASK;
8869         reg |= GEN7_FF_TS_SCHED_HW;
8870         reg |= GEN7_FF_VS_SCHED_HW;
8871         reg |= GEN7_FF_DS_SCHED_HW;
8872
8873         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
8874 }
8875
8876 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
8877 {
8878         /*
8879          * TODO: this bit should only be enabled when really needed, then
8880          * disabled when not needed anymore in order to save power.
8881          */
8882         if (HAS_PCH_LPT_LP(dev_priv))
8883                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
8884                            I915_READ(SOUTH_DSPCLK_GATE_D) |
8885                            PCH_LP_PARTITION_LEVEL_DISABLE);
8886
8887         /* WADPOClockGatingDisable:hsw */
8888         I915_WRITE(TRANS_CHICKEN1(PIPE_A),
8889                    I915_READ(TRANS_CHICKEN1(PIPE_A)) |
8890                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8891 }
8892
8893 static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
8894 {
8895         if (HAS_PCH_LPT_LP(dev_priv)) {
8896                 u32 val = I915_READ(SOUTH_DSPCLK_GATE_D);
8897
8898                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
8899                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
8900         }
8901 }
8902
8903 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
8904                                    int general_prio_credits,
8905                                    int high_prio_credits)
8906 {
8907         u32 misccpctl;
8908         u32 val;
8909
8910         /* WaTempDisableDOPClkGating:bdw */
8911         misccpctl = I915_READ(GEN7_MISCCPCTL);
8912         I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
8913
8914         val = I915_READ(GEN8_L3SQCREG1);
8915         val &= ~L3_PRIO_CREDITS_MASK;
8916         val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
8917         val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
8918         I915_WRITE(GEN8_L3SQCREG1, val);
8919
8920         /*
8921          * Wait at least 100 clocks before re-enabling clock gating.
8922          * See the definition of L3SQCREG1 in BSpec.
8923          */
8924         POSTING_READ(GEN8_L3SQCREG1);
8925         udelay(1);
8926         I915_WRITE(GEN7_MISCCPCTL, misccpctl);
8927 }
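/*
 * Note: the credits are packed into GEN8_L3SQCREG1 via the
 * L3_GENERAL_PRIO_CREDITS()/L3_HIGH_PRIO_CREDITS() field encodings. In this
 * file bdw programs 30/2 (WaProgramL3SqcReg1Default:bdw) and chv 38/2; the
 * "wait at least 100 clocks" requirement is approximated by the posting read
 * plus the udelay(1) before DOP clock gating is re-enabled.
 */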
8928
8929 static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
8930 {
8931         /* This is not a WA. Enable to reduce Sampler power */
8932         I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN,
8933                    I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE);
8934
8935         /* WaEnable32PlaneMode:icl */
8936         I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
8937                    _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE));
8938 }
8939
8940 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
8941 {
8942         if (!HAS_PCH_CNP(dev_priv))
8943                 return;
8944
8945         /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating:cnp */
8946         I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
8947                    CNP_PWM_CGE_GATING_DISABLE);
8948 }
8949
8950 static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
8951 {
8952         u32 val;

8953         cnp_init_clock_gating(dev_priv);
8954
8955         /* This is not a WA. Enable for better image quality */
8956         I915_WRITE(_3D_CHICKEN3,
8957                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
8958
8959         /* WaEnableChickenDCPR:cnl */
8960         I915_WRITE(GEN8_CHICKEN_DCPR_1,
8961                    I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
8962
8963         /* WaFbcWakeMemOn:cnl */
8964         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
8965                    DISP_FBC_MEMORY_WAKE);
8966
8967         val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
8968         /* ReadHitWriteOnlyDisable:cnl */
8969         val |= RCCUNIT_CLKGATE_DIS;
8970         /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
8971         if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
8972                 val |= SARBUNIT_CLKGATE_DIS;
8973         I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
8974
8975         /* Wa_2201832410:cnl */
8976         val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
8977         val |= GWUNIT_CLKGATE_DIS;
8978         I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
8979
8980         /* WaDisableVFclkgate:cnl */
8981         /* WaVFUnitClockGatingDisable:cnl */
8982         val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
8983         val |= VFUNIT_CLKGATE_DIS;
8984         I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
8985 }
8986
8987 static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
8988 {
8989         cnp_init_clock_gating(dev_priv);
8990         gen9_init_clock_gating(dev_priv);
8991
8992         /* WaFbcNukeOnHostModify:cfl */
8993         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8994                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8995 }
8996
8997 static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
8998 {
8999         gen9_init_clock_gating(dev_priv);
9000
9001         /* WaDisableSDEUnitClockGating:kbl */
9002         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
9003                 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9004                            GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9005
9006         /* WaDisableGamClockGating:kbl */
9007         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
9008                 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9009                            GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
9010
9011         /* WaFbcNukeOnHostModify:kbl */
9012         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9013                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9014 }
9015
9016 static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
9017 {
9018         gen9_init_clock_gating(dev_priv);
9019
9020         /* WAC6entrylatency:skl */
9021         I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
9022                    FBC_LLC_FULLY_OPEN);
9023
9024         /* WaFbcNukeOnHostModify:skl */
9025         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9026                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9027 }
9028
9029 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
9030 {
9031         /* The GTT cache must be disabled if the system is using 2M pages. */
9032         bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
9033                                                  I915_GTT_PAGE_SIZE_2M);
9034         enum pipe pipe;
9035
9036         /* WaSwitchSolVfFArbitrationPriority:bdw */
9037         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
9038
9039         /* WaPsrDPAMaskVBlankInSRD:bdw */
9040         I915_WRITE(CHICKEN_PAR1_1,
9041                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
9042
9043         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
9044         for_each_pipe(dev_priv, pipe) {
9045                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
9046                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
9047                            BDW_DPRS_MASK_VBLANK_SRD);
9048         }
9049
9050         /* WaVSRefCountFullforceMissDisable:bdw */
9051         /* WaDSRefCountFullforceMissDisable:bdw */
9052         I915_WRITE(GEN7_FF_THREAD_MODE,
9053                    I915_READ(GEN7_FF_THREAD_MODE) &
9054                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9055
9056         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9057                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9058
9059         /* WaDisableSDEUnitClockGating:bdw */
9060         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9061                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9062
9063         /* WaProgramL3SqcReg1Default:bdw */
9064         gen8_set_l3sqc_credits(dev_priv, 30, 2);
9065
9066         /* WaGttCachingOffByDefault:bdw */
9067         I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
9068
9069         /* WaKVMNotificationOnConfigChange:bdw */
9070         I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
9071                    | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
9072
9073         lpt_init_clock_gating(dev_priv);
9074
9075         /* WaDisableDopClockGating:bdw
9076          *
9077          * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
9078          * clock gating.
9079          */
9080         I915_WRITE(GEN6_UCGCTL1,
9081                    I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
9082 }
9083
9084 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
9085 {
9086         /* L3 caching of data atomics doesn't work -- disable it. */
9087         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
9088         I915_WRITE(HSW_ROW_CHICKEN3,
9089                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
9090
9091         /* This is required by WaCatErrorRejectionIssue:hsw */
9092         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9093                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9094                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9095
9096         /* WaVSRefCountFullforceMissDisable:hsw */
9097         I915_WRITE(GEN7_FF_THREAD_MODE,
9098                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
9099
9100         /* WaDisable_RenderCache_OperationalFlush:hsw */
9101         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9102
9103         /* enable HiZ Raw Stall Optimization */
9104         I915_WRITE(CACHE_MODE_0_GEN7,
9105                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9106
9107         /* WaDisable4x2SubspanOptimization:hsw */
9108         I915_WRITE(CACHE_MODE_1,
9109                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9110
9111         /*
9112          * BSpec recommends 8x4 when MSAA is used,
9113          * however in practice 16x4 seems fastest.
9114          *
9115          * Note that PS/WM thread counts depend on the WIZ hashing
9116          * disable bit, which we don't touch here, but it's good
9117          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9118          */
9119         I915_WRITE(GEN7_GT_MODE,
9120                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9121
9122         /* WaSampleCChickenBitEnable:hsw */
9123         I915_WRITE(HALF_SLICE_CHICKEN3,
9124                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
9125
9126         /* WaSwitchSolVfFArbitrationPriority:hsw */
9127         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
9128
9129         lpt_init_clock_gating(dev_priv);
9130 }
9131
9132 static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
9133 {
9134         u32 snpcr;
9135
9136         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
9137
9138         /* WaDisableEarlyCull:ivb */
9139         I915_WRITE(_3D_CHICKEN3,
9140                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9141
9142         /* WaDisableBackToBackFlipFix:ivb */
9143         I915_WRITE(IVB_CHICKEN3,
9144                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9145                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
9146
9147         /* WaDisablePSDDualDispatchEnable:ivb */
9148         if (IS_IVB_GT1(dev_priv))
9149                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9150                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9151
9152         /* WaDisable_RenderCache_OperationalFlush:ivb */
9153         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9154
9155         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
9156         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
9157                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
9158
9159         /* WaApplyL3ControlAndL3ChickenMode:ivb */
9160         I915_WRITE(GEN7_L3CNTLREG1,
9161                         GEN7_WA_FOR_GEN7_L3_CONTROL);
9162         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
9163                    GEN7_WA_L3_CHICKEN_MODE);
9164         if (IS_IVB_GT1(dev_priv))
9165                 I915_WRITE(GEN7_ROW_CHICKEN2,
9166                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9167         else {
9168                 /* must write both registers */
9169                 I915_WRITE(GEN7_ROW_CHICKEN2,
9170                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9171                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
9172                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9173         }
9174
9175         /* WaForceL3Serialization:ivb */
9176         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9177                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9178
9179         /*
9180          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9181          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
9182          */
9183         I915_WRITE(GEN6_UCGCTL2,
9184                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9185
9186         /* This is required by WaCatErrorRejectionIssue:ivb */
9187         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9188                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9189                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9190
9191         g4x_disable_trickle_feed(dev_priv);
9192
9193         gen7_setup_fixed_func_scheduler(dev_priv);
9194
9195         if (0) { /* causes HiZ corruption on ivb:gt1 */
9196                 /* enable HiZ Raw Stall Optimization */
9197                 I915_WRITE(CACHE_MODE_0_GEN7,
9198                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9199         }
9200
9201         /* WaDisable4x2SubspanOptimization:ivb */
9202         I915_WRITE(CACHE_MODE_1,
9203                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9204
9205         /*
9206          * BSpec recommends 8x4 when MSAA is used,
9207          * however in practice 16x4 seems fastest.
9208          *
9209          * Note that PS/WM thread counts depend on the WIZ hashing
9210          * disable bit, which we don't touch here, but it's good
9211          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9212          */
9213         I915_WRITE(GEN7_GT_MODE,
9214                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9215
9216         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
9217         snpcr &= ~GEN6_MBC_SNPCR_MASK;
9218         snpcr |= GEN6_MBC_SNPCR_MED;
9219         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
9220
9221         if (!HAS_PCH_NOP(dev_priv))
9222                 cpt_init_clock_gating(dev_priv);
9223
9224         gen6_check_mch_setup(dev_priv);
9225 }
9226
9227 static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
9228 {
9229         /* WaDisableEarlyCull:vlv */
9230         I915_WRITE(_3D_CHICKEN3,
9231                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9232
9233         /* WaDisableBackToBackFlipFix:vlv */
9234         I915_WRITE(IVB_CHICKEN3,
9235                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9236                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
9237
9238         /* WaPsdDispatchEnable:vlv */
9239         /* WaDisablePSDDualDispatchEnable:vlv */
9240         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9241                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
9242                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9243
9244         /* WaDisable_RenderCache_OperationalFlush:vlv */
9245         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9246
9247         /* WaForceL3Serialization:vlv */
9248         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9249                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9250
9251         /* WaDisableDopClockGating:vlv */
9252         I915_WRITE(GEN7_ROW_CHICKEN2,
9253                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9254
9255         /* This is required by WaCatErrorRejectionIssue:vlv */
9256         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9257                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9258                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9259
9260         gen7_setup_fixed_func_scheduler(dev_priv);
9261
9262         /*
9263          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9264          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
9265          */
9266         I915_WRITE(GEN6_UCGCTL2,
9267                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9268
9269         /* WaDisableL3Bank2xClockGate:vlv
9270          * Disabling L3 clock gating - MMIO 940c[25] = 1
9271          * Set bit 25 to disable L3_BANK_2x_CLK_GATING */
9272         I915_WRITE(GEN7_UCGCTL4,
9273                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
9274
9275         /*
9276          * BSpec says this must be set, even though
9277          * WaDisable4x2SubspanOptimization isn't listed for VLV.
9278          */
9279         I915_WRITE(CACHE_MODE_1,
9280                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9281
9282         /*
9283          * BSpec recommends 8x4 when MSAA is used,
9284          * however in practice 16x4 seems fastest.
9285          *
9286          * Note that PS/WM thread counts depend on the WIZ hashing
9287          * disable bit, which we don't touch here, but it's good
9288          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9289          */
9290         I915_WRITE(GEN7_GT_MODE,
9291                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9292
9293         /*
9294          * WaIncreaseL3CreditsForVLVB0:vlv
9295          * This is the hardware default actually.
9296          */
9297         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
9298
9299         /*
9300          * WaDisableVLVClockGating_VBIIssue:vlv
9301          * Disable clock gating on the GCFG unit to prevent a delay
9302          * in the reporting of vblank events.
9303          */
9304         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
9305 }
9306
9307 static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
9308 {
9309         /* WaVSRefCountFullforceMissDisable:chv */
9310         /* WaDSRefCountFullforceMissDisable:chv */
9311         I915_WRITE(GEN7_FF_THREAD_MODE,
9312                    I915_READ(GEN7_FF_THREAD_MODE) &
9313                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9314
9315         /* WaDisableSemaphoreAndSyncFlipWait:chv */
9316         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9317                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9318
9319         /* WaDisableCSUnitClockGating:chv */
9320         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9321                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
9322
9323         /* WaDisableSDEUnitClockGating:chv */
9324         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9325                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9326
9327         /*
9328          * WaProgramL3SqcReg1Default:chv
9329          * See gfxspecs/Related Documents/Performance Guide/
9330          * LSQC Setting Recommendations.
9331          */
9332         gen8_set_l3sqc_credits(dev_priv, 38, 2);
9333
9334         /*
9335          * GTT cache may not work with big pages, so if those
9336          * are ever enabled GTT cache may need to be disabled.
9337          */
9338         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
9339 }
9340
9341 static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
9342 {
9343         u32 dspclk_gate;
9344
9345         I915_WRITE(RENCLK_GATE_D1, 0);
9346         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
9347                    GS_UNIT_CLOCK_GATE_DISABLE |
9348                    CL_UNIT_CLOCK_GATE_DISABLE);
9349         I915_WRITE(RAMCLK_GATE_D, 0);
9350         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
9351                 OVRUNIT_CLOCK_GATE_DISABLE |
9352                 OVCUNIT_CLOCK_GATE_DISABLE;
9353         if (IS_GM45(dev_priv))
9354                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
9355         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
9356
9357         /* WaDisableRenderCachePipelinedFlush */
9358         I915_WRITE(CACHE_MODE_0,
9359                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
9360
9361         /* WaDisable_RenderCache_OperationalFlush:g4x */
9362         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9363
9364         g4x_disable_trickle_feed(dev_priv);
9365 }
9366
9367 static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
9368 {
9369         I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
9370         I915_WRITE(RENCLK_GATE_D2, 0);
9371         I915_WRITE(DSPCLK_GATE_D, 0);
9372         I915_WRITE(RAMCLK_GATE_D, 0);
9373         I915_WRITE16(DEUC, 0);
9374         I915_WRITE(MI_ARB_STATE,
9375                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9376
9377         /* WaDisable_RenderCache_OperationalFlush:gen4 */
9378         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9379 }
9380
9381 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
9382 {
9383         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
9384                    I965_RCC_CLOCK_GATE_DISABLE |
9385                    I965_RCPB_CLOCK_GATE_DISABLE |
9386                    I965_ISC_CLOCK_GATE_DISABLE |
9387                    I965_FBC_CLOCK_GATE_DISABLE);
9388         I915_WRITE(RENCLK_GATE_D2, 0);
9389         I915_WRITE(MI_ARB_STATE,
9390                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9391
9392         /* WaDisable_RenderCache_OperationalFlush:gen4 */
9393         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9394 }
9395
9396 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
9397 {
9398         u32 dstate = I915_READ(D_STATE);
9399
9400         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
9401                 DSTATE_DOT_CLOCK_GATING;
9402         I915_WRITE(D_STATE, dstate);
9403
9404         if (IS_PINEVIEW(dev_priv))
9405                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
9406
9407         /* IIR "flip pending" means done if this bit is set */
9408         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
9409
9410         /* interrupts should cause a wake up from C3 */
9411         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
9412
9413         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
9414         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
9415
9416         I915_WRITE(MI_ARB_STATE,
9417                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9418 }
9419
9420 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
9421 {
9422         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
9423
9424         /* interrupts should cause a wake up from C3 */
9425         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
9426                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
9427
9428         I915_WRITE(MEM_MODE,
9429                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
9430 }
9431
9432 static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
9433 {
9434         I915_WRITE(MEM_MODE,
9435                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
9436                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
9437 }
9438
9439 void intel_init_clock_gating(struct drm_i915_private *dev_priv)
9440 {
9441         dev_priv->display.init_clock_gating(dev_priv);
9442 }
9443
9444 void intel_suspend_hw(struct drm_i915_private *dev_priv)
9445 {
9446         if (HAS_PCH_LPT(dev_priv))
9447                 lpt_suspend_hw(dev_priv);
9448 }
9449
9450 static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
9451 {
9452         DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
9453 }
9454
9455 /**
9456  * intel_init_clock_gating_hooks - setup the clock gating hooks
9457  * @dev_priv: device private
9458  *
9459  * Setup the hooks that configure which clocks of a given platform can be
9460  * gated and also apply various GT and display specific workarounds for these
9461  * platforms. Note that some GT specific workarounds are applied separately
9462  * when GPU contexts or batchbuffers start their execution.
9463  */
9464 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
9465 {
9466         if (IS_ICELAKE(dev_priv))
9467                 dev_priv->display.init_clock_gating = icl_init_clock_gating;
9468         else if (IS_CANNONLAKE(dev_priv))
9469                 dev_priv->display.init_clock_gating = cnl_init_clock_gating;
9470         else if (IS_COFFEELAKE(dev_priv))
9471                 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
9472         else if (IS_SKYLAKE(dev_priv))
9473                 dev_priv->display.init_clock_gating = skl_init_clock_gating;
9474         else if (IS_KABYLAKE(dev_priv))
9475                 dev_priv->display.init_clock_gating = kbl_init_clock_gating;
9476         else if (IS_BROXTON(dev_priv))
9477                 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
9478         else if (IS_GEMINILAKE(dev_priv))
9479                 dev_priv->display.init_clock_gating = glk_init_clock_gating;
9480         else if (IS_BROADWELL(dev_priv))
9481                 dev_priv->display.init_clock_gating = bdw_init_clock_gating;
9482         else if (IS_CHERRYVIEW(dev_priv))
9483                 dev_priv->display.init_clock_gating = chv_init_clock_gating;
9484         else if (IS_HASWELL(dev_priv))
9485                 dev_priv->display.init_clock_gating = hsw_init_clock_gating;
9486         else if (IS_IVYBRIDGE(dev_priv))
9487                 dev_priv->display.init_clock_gating = ivb_init_clock_gating;
9488         else if (IS_VALLEYVIEW(dev_priv))
9489                 dev_priv->display.init_clock_gating = vlv_init_clock_gating;
9490         else if (IS_GEN(dev_priv, 6))
9491                 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
9492         else if (IS_GEN(dev_priv, 5))
9493                 dev_priv->display.init_clock_gating = ilk_init_clock_gating;
9494         else if (IS_G4X(dev_priv))
9495                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
9496         else if (IS_I965GM(dev_priv))
9497                 dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
9498         else if (IS_I965G(dev_priv))
9499                 dev_priv->display.init_clock_gating = i965g_init_clock_gating;
9500         else if (IS_GEN(dev_priv, 3))
9501                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
9502         else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
9503                 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
9504         else if (IS_GEN(dev_priv, 2))
9505                 dev_priv->display.init_clock_gating = i830_init_clock_gating;
9506         else {
9507                 MISSING_CASE(INTEL_DEVID(dev_priv));
9508                 dev_priv->display.init_clock_gating = nop_init_clock_gating;
9509         }
9510 }
9511
9512 /* Set up chip specific power management-related functions */
9513 void intel_init_pm(struct drm_i915_private *dev_priv)
9514 {
9515         /* For cxsr */
9516         if (IS_PINEVIEW(dev_priv))
9517                 i915_pineview_get_mem_freq(dev_priv);
9518         else if (IS_GEN(dev_priv, 5))
9519                 i915_ironlake_get_mem_freq(dev_priv);
9520
9521         /* For FIFO watermark updates */
9522         if (INTEL_GEN(dev_priv) >= 9) {
9523                 skl_setup_wm_latency(dev_priv);
9524                 dev_priv->display.initial_watermarks = skl_initial_wm;
9525                 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
9526                 dev_priv->display.compute_global_watermarks = skl_compute_wm;
9527         } else if (HAS_PCH_SPLIT(dev_priv)) {
9528                 ilk_setup_wm_latency(dev_priv);
9529
9530                 if ((IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[1] &&
9531                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
9532                     (!IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[0] &&
9533                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
9534                         dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
9535                         dev_priv->display.compute_intermediate_wm =
9536                                 ilk_compute_intermediate_wm;
9537                         dev_priv->display.initial_watermarks =
9538                                 ilk_initial_watermarks;
9539                         dev_priv->display.optimize_watermarks =
9540                                 ilk_optimize_watermarks;
9541                 } else {
9542                         DRM_DEBUG_KMS("Failed to read display plane latency. "
9543                                       "Disabling CxSR\n");
9544                 }
9545         } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9546                 vlv_setup_wm_latency(dev_priv);
9547                 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
9548                 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
9549                 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
9550                 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
9551                 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
9552         } else if (IS_G4X(dev_priv)) {
9553                 g4x_setup_wm_latency(dev_priv);
9554                 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
9555                 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
9556                 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
9557                 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
9558         } else if (IS_PINEVIEW(dev_priv)) {
9559                 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
9560                                             dev_priv->is_ddr3,
9561                                             dev_priv->fsb_freq,
9562                                             dev_priv->mem_freq)) {
9563                         DRM_INFO("failed to find known CxSR latency "
9564                                  "(found ddr%s fsb freq %d, mem freq %d), "
9565                                  "disabling CxSR\n",
9566                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
9567                                  dev_priv->fsb_freq, dev_priv->mem_freq);
9568                         /* Disable CxSR and never update its watermark again */
9569                         intel_set_memory_cxsr(dev_priv, false);
9570                         dev_priv->display.update_wm = NULL;
9571                 } else
9572                         dev_priv->display.update_wm = pineview_update_wm;
9573         } else if (IS_GEN(dev_priv, 4)) {
9574                 dev_priv->display.update_wm = i965_update_wm;
9575         } else if (IS_GEN(dev_priv, 3)) {
9576                 dev_priv->display.update_wm = i9xx_update_wm;
9577                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
9578         } else if (IS_GEN(dev_priv, 2)) {
9579                 if (INTEL_INFO(dev_priv)->num_pipes == 1) {
9580                         dev_priv->display.update_wm = i845_update_wm;
9581                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
9582                 } else {
9583                         dev_priv->display.update_wm = i9xx_update_wm;
9584                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
9585                 }
9586         } else {
9587                 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
9588         }
9589 }
9590
9591 static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
9592 {
9593         u32 flags =
9594                 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9595
9596         switch (flags) {
9597         case GEN6_PCODE_SUCCESS:
9598                 return 0;
9599         case GEN6_PCODE_UNIMPLEMENTED_CMD:
9600                 return -ENODEV;
9601         case GEN6_PCODE_ILLEGAL_CMD:
9602                 return -ENXIO;
9603         case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9604         case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9605                 return -EOVERFLOW;
9606         case GEN6_PCODE_TIMEOUT:
9607                 return -ETIMEDOUT;
9608         default:
9609                 MISSING_CASE(flags);
9610                 return 0;
9611         }
9612 }
9613
9614 static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
9615 {
9616         u32 flags =
9617                 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9618
9619         switch (flags) {
9620         case GEN6_PCODE_SUCCESS:
9621                 return 0;
9622         case GEN6_PCODE_ILLEGAL_CMD:
9623                 return -ENXIO;
9624         case GEN7_PCODE_TIMEOUT:
9625                 return -ETIMEDOUT;
9626         case GEN7_PCODE_ILLEGAL_DATA:
9627                 return -EINVAL;
9628         case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9629                 return -EOVERFLOW;
9630         default:
9631                 MISSING_CASE(flags);
9632                 return 0;
9633         }
9634 }
9635
9636 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
9637 {
9638         int status;
9639
9640         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9641
9642         /* GEN6_PCODE_* are outside of the forcewake domain, so we can
9643          * use the fw I915_READ variants to reduce the amount of work
9644          * required when reading/writing.
9645          */
9646
9647         if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9648                 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
9649                                  mbox, __builtin_return_address(0));
9650                 return -EAGAIN;
9651         }
9652
9653         I915_WRITE_FW(GEN6_PCODE_DATA, *val);
9654         I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9655         I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9656
9657         if (__intel_wait_for_register_fw(dev_priv,
9658                                          GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9659                                          500, 0, NULL)) {
9660                 DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
9661                           mbox, __builtin_return_address(0));
9662                 return -ETIMEDOUT;
9663         }
9664
9665         *val = I915_READ_FW(GEN6_PCODE_DATA);
9666         I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9667
9668         if (INTEL_GEN(dev_priv) > 6)
9669                 status = gen7_check_mailbox_status(dev_priv);
9670         else
9671                 status = gen6_check_mailbox_status(dev_priv);
9672
9673         if (status) {
9674                 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
9675                                  mbox, __builtin_return_address(0), status);
9676                 return status;
9677         }
9678
9679         return 0;
9680 }
9681
9682 int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
9683                                     u32 mbox, u32 val,
9684                                     int fast_timeout_us, int slow_timeout_ms)
9685 {
9686         int status;
9687
9688         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9689
9690         /* GEN6_PCODE_* are outside of the forcewake domain, so we can
9691          * use the fw I915_READ variants to reduce the amount of work
9692          * required when reading/writing.
9693          */
9694
9695         if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9696                 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
9697                                  val, mbox, __builtin_return_address(0));
9698                 return -EAGAIN;
9699         }
9700
9701         I915_WRITE_FW(GEN6_PCODE_DATA, val);
9702         I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9703         I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9704
9705         if (__intel_wait_for_register_fw(dev_priv,
9706                                          GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9707                                          fast_timeout_us, slow_timeout_ms,
9708                                          NULL)) {
9709                 DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
9710                           val, mbox, __builtin_return_address(0));
9711                 return -ETIMEDOUT;
9712         }
9713
9714         I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9715
9716         if (INTEL_GEN(dev_priv) > 6)
9717                 status = gen7_check_mailbox_status(dev_priv);
9718         else
9719                 status = gen6_check_mailbox_status(dev_priv);
9720
9721         if (status) {
9722                 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
9723                                  val, mbox, __builtin_return_address(0), status);
9724                 return status;
9725         }
9726
9727         return 0;
9728 }
9729
9730 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
9731                                   u32 request, u32 reply_mask, u32 reply,
9732                                   u32 *status)
9733 {
9734         u32 val = request;
9735
9736         *status = sandybridge_pcode_read(dev_priv, mbox, &val);
9737
9738         return *status || ((val & reply_mask) == reply);
9739 }
9740
9741 /**
9742  * skl_pcode_request - send PCODE request until acknowledgment
9743  * @dev_priv: device private
9744  * @mbox: PCODE mailbox ID the request is targeted for
9745  * @request: request ID
9746  * @reply_mask: mask used to check for request acknowledgment
9747  * @reply: value used to check for request acknowledgment
9748  * @timeout_base_ms: timeout for polling with preemption enabled
9749  *
9750  * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
9751  * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
9752  * The request is acknowledged once the PCODE reply dword equals @reply after
9753  * applying @reply_mask. Polling is first attempted with preemption enabled
9754  * for @timeout_base_ms; if this times out, polling is retried for another
9755  * 50 ms with preemption disabled.
9756  *
9757  * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
9758  * other error as reported by PCODE.
9759  */
9760 int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
9761                       u32 reply_mask, u32 reply, int timeout_base_ms)
9762 {
9763         u32 status;
9764         int ret;
9765
9766         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9767
9768 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
9769                                    &status)
9770
9771         /*
9772          * Prime the PCODE by doing a request first. Normally it guarantees
9773          * that a subsequent request, at most @timeout_base_ms later, succeeds.
9774          * _wait_for() doesn't guarantee when its passed condition is evaluated
9775          * the first time, so send the first request explicitly.
9776          */
9777         if (COND) {
9778                 ret = 0;
9779                 goto out;
9780         }
9781         ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
9782         if (!ret)
9783                 goto out;
9784
9785         /*
9786          * The above can time out if the number of requests was low (2 in the
9787          * worst case) _and_ PCODE was busy for some reason even after a
9788          * (queued) request and @timeout_base_ms delay. As a workaround retry
9789          * the poll with preemption disabled to maximize the number of
9790          * requests. Increase the timeout from @timeout_base_ms to 50ms to
9791          * account for interrupts that could reduce the number of these
9792          * requests, and for any quirks of the PCODE firmware that delay
9793          * the request completion.
9794          */
9795         DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
9796         WARN_ON_ONCE(timeout_base_ms > 3);
9797         preempt_disable();
9798         ret = wait_for_atomic(COND, 50);
9799         preempt_enable();
9800
9801 out:
9802         return ret ? ret : status;
9803 #undef COND
9804 }
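
/*
 * Illustrative usage sketch (not taken verbatim from this file): a caller
 * such as the cdclk code might invoke skl_pcode_request() roughly like
 *
 *	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 *				SKL_CDCLK_PREPARE_FOR_CHANGE,
 *				SKL_CDCLK_READY_FOR_CHANGE,
 *				SKL_CDCLK_READY_FOR_CHANGE, 3);
 *
 * i.e. keep resending the "prepare for cdclk change" request until pcode
 * reports readiness, with a 3 ms base timeout. The SKL_* mailbox and flag
 * names are assumed to come from the driver's register definitions.
 */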
9805
9806 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
9807 {
9808         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9809
9810         /*
9811          * N = val - 0xb7
9812          * Slow = Fast = GPLL ref * N
9813          */
9814         return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
9815 }
9816
9817 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
9818 {
9819         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9820
9821         return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
9822 }
9823
9824 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
9825 {
9826         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9827
9828         /*
9829          * N = val / 2
9830          * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
9831          */
9832         return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
9833 }
9834
9835 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
9836 {
9837         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9838
9839         /* CHV needs even values */
9840         return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
9841 }
9842
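/*
 * intel_gpu_freq() and intel_freq_opcode() below convert between the raw
 * frequency "opcode" used by the RPS/pcode interface and a frequency in MHz.
 * A rough, illustrative example (assuming GT_FREQUENCY_MULTIPLIER is 50 and
 * GEN9_FREQ_SCALER is 3): an opcode of 18 maps to 18 * 50 = 900 MHz on
 * gen6-8, and to 18 * 50 / 3 = 300 MHz on gen9+, while VLV/CHV instead use
 * the GPLL reference clock based formulas above.
 */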
9843 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
9844 {
9845         if (INTEL_GEN(dev_priv) >= 9)
9846                 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
9847                                          GEN9_FREQ_SCALER);
9848         else if (IS_CHERRYVIEW(dev_priv))
9849                 return chv_gpu_freq(dev_priv, val);
9850         else if (IS_VALLEYVIEW(dev_priv))
9851                 return byt_gpu_freq(dev_priv, val);
9852         else
9853                 return val * GT_FREQUENCY_MULTIPLIER;
9854 }
9855
9856 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
9857 {
9858         if (INTEL_GEN(dev_priv) >= 9)
9859                 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
9860                                          GT_FREQUENCY_MULTIPLIER);
9861         else if (IS_CHERRYVIEW(dev_priv))
9862                 return chv_freq_opcode(dev_priv, val);
9863         else if (IS_VALLEYVIEW(dev_priv))
9864                 return byt_freq_opcode(dev_priv, val);
9865         else
9866                 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
9867 }
9868
9869 void intel_pm_setup(struct drm_i915_private *dev_priv)
9870 {
9871         mutex_init(&dev_priv->pcu_lock);
9872         mutex_init(&dev_priv->gt_pm.rps.power.mutex);
9873
9874         atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
9875
9876         dev_priv->runtime_pm.suspended = false;
9877         atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
9878 }
9879
9880 static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
9881                              const i915_reg_t reg)
9882 {
9883         u32 lower, upper, tmp;
9884         int loop = 2;
9885
9886         /*
9887          * The registers accessed do not need forcewake. We borrow the
9888          * uncore lock to prevent concurrent access to the range select register.
9889          */
9890         lockdep_assert_held(&dev_priv->uncore.lock);
9891
9892         /*
9893          * vlv and chv residency counters are 40 bits in width.
9894          * With a control bit, we can choose between the upper or lower
9895          * 32 bit window into this counter.
9896          *
9897          * Although we always use the counter in high-range mode elsewhere,
9898          * userspace may attempt to read the value before rc6 is initialised,
9899          * before we have set the default VLV_COUNTER_CONTROL value. So always
9900          * set the high bit to be safe.
9901          */
9902         I915_WRITE_FW(VLV_COUNTER_CONTROL,
9903                       _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9904         upper = I915_READ_FW(reg);
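        /*
         * Sample the lower half with the window switched low, then re-read
         * the upper half with the window switched back high; if the upper
         * value changed while reading the lower one, the counter carried in
         * between and the snapshot is inconsistent, so retry (at most once
         * more).
         */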
9905         do {
9906                 tmp = upper;
9907
9908                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9909                               _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
9910                 lower = I915_READ_FW(reg);
9911
9912                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9913                               _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9914                 upper = I915_READ_FW(reg);
9915         } while (upper != tmp && --loop);
9916
9917         /*
9918          * Everywhere else we always use VLV_COUNTER_CONTROL with the
9919          * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
9920          * now.
9921          */
9922
9923         return lower | (u64)upper << 8;
9924 }
9925
9926 u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
9927                            const i915_reg_t reg)
9928 {
9929         u64 time_hw, prev_hw, overflow_hw;
9930         unsigned int fw_domains;
9931         unsigned long flags;
9932         unsigned int i;
9933         u32 mul, div;
9934
9935         if (!HAS_RC6(dev_priv))
9936                 return 0;
9937
9938         /*
9939          * Store previous hw counter values for counter wrap-around handling.
9940          *
9941          * There are only four interesting registers and they live next to each
9942          * other, so we can use the address offset relative to the smallest
9943          * one (GEN6_GT_GFX_RC6_LOCKED) as the index into driver storage.
9944          */
9945         i = (i915_mmio_reg_offset(reg) -
9946              i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
9947         if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency)))
9948                 return 0;
9949
9950         fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
9951
9952         spin_lock_irqsave(&dev_priv->uncore.lock, flags);
9953         intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
9954
9955         /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
9956         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9957                 mul = 1000000;
9958                 div = dev_priv->czclk_freq;
9959                 overflow_hw = BIT_ULL(40);
9960                 time_hw = vlv_residency_raw(dev_priv, reg);
9961         } else {
9962                 /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
9963                 if (IS_GEN9_LP(dev_priv)) {
9964                         mul = 10000;
9965                         div = 12;
9966                 } else {
9967                         mul = 1280;
9968                         div = 1;
9969                 }
9970
9971                 overflow_hw = BIT_ULL(32);
9972                 time_hw = I915_READ_FW(reg);
9973         }
9974
9975         /*
9976          * Counter wrap handling.
9977          *
9978          * This relies on a sufficient frequency of queries; otherwise the
9979          * counters can still wrap undetected.
9980          */
9981         prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i];
9982         dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw;
9983
9984         /* RC6 delta from last sample. */
9985         if (time_hw >= prev_hw)
9986                 time_hw -= prev_hw;
9987         else
9988                 time_hw += overflow_hw - prev_hw;
9989
9990         /* Add delta to RC6 extended raw driver copy. */
9991         time_hw += dev_priv->gt_pm.rc6.cur_residency[i];
9992         dev_priv->gt_pm.rc6.cur_residency[i] = time_hw;
9993
9994         intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
9995         spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
9996
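        /*
         * Scale the accumulated raw residency to nanoseconds:
         * time_hw * mul / div. E.g. with the default 1.28us units
         * (mul = 1280, div = 1), 1000 raw ticks correspond to 1,280,000 ns.
         */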
9997         return mul_u64_u32_div(time_hw, mul, div);
9998 }
9999
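/*
 * Extract the Current Actual GPU Frequency (CAGF) field from an RPSTAT
 * register value. The result is still in the platform's frequency "opcode"
 * units; callers would typically convert it with intel_gpu_freq() to get MHz.
 */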
10000 u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
10001 {
10002         u32 cagf;
10003
10004         if (INTEL_GEN(dev_priv) >= 9)
10005                 cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
10006         else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
10007                 cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
10008         else
10009                 cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
10010
10011         return cagf;
10012 }