drm/amd/display: reduce stack for dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeS...
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / display / dc / dml / dcn32 / display_mode_vba_util_32.c
1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29
30 unsigned int dml32_dscceComputeDelay(
31                 unsigned int bpc,
32                 double BPP,
33                 unsigned int sliceWidth,
34                 unsigned int numSlices,
35                 enum output_format_class pixelFormat,
36                 enum output_encoder_class Output)
37 {
38         // valid bpc         = source bits per component in the set of {8, 10, 12}
39         // valid bpp         = increments of 1/16 of a bit
40         //                    min = 6/7/8 in N420/N422/444, respectively
41         //                    max = such that compression is 1:1
42         //valid sliceWidth  = number of pixels per slice line,
43         //      must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
44         //valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
45         //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
46
47         // fixed value
48         unsigned int rcModelSize = 8192;
49
50         // N422/N420 operate at 2 pixels per clock
51         unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
52         Delay, pixels;
53
54         if (pixelFormat == dm_420)
55                 pixelsPerClock = 2;
56         else if (pixelFormat == dm_n422)
57                 pixelsPerClock = 2;
58         // #all other modes operate at 1 pixel per clock
59         else
60                 pixelsPerClock = 1;
61
62         //initial transmit delay as per PPS
63         initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
64
65         //compute ssm delay
66         if (bpc == 8)
67                 D = 81;
68         else if (bpc == 10)
69                 D = 89;
70         else
71                 D = 113;
72
73         //divide by pixel per cycle to compute slice width as seen by DSC
74         w = sliceWidth / pixelsPerClock;
75
76         //422 mode has an additional cycle of delay
77         if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
78                 s = 0;
79         else
80                 s = 1;
81
82         //main calculation for the dscce
83         ix = initalXmitDelay + 45;
84         wx = (w + 2) / 3;
85         p = 3 * wx - w;
86         l0 = ix / w;
87         a = ix + p * l0;
88         ax = (a + 2) / 3 + D + 6 + 1;
89         L = (ax + wx - 1) / wx;
90         if ((ix % w) == 0 && p != 0)
91                 lstall = 1;
92         else
93                 lstall = 0;
94         Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
95
96         //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
97         pixels = Delay * 3 * pixelsPerClock;
98
99 #ifdef __DML_VBA_DEBUG__
100         dml_print("DML::%s: bpc: %d\n", __func__, bpc);
101         dml_print("DML::%s: BPP: %f\n", __func__, BPP);
102         dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
103         dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
104         dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
105         dml_print("DML::%s: Output: %d\n", __func__, Output);
106         dml_print("DML::%s: pixels: %d\n", __func__, pixels);
107 #endif
108
109         return pixels;
110 }
111
112 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
113 {
114         unsigned int Delay = 0;
115
116         if (pixelFormat == dm_420) {
117                 //   sfr
118                 Delay = Delay + 2;
119                 //   dsccif
120                 Delay = Delay + 0;
121                 //   dscc - input deserializer
122                 Delay = Delay + 3;
123                 //   dscc gets pixels every other cycle
124                 Delay = Delay + 2;
125                 //   dscc - input cdc fifo
126                 Delay = Delay + 12;
127                 //   dscc gets pixels every other cycle
128                 Delay = Delay + 13;
129                 //   dscc - cdc uncertainty
130                 Delay = Delay + 2;
131                 //   dscc - output cdc fifo
132                 Delay = Delay + 7;
133                 //   dscc gets pixels every other cycle
134                 Delay = Delay + 3;
135                 //   dscc - cdc uncertainty
136                 Delay = Delay + 2;
137                 //   dscc - output serializer
138                 Delay = Delay + 1;
139                 //   sft
140                 Delay = Delay + 1;
141         } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
142                 //   sfr
143                 Delay = Delay + 2;
144                 //   dsccif
145                 Delay = Delay + 1;
146                 //   dscc - input deserializer
147                 Delay = Delay + 5;
148                 //  dscc - input cdc fifo
149                 Delay = Delay + 25;
150                 //   dscc - cdc uncertainty
151                 Delay = Delay + 2;
152                 //   dscc - output cdc fifo
153                 Delay = Delay + 10;
154                 //   dscc - cdc uncertainty
155                 Delay = Delay + 2;
156                 //   dscc - output serializer
157                 Delay = Delay + 1;
158                 //   sft
159                 Delay = Delay + 1;
160         } else {
161                 //   sfr
162                 Delay = Delay + 2;
163                 //   dsccif
164                 Delay = Delay + 0;
165                 //   dscc - input deserializer
166                 Delay = Delay + 3;
167                 //   dscc - input cdc fifo
168                 Delay = Delay + 12;
169                 //   dscc - cdc uncertainty
170                 Delay = Delay + 2;
171                 //   dscc - output cdc fifo
172                 Delay = Delay + 7;
173                 //   dscc - output serializer
174                 Delay = Delay + 1;
175                 //   dscc - cdc uncertainty
176                 Delay = Delay + 2;
177                 //   sft
178                 Delay = Delay + 1;
179         }
180
181         return Delay;
182 }
183
184
185 bool IsVertical(enum dm_rotation_angle Scan)
186 {
187         bool is_vert = false;
188
189         if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
190                 is_vert = true;
191         else
192                 is_vert = false;
193         return is_vert;
194 }
195
196 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
197                 double HRatio,
198                 double HRatioChroma,
199                 double VRatio,
200                 double VRatioChroma,
201                 double MaxDCHUBToPSCLThroughput,
202                 double MaxPSCLToLBThroughput,
203                 double PixelClock,
204                 enum source_format_class SourcePixelFormat,
205                 unsigned int HTaps,
206                 unsigned int HTapsChroma,
207                 unsigned int VTaps,
208                 unsigned int VTapsChroma,
209
210                 /* output */
211                 double *PSCL_THROUGHPUT,
212                 double *PSCL_THROUGHPUT_CHROMA,
213                 double *DPPCLKUsingSingleDPP)
214 {
215         double DPPCLKUsingSingleDPPLuma;
216         double DPPCLKUsingSingleDPPChroma;
217
218         if (HRatio > 1) {
219                 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
220                                 dml_ceil((double) HTaps / 6.0, 1.0));
221         } else {
222                 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
223         }
224
225         DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
226                         *PSCL_THROUGHPUT, 1);
227
228         if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
229                 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
230
231         if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
232                         SourcePixelFormat != dm_rgbe_alpha)) {
233                 *PSCL_THROUGHPUT_CHROMA = 0;
234                 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
235         } else {
236                 if (HRatioChroma > 1) {
237                         *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
238                                         HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
239                 } else {
240                         *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
241                 }
242                 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
243                                 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
244                 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
245                         DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
246                 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
247         }
248 }
249
250 void dml32_CalculateBytePerPixelAndBlockSizes(
251                 enum source_format_class SourcePixelFormat,
252                 enum dm_swizzle_mode SurfaceTiling,
253
254                 /* Output */
255                 unsigned int *BytePerPixelY,
256                 unsigned int *BytePerPixelC,
257                 double  *BytePerPixelDETY,
258                 double  *BytePerPixelDETC,
259                 unsigned int *BlockHeight256BytesY,
260                 unsigned int *BlockHeight256BytesC,
261                 unsigned int *BlockWidth256BytesY,
262                 unsigned int *BlockWidth256BytesC,
263                 unsigned int *MacroTileHeightY,
264                 unsigned int *MacroTileHeightC,
265                 unsigned int *MacroTileWidthY,
266                 unsigned int *MacroTileWidthC)
267 {
268         if (SourcePixelFormat == dm_444_64) {
269                 *BytePerPixelDETY = 8;
270                 *BytePerPixelDETC = 0;
271                 *BytePerPixelY = 8;
272                 *BytePerPixelC = 0;
273         } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
274                 *BytePerPixelDETY = 4;
275                 *BytePerPixelDETC = 0;
276                 *BytePerPixelY = 4;
277                 *BytePerPixelC = 0;
278         } else if (SourcePixelFormat == dm_444_16) {
279                 *BytePerPixelDETY = 2;
280                 *BytePerPixelDETC = 0;
281                 *BytePerPixelY = 2;
282                 *BytePerPixelC = 0;
283         } else if (SourcePixelFormat == dm_444_8) {
284                 *BytePerPixelDETY = 1;
285                 *BytePerPixelDETC = 0;
286                 *BytePerPixelY = 1;
287                 *BytePerPixelC = 0;
288         } else if (SourcePixelFormat == dm_rgbe_alpha) {
289                 *BytePerPixelDETY = 4;
290                 *BytePerPixelDETC = 1;
291                 *BytePerPixelY = 4;
292                 *BytePerPixelC = 1;
293         } else if (SourcePixelFormat == dm_420_8) {
294                 *BytePerPixelDETY = 1;
295                 *BytePerPixelDETC = 2;
296                 *BytePerPixelY = 1;
297                 *BytePerPixelC = 2;
298         } else if (SourcePixelFormat == dm_420_12) {
299                 *BytePerPixelDETY = 2;
300                 *BytePerPixelDETC = 4;
301                 *BytePerPixelY = 2;
302                 *BytePerPixelC = 4;
303         } else {
304                 *BytePerPixelDETY = 4.0 / 3;
305                 *BytePerPixelDETC = 8.0 / 3;
306                 *BytePerPixelY = 2;
307                 *BytePerPixelC = 4;
308         }
309 #ifdef __DML_VBA_DEBUG__
310         dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
311         dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
312         dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
313         dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
314         dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
315 #endif
316         if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
317                         || SourcePixelFormat == dm_444_16
318                         || SourcePixelFormat == dm_444_8
319                         || SourcePixelFormat == dm_mono_16
320                         || SourcePixelFormat == dm_mono_8
321                         || SourcePixelFormat == dm_rgbe)) {
322                 if (SurfaceTiling == dm_sw_linear)
323                         *BlockHeight256BytesY = 1;
324                 else if (SourcePixelFormat == dm_444_64)
325                         *BlockHeight256BytesY = 4;
326                 else if (SourcePixelFormat == dm_444_8)
327                         *BlockHeight256BytesY = 16;
328                 else
329                         *BlockHeight256BytesY = 8;
330
331                 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
332                 *BlockHeight256BytesC = 0;
333                 *BlockWidth256BytesC = 0;
334         } else {
335                 if (SurfaceTiling == dm_sw_linear) {
336                         *BlockHeight256BytesY = 1;
337                         *BlockHeight256BytesC = 1;
338                 } else if (SourcePixelFormat == dm_rgbe_alpha) {
339                         *BlockHeight256BytesY = 8;
340                         *BlockHeight256BytesC = 16;
341                 } else if (SourcePixelFormat == dm_420_8) {
342                         *BlockHeight256BytesY = 16;
343                         *BlockHeight256BytesC = 8;
344                 } else {
345                         *BlockHeight256BytesY = 8;
346                         *BlockHeight256BytesC = 8;
347                 }
348                 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
349                 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
350         }
351 #ifdef __DML_VBA_DEBUG__
352         dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
353         dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
354         dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
355         dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
356 #endif
357
358         if (SurfaceTiling == dm_sw_linear) {
359                 *MacroTileHeightY = *BlockHeight256BytesY;
360                 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
361                 *MacroTileHeightC = *BlockHeight256BytesC;
362                 if (*MacroTileHeightC == 0)
363                         *MacroTileWidthC = 0;
364                 else
365                         *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
366         } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
367                         SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
368                 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
369                 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
370                 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
371                 if (*MacroTileHeightC == 0)
372                         *MacroTileWidthC = 0;
373                 else
374                         *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
375         } else {
376                 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
377                 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
378                 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
379                 if (*MacroTileHeightC == 0)
380                         *MacroTileWidthC = 0;
381                 else
382                         *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
383         }
384
385 #ifdef __DML_VBA_DEBUG__
386         dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
387         dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
388         dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
389         dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
390 #endif
391 } // CalculateBytePerPixelAndBlockSizes
392
393 void dml32_CalculateSwathAndDETConfiguration(
394                 struct dml32_CalculateSwathAndDETConfiguration *st_vars,
395                 unsigned int DETSizeOverride[],
396                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
397                 unsigned int ConfigReturnBufferSizeInKByte,
398                 unsigned int MaxTotalDETInKByte,
399                 unsigned int MinCompressedBufferSizeInKByte,
400                 double ForceSingleDPP,
401                 unsigned int NumberOfActiveSurfaces,
402                 unsigned int nomDETInKByte,
403                 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
404                 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
405                 unsigned int PixelChunkSizeKBytes,
406                 unsigned int ROBSizeKBytes,
407                 unsigned int CompressedBufferSegmentSizeInkByteFinal,
408                 enum output_encoder_class Output[],
409                 double ReadBandwidthLuma[],
410                 double ReadBandwidthChroma[],
411                 double MaximumSwathWidthLuma[],
412                 double MaximumSwathWidthChroma[],
413                 enum dm_rotation_angle SourceRotation[],
414                 bool ViewportStationary[],
415                 enum source_format_class SourcePixelFormat[],
416                 enum dm_swizzle_mode SurfaceTiling[],
417                 unsigned int ViewportWidth[],
418                 unsigned int ViewportHeight[],
419                 unsigned int ViewportXStart[],
420                 unsigned int ViewportYStart[],
421                 unsigned int ViewportXStartC[],
422                 unsigned int ViewportYStartC[],
423                 unsigned int SurfaceWidthY[],
424                 unsigned int SurfaceWidthC[],
425                 unsigned int SurfaceHeightY[],
426                 unsigned int SurfaceHeightC[],
427                 unsigned int Read256BytesBlockHeightY[],
428                 unsigned int Read256BytesBlockHeightC[],
429                 unsigned int Read256BytesBlockWidthY[],
430                 unsigned int Read256BytesBlockWidthC[],
431                 enum odm_combine_mode ODMMode[],
432                 unsigned int BlendingAndTiming[],
433                 unsigned int BytePerPixY[],
434                 unsigned int BytePerPixC[],
435                 double BytePerPixDETY[],
436                 double BytePerPixDETC[],
437                 unsigned int HActive[],
438                 double HRatio[],
439                 double HRatioChroma[],
440                 unsigned int DPPPerSurface[],
441
442                 /* Output */
443                 unsigned int swath_width_luma_ub[],
444                 unsigned int swath_width_chroma_ub[],
445                 double SwathWidth[],
446                 double SwathWidthChroma[],
447                 unsigned int SwathHeightY[],
448                 unsigned int SwathHeightC[],
449                 unsigned int DETBufferSizeInKByte[],
450                 unsigned int DETBufferSizeY[],
451                 unsigned int DETBufferSizeC[],
452                 bool *UnboundedRequestEnabled,
453                 unsigned int *CompressedBufferSizeInkByte,
454                 unsigned int *CompBufReservedSpaceKBytes,
455                 bool *CompBufReservedSpaceNeedAdjustment,
456                 bool ViewportSizeSupportPerSurface[],
457                 bool *ViewportSizeSupport)
458 {
459         unsigned int k;
460
461         st_vars->TotalActiveDPP = 0;
462         st_vars->NoChromaSurfaces = true;
463
464 #ifdef __DML_VBA_DEBUG__
465         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
466         dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
467         dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
468 #endif
469         dml32_CalculateSwathWidth(ForceSingleDPP,
470                         NumberOfActiveSurfaces,
471                         SourcePixelFormat,
472                         SourceRotation,
473                         ViewportStationary,
474                         ViewportWidth,
475                         ViewportHeight,
476                         ViewportXStart,
477                         ViewportYStart,
478                         ViewportXStartC,
479                         ViewportYStartC,
480                         SurfaceWidthY,
481                         SurfaceWidthC,
482                         SurfaceHeightY,
483                         SurfaceHeightC,
484                         ODMMode,
485                         BytePerPixY,
486                         BytePerPixC,
487                         Read256BytesBlockHeightY,
488                         Read256BytesBlockHeightC,
489                         Read256BytesBlockWidthY,
490                         Read256BytesBlockWidthC,
491                         BlendingAndTiming,
492                         HActive,
493                         HRatio,
494                         DPPPerSurface,
495
496                         /* Output */
497                         st_vars->SwathWidthdoubleDPP,
498                         st_vars->SwathWidthdoubleDPPChroma,
499                         SwathWidth,
500                         SwathWidthChroma,
501                         st_vars->MaximumSwathHeightY,
502                         st_vars->MaximumSwathHeightC,
503                         swath_width_luma_ub,
504                         swath_width_chroma_ub);
505
506         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
507                 st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k];
508                 st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k];
509 #ifdef __DML_VBA_DEBUG__
510                 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
511                 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
512                 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
513                 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]);
514                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
515                                 st_vars->RoundedUpMaxSwathSizeBytesY[k]);
516                 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
517                 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
518                 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]);
519                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
520                                 st_vars->RoundedUpMaxSwathSizeBytesC[k]);
521 #endif
522
523                 if (SourcePixelFormat[k] == dm_420_10) {
524                         st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256);
525                         st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256);
526                 }
527         }
528
529         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
530                 st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
531                 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
532                                 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
533                         st_vars->NoChromaSurfaces = false;
534                 }
535         }
536
537         // By default, just set the reserved space to 2 pixel chunks size
538         *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
539
540         // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
541         // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
542         // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
543         *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512);
544
545         if (*CompBufReservedSpaceNeedAdjustment == 1) {
546                 *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512;
547         }
548
549         #ifdef __DML_VBA_DEBUG__
550                 dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
551                 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
552         #endif
553
554         *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
555
556         dml32_CalculateDETBufferSize(DETSizeOverride,
557                         UseMALLForPStateChange,
558                         ForceSingleDPP,
559                         NumberOfActiveSurfaces,
560                         *UnboundedRequestEnabled,
561                         nomDETInKByte,
562                         MaxTotalDETInKByte,
563                         ConfigReturnBufferSizeInKByte,
564                         MinCompressedBufferSizeInKByte,
565                         CompressedBufferSegmentSizeInkByteFinal,
566                         SourcePixelFormat,
567                         ReadBandwidthLuma,
568                         ReadBandwidthChroma,
569                         st_vars->RoundedUpMaxSwathSizeBytesY,
570                         st_vars->RoundedUpMaxSwathSizeBytesC,
571                         DPPPerSurface,
572
573                         /* Output */
574                         DETBufferSizeInKByte,    // per hubp pipe
575                         CompressedBufferSizeInkByte);
576
577 #ifdef __DML_VBA_DEBUG__
578         dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP);
579         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
580         dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
581         dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
582         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
583         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
584 #endif
585
586         *ViewportSizeSupport = true;
587         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
588
589                 st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
590                                 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
591 #ifdef __DML_VBA_DEBUG__
592                 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
593                                 st_vars->DETBufferSizeInKByteForSwathCalculation);
594 #endif
595
596                 if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
597                                 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
598                         SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
599                         SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
600                         st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
601                         st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
602                 } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
603                                 st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
604                                 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
605                         SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
606                         SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
607                         st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
608                         st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
609                 } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
610                                 st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <=
611                                 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
612                         SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
613                         SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
614                         st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
615                         st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
616                 } else {
617                         SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
618                         SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
619                         st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
620                         st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
621                 }
622
623                 if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 >
624                                 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
625                                 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
626                                                 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
627                         *ViewportSizeSupport = false;
628                         ViewportSizeSupportPerSurface[k] = false;
629                 } else {
630                         ViewportSizeSupportPerSurface[k] = true;
631                 }
632
633                 if (SwathHeightC[k] == 0) {
634 #ifdef __DML_VBA_DEBUG__
635                         dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
636 #endif
637                         DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
638                         DETBufferSizeC[k] = 0;
639                 } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) {
640 #ifdef __DML_VBA_DEBUG__
641                         dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
642 #endif
643                         DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
644                         DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
645                 } else {
646 #ifdef __DML_VBA_DEBUG__
647                         dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
648 #endif
649                         DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
650                         DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
651                 }
652
653 #ifdef __DML_VBA_DEBUG__
654                 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
655                 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
656                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
657                                 k, st_vars->RoundedUpMaxSwathSizeBytesY[k]);
658                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
659                                 k, st_vars->RoundedUpMaxSwathSizeBytesC[k]);
660                 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY);
661                 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC);
662                 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
663                 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
664                 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
665                 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
666                                 ViewportSizeSupportPerSurface[k]);
667 #endif
668
669         }
670 } // CalculateSwathAndDETConfiguration
671
672 void dml32_CalculateSwathWidth(
673                 bool                            ForceSingleDPP,
674                 unsigned int                    NumberOfActiveSurfaces,
675                 enum source_format_class        SourcePixelFormat[],
676                 enum dm_rotation_angle          SourceRotation[],
677                 bool                            ViewportStationary[],
678                 unsigned int                    ViewportWidth[],
679                 unsigned int                    ViewportHeight[],
680                 unsigned int                    ViewportXStart[],
681                 unsigned int                    ViewportYStart[],
682                 unsigned int                    ViewportXStartC[],
683                 unsigned int                    ViewportYStartC[],
684                 unsigned int                    SurfaceWidthY[],
685                 unsigned int                    SurfaceWidthC[],
686                 unsigned int                    SurfaceHeightY[],
687                 unsigned int                    SurfaceHeightC[],
688                 enum odm_combine_mode           ODMMode[],
689                 unsigned int                    BytePerPixY[],
690                 unsigned int                    BytePerPixC[],
691                 unsigned int                    Read256BytesBlockHeightY[],
692                 unsigned int                    Read256BytesBlockHeightC[],
693                 unsigned int                    Read256BytesBlockWidthY[],
694                 unsigned int                    Read256BytesBlockWidthC[],
695                 unsigned int                    BlendingAndTiming[],
696                 unsigned int                    HActive[],
697                 double                          HRatio[],
698                 unsigned int                    DPPPerSurface[],
699
700                 /* Output */
701                 double                          SwathWidthdoubleDPPY[],
702                 double                          SwathWidthdoubleDPPC[],
703                 double                          SwathWidthY[], // per-pipe
704                 double                          SwathWidthC[], // per-pipe
705                 unsigned int                    MaximumSwathHeightY[],
706                 unsigned int                    MaximumSwathHeightC[],
707                 unsigned int                    swath_width_luma_ub[], // per-pipe
708                 unsigned int                    swath_width_chroma_ub[]) // per-pipe
709 {
710         unsigned int k, j;
711         enum odm_combine_mode MainSurfaceODMMode;
712
713     unsigned int surface_width_ub_l;
714     unsigned int surface_height_ub_l;
715     unsigned int surface_width_ub_c;
716     unsigned int surface_height_ub_c;
717
718 #ifdef __DML_VBA_DEBUG__
719         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
720         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
721 #endif
722
723         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
724                 if (!IsVertical(SourceRotation[k]))
725                         SwathWidthdoubleDPPY[k] = ViewportWidth[k];
726                 else
727                         SwathWidthdoubleDPPY[k] = ViewportHeight[k];
728
729 #ifdef __DML_VBA_DEBUG__
730                 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
731                 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
732 #endif
733
734                 MainSurfaceODMMode = ODMMode[k];
735                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
736                         if (BlendingAndTiming[k] == j)
737                                 MainSurfaceODMMode = ODMMode[j];
738                 }
739
740                 if (ForceSingleDPP) {
741                         SwathWidthY[k] = SwathWidthdoubleDPPY[k];
742                 } else {
743                         if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
744                                 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
745                                                 dml_round(HActive[k] / 4.0 * HRatio[k]));
746                         } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
747                                 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
748                                                 dml_round(HActive[k] / 2.0 * HRatio[k]));
749                         } else if (DPPPerSurface[k] == 2) {
750                                 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
751                         } else {
752                                 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
753                         }
754                 }
755
756 #ifdef __DML_VBA_DEBUG__
757                 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
758                 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
759                 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
760                 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
761                 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
762 #endif
763
764                 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
765                                 SourcePixelFormat[k] == dm_420_12) {
766                         SwathWidthC[k] = SwathWidthY[k] / 2;
767                         SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
768                 } else {
769                         SwathWidthC[k] = SwathWidthY[k];
770                         SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
771                 }
772
773                 if (ForceSingleDPP == true) {
774                         SwathWidthY[k] = SwathWidthdoubleDPPY[k];
775                         SwathWidthC[k] = SwathWidthdoubleDPPC[k];
776                 }
777
778                 surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
779                 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
780                 surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
781                 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
782
783 #ifdef __DML_VBA_DEBUG__
784                 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
785                 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
786                 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
787                 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
788                 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
789                 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
790                 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
791                 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
792                 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
793                 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
794 #endif
795
796                 if (!IsVertical(SourceRotation[k])) {
797                         MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
798                         MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
799                         if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
800                                 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
801                                                 dml_floor(ViewportXStart[k] +
802                                                                 SwathWidthY[k] +
803                                                                 Read256BytesBlockWidthY[k] - 1,
804                                                                 Read256BytesBlockWidthY[k]) -
805                                                                 dml_floor(ViewportXStart[k],
806                                                                 Read256BytesBlockWidthY[k]));
807                         } else {
808                                 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
809                                                 dml_ceil(SwathWidthY[k] - 1,
810                                                                 Read256BytesBlockWidthY[k]) +
811                                                                 Read256BytesBlockWidthY[k]);
812                         }
813                         if (BytePerPixC[k] > 0) {
814                                 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
815                                         swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
816                                                         dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
817                                                                         Read256BytesBlockWidthC[k] - 1,
818                                                                         Read256BytesBlockWidthC[k]) -
819                                                                         dml_floor(ViewportXStartC[k],
820                                                                         Read256BytesBlockWidthC[k]));
821                                 } else {
822                                         swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
823                                                         dml_ceil(SwathWidthC[k] - 1,
824                                                                 Read256BytesBlockWidthC[k]) +
825                                                                 Read256BytesBlockWidthC[k]);
826                                 }
827                         } else {
828                                 swath_width_chroma_ub[k] = 0;
829                         }
830                 } else {
831                         MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
832                         MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
833
834                         if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
835                                 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
836                                                 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
837                                                 Read256BytesBlockHeightY[k]) -
838                                                 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
839                         } else {
840                                 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
841                                                 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
842                         }
843                         if (BytePerPixC[k] > 0) {
844                                 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
845                                         swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
846                                                         dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
847                                                                         Read256BytesBlockHeightC[k] - 1,
848                                                                         Read256BytesBlockHeightC[k]) -
849                                                                         dml_floor(ViewportYStartC[k],
850                                                                                         Read256BytesBlockHeightC[k]));
851                                 } else {
852                                         swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
853                                                         dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
854                                                         Read256BytesBlockHeightC[k]);
855                                 }
856                         } else {
857                                 swath_width_chroma_ub[k] = 0;
858                         }
859                 }
860
861 #ifdef __DML_VBA_DEBUG__
862                 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
863                 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
864                 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
865                 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
866 #endif
867
868         }
869 } // CalculateSwathWidth
870
871 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
872                         unsigned int TotalNumberOfActiveDPP,
873                         bool NoChroma,
874                         enum output_encoder_class Output,
875                         enum dm_swizzle_mode SurfaceTiling,
876                         bool CompBufReservedSpaceNeedAdjustment,
877                         bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
878 {
879         bool ret_val = false;
880
881         ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
882                         TotalNumberOfActiveDPP == 1 && NoChroma);
883         if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
884                 ret_val = false;
885
886         if (SurfaceTiling == dm_sw_linear)
887                 ret_val = false;
888
889         if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
890                 ret_val = false;
891
892 #ifdef __DML_VBA_DEBUG__
893         dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
894         dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
895         dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
896 #endif
897
898         return (ret_val);
899 }
900
901 void dml32_CalculateDETBufferSize(
902                 unsigned int DETSizeOverride[],
903                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
904                 bool ForceSingleDPP,
905                 unsigned int NumberOfActiveSurfaces,
906                 bool UnboundedRequestEnabled,
907                 unsigned int nomDETInKByte,
908                 unsigned int MaxTotalDETInKByte,
909                 unsigned int ConfigReturnBufferSizeInKByte,
910                 unsigned int MinCompressedBufferSizeInKByte,
911                 unsigned int CompressedBufferSegmentSizeInkByteFinal,
912                 enum source_format_class SourcePixelFormat[],
913                 double ReadBandwidthLuma[],
914                 double ReadBandwidthChroma[],
915                 unsigned int RoundedUpMaxSwathSizeBytesY[],
916                 unsigned int RoundedUpMaxSwathSizeBytesC[],
917                 unsigned int DPPPerSurface[],
918                 /* Output */
919                 unsigned int DETBufferSizeInKByte[],
920                 unsigned int *CompressedBufferSizeInkByte)
921 {
922         unsigned int DETBufferSizePoolInKByte;
923         unsigned int NextDETBufferPieceInKByte;
924         bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
925         bool NextPotentialSurfaceToAssignDETPieceFound;
926         unsigned int NextSurfaceToAssignDETPiece;
927         double TotalBandwidth;
928         double BandwidthOfSurfacesNotAssignedDETPiece;
929         unsigned int max_minDET;
930         unsigned int minDET;
931         unsigned int minDET_pipe;
932         unsigned int j, k;
933
934 #ifdef __DML_VBA_DEBUG__
935         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
936         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
937         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
938         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
939         dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
940         dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
941         dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
942         dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
943                         CompressedBufferSegmentSizeInkByteFinal);
944 #endif
945
946         // Note: Will use default det size if that fits 2 swaths
947         if (UnboundedRequestEnabled) {
948                 if (DETSizeOverride[0] > 0) {
949                         DETBufferSizeInKByte[0] = DETSizeOverride[0];
950                 } else {
951                         DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
952                                         ((double) RoundedUpMaxSwathSizeBytesY[0] +
953                                                         (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
954                 }
955                 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
956         } else {
957                 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
958                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
959                         DETBufferSizeInKByte[k] = nomDETInKByte;
960                         if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
961                                         SourcePixelFormat[k] == dm_420_12) {
962                                 max_minDET = nomDETInKByte - 64;
963                         } else {
964                                 max_minDET = nomDETInKByte;
965                         }
966                         minDET = 128;
967                         minDET_pipe = 0;
968
969                         // add DET resource until can hold 2 full swaths
970                         while (minDET <= max_minDET && minDET_pipe == 0) {
971                                 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
972                                                 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
973                                         minDET_pipe = minDET;
974                                 minDET = minDET + 64;
975                         }
976
977 #ifdef __DML_VBA_DEBUG__
978                         dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
979                         dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
980                         dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
981                         dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
982                                         RoundedUpMaxSwathSizeBytesY[k]);
983                         dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
984                                         RoundedUpMaxSwathSizeBytesC[k]);
985 #endif
986
987                         if (minDET_pipe == 0) {
988                                 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
989                                                 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
990 #ifdef __DML_VBA_DEBUG__
991                                 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
992                                                 __func__, k, minDET_pipe);
993 #endif
994                         }
995
996                         if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
997                                 DETBufferSizeInKByte[k] = 0;
998                         } else if (DETSizeOverride[k] > 0) {
999                                 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1000                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1001                                                 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1002                         } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1003                                 DETBufferSizeInKByte[k] = minDET_pipe;
1004                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1005                                                 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1006                         }
1007
1008 #ifdef __DML_VBA_DEBUG__
1009                         dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1010                         dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1011                         dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1012                         dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1013 #endif
1014                 }
1015
1016                 TotalBandwidth = 0;
1017                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1018                         if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1019                                 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1020                 }
1021 #ifdef __DML_VBA_DEBUG__
1022                 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1023                 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1024                         dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1025                 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1026                 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1027 #endif
1028                 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1029                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1030
1031                         if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1032                                 DETPieceAssignedToThisSurfaceAlready[k] = true;
1033                         } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1034                                         (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1035                                         ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1036                                 DETPieceAssignedToThisSurfaceAlready[k] = true;
1037                                 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1038                                                 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1039                         } else {
1040                                 DETPieceAssignedToThisSurfaceAlready[k] = false;
1041                         }
1042 #ifdef __DML_VBA_DEBUG__
1043                         dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1044                                         DETPieceAssignedToThisSurfaceAlready[k]);
1045                         dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1046                                         BandwidthOfSurfacesNotAssignedDETPiece);
1047 #endif
1048                 }
1049
1050                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1051                         NextPotentialSurfaceToAssignDETPieceFound = false;
1052                         NextSurfaceToAssignDETPiece = 0;
1053
1054                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1055 #ifdef __DML_VBA_DEBUG__
1056                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1057                                                 ReadBandwidthLuma[k]);
1058                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1059                                                 ReadBandwidthChroma[k]);
1060                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1061                                                 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1062                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1063                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1064                                 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1065                                                 NextSurfaceToAssignDETPiece);
1066 #endif
1067                                 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1068                                                 (!NextPotentialSurfaceToAssignDETPieceFound ||
1069                                                 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1070                                                 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1071                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1072                                         NextSurfaceToAssignDETPiece = k;
1073                                         NextPotentialSurfaceToAssignDETPieceFound = true;
1074                                 }
1075 #ifdef __DML_VBA_DEBUG__
1076                                 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1077                                                 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1078                                 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1079                                                 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1080 #endif
1081                         }
1082
1083                         if (NextPotentialSurfaceToAssignDETPieceFound) {
1084                                 // Note: To show the banker's rounding behavior in VBA and also the fact
1085                                 // that the DET buffer size varies due to precision issue
1086                                 //
1087                                 //double tmp1 =  ((double) DETBufferSizePoolInKByte *
1088                                 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1089                                 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1090                                 // BandwidthOfSurfacesNotAssignedDETPiece /
1091                                 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1092                                 //double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1093                                 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1094                                 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1095                                  //BandwidthOfSurfacesNotAssignedDETPiece /
1096                                 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1097                                 //
1098                                 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1099                                 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1100
1101                                 NextDETBufferPieceInKByte = dml_min(
1102                                         dml_round((double) DETBufferSizePoolInKByte *
1103                                                 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1104                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1105                                                 BandwidthOfSurfacesNotAssignedDETPiece /
1106                                                 ((ForceSingleDPP ? 1 :
1107                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1108                                                 (ForceSingleDPP ? 1 :
1109                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1110                                                 dml_floor((double) DETBufferSizePoolInKByte,
1111                                                 (ForceSingleDPP ? 1 :
1112                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1113
1114                                 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1115                                 // We should limit the per-pipe DET size to the nominal / max per pipe.
1116                                 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1117                                         if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1118                                                         nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1119                                                 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1120                                                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1121                                         } else {
1122                                                 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1123                                                 // already has the max per-pipe value
1124                                                 NextDETBufferPieceInKByte = 0;
1125                                         }
1126                                 }
1127
1128 #ifdef __DML_VBA_DEBUG__
1129                                 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1130                                         DETBufferSizePoolInKByte);
1131                                 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1132                                         NextSurfaceToAssignDETPiece);
1133                                 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1134                                         NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1135                                 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1136                                         NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1137                                 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1138                                         __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1139                                 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1140                                         NextDETBufferPieceInKByte);
1141                                 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1142                                         __func__, j, NextSurfaceToAssignDETPiece,
1143                                         DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1144 #endif
1145
1146                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1147                                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1148                                                 + NextDETBufferPieceInKByte
1149                                                 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1150 #ifdef __DML_VBA_DEBUG__
1151                                 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1152 #endif
1153
1154                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1155                                 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1156                                 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1157                                                 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1158                                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1159                         }
1160                 }
1161                 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1162         }
1163         *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1164
1165 #ifdef __DML_VBA_DEBUG__
1166         dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1167         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1168         for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1169                 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1170                                 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1171         }
1172 #endif
1173 } // CalculateDETBufferSize
1174
1175 void dml32_CalculateODMMode(
1176                 unsigned int MaximumPixelsPerLinePerDSCUnit,
1177                 unsigned int HActive,
1178                 enum output_encoder_class Output,
1179                 enum odm_combine_policy ODMUse,
1180                 double StateDispclk,
1181                 double MaxDispclk,
1182                 bool DSCEnable,
1183                 unsigned int TotalNumberOfActiveDPP,
1184                 unsigned int MaxNumDPP,
1185                 double PixelClock,
1186                 double DISPCLKDPPCLKDSCCLKDownSpreading,
1187                 double DISPCLKRampingMargin,
1188                 double DISPCLKDPPCLKVCOSpeed,
1189
1190                 /* Output */
1191                 bool *TotalAvailablePipesSupport,
1192                 unsigned int *NumberOfDPP,
1193                 enum odm_combine_mode *ODMMode,
1194                 double *RequiredDISPCLKPerSurface)
1195 {
1196
1197         double SurfaceRequiredDISPCLKWithoutODMCombine;
1198         double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1199         double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1200
1201         SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1202                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1203                         MaxDispclk);
1204         SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1205                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1206                         MaxDispclk);
1207         SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1208                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1209                         MaxDispclk);
1210         *TotalAvailablePipesSupport = true;
1211         *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1212
1213         if (ODMUse == dm_odm_combine_policy_none)
1214                 *ODMMode = dm_odm_combine_mode_disabled;
1215
1216         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1217         *NumberOfDPP = 0;
1218
1219         // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1220         // (ODMUse == "" || ODMUse == "CombineAsNeeded")
1221
1222         if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1223                         ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1224                                         (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) {
1225                 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1226                         *ODMMode = dm_odm_combine_mode_4to1;
1227                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1228                         *NumberOfDPP = 4;
1229                 } else {
1230                         *TotalAvailablePipesSupport = false;
1231                 }
1232         } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1233                         (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1234                                         SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1235                                         (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) {
1236                 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1237                         *ODMMode = dm_odm_combine_mode_2to1;
1238                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1239                         *NumberOfDPP = 2;
1240                 } else {
1241                         *TotalAvailablePipesSupport = false;
1242                 }
1243         } else {
1244                 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1245                         *NumberOfDPP = 1;
1246                 else
1247                         *TotalAvailablePipesSupport = false;
1248         }
1249 }
1250
1251 double dml32_CalculateRequiredDispclk(
1252                 enum odm_combine_mode ODMMode,
1253                 double PixelClock,
1254                 double DISPCLKDPPCLKDSCCLKDownSpreading,
1255                 double DISPCLKRampingMargin,
1256                 double DISPCLKDPPCLKVCOSpeed,
1257                 double MaxDispclk)
1258 {
1259         double RequiredDispclk = 0.;
1260         double PixelClockAfterODM;
1261         double DISPCLKWithRampingRoundedToDFSGranularity;
1262         double DISPCLKWithoutRampingRoundedToDFSGranularity;
1263         double MaxDispclkRoundedDownToDFSGranularity;
1264
1265         if (ODMMode == dm_odm_combine_mode_4to1)
1266                 PixelClockAfterODM = PixelClock / 4;
1267         else if (ODMMode == dm_odm_combine_mode_2to1)
1268                 PixelClockAfterODM = PixelClock / 2;
1269         else
1270                 PixelClockAfterODM = PixelClock;
1271
1272
1273         DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1274                         PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1275                                         * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1276
1277         DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1278                         PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1279
1280         MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1281
1282         if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1283                 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1284         else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1285                 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1286         else
1287                 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1288
1289         return RequiredDispclk;
1290 }
1291
/*
 * Snap Clock onto a value of the form (4 * VCOSpeed) / Divider.
 *
 * With round_up set, Divider is dml_floor(4 * VCOSpeed / Clock, 1.0), so the
 * result is not below Clock. Otherwise Divider is dml_ceil(...), so the
 * result is not above Clock. (Assumes dml_floor/dml_ceil with granularity 1.0
 * round to whole numbers - confirm against dml_inline_defs.h.)
 *
 * A non-positive Clock returns 0.
 *
 * NOTE(review): there is no guard for Divider == 0, which looks reachable
 * when round_up is set and Clock is larger than 4 * VCOSpeed.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double Divider;

	if (Clock <= 0.0)
		return 0.0;

	Divider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0)
			   : dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / Divider;
}
1302
1303 void dml32_CalculateOutputLink(
1304                 double PHYCLKPerState,
1305                 double PHYCLKD18PerState,
1306                 double PHYCLKD32PerState,
1307                 double Downspreading,
1308                 bool IsMainSurfaceUsingTheIndicatedTiming,
1309                 enum output_encoder_class Output,
1310                 enum output_format_class OutputFormat,
1311                 unsigned int HTotal,
1312                 unsigned int HActive,
1313                 double PixelClockBackEnd,
1314                 double ForcedOutputLinkBPP,
1315                 unsigned int DSCInputBitPerComponent,
1316                 unsigned int NumberOfDSCSlices,
1317                 double AudioSampleRate,
1318                 unsigned int AudioSampleLayout,
1319                 enum odm_combine_mode ODMModeNoDSC,
1320                 enum odm_combine_mode ODMModeDSC,
1321                 bool DSCEnable,
1322                 unsigned int OutputLinkDPLanes,
1323                 enum dm_output_link_dp_rate OutputLinkDPRate,
1324
1325                 /* Output */
1326                 bool *RequiresDSC,
1327                 double *RequiresFEC,
1328                 double  *OutBpp,
1329                 enum dm_output_type *OutputType,
1330                 enum dm_output_rate *OutputRate,
1331                 unsigned int *RequiredSlots)
1332 {
1333         bool LinkDSCEnable;
1334         unsigned int dummy;
1335         *RequiresDSC = false;
1336         *RequiresFEC = false;
1337         *OutBpp = 0;
1338         *OutputType = dm_output_type_unknown;
1339         *OutputRate = dm_output_rate_unknown;
1340
1341         if (IsMainSurfaceUsingTheIndicatedTiming) {
1342                 if (Output == dm_hdmi) {
1343                         *RequiresDSC = false;
1344                         *RequiresFEC = false;
1345                         *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1346                                         PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1347                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1348                                         ODMModeNoDSC, ODMModeDSC, &dummy);
1349                         //OutputTypeAndRate = "HDMI";
1350                         *OutputType = dm_output_type_hdmi;
1351
1352                 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1353                         if (DSCEnable == true) {
1354                                 *RequiresDSC = true;
1355                                 LinkDSCEnable = true;
1356                                 if (Output == dm_dp || Output == dm_dp2p0)
1357                                         *RequiresFEC = true;
1358                                 else
1359                                         *RequiresFEC = false;
1360                         } else {
1361                                 *RequiresDSC = false;
1362                                 LinkDSCEnable = false;
1363                                 if (Output == dm_dp2p0)
1364                                         *RequiresFEC = true;
1365                                 else
1366                                         *RequiresFEC = false;
1367                         }
1368                         if (Output == dm_dp2p0) {
1369                                 *OutBpp = 0;
1370                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1371                                                 PHYCLKD32PerState >= 10000 / 32) {
1372                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1373                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1374                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1375                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1376                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1377                                         if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1378                                                         ForcedOutputLinkBPP == 0) {
1379                                                 *RequiresDSC = true;
1380                                                 LinkDSCEnable = true;
1381                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1382                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1383                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1384                                                                 OutputFormat, DSCInputBitPerComponent,
1385                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1386                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1387                                         }
1388                                         //OutputTypeAndRate = Output & " UHBR10";
1389                                         *OutputType = dm_output_type_dp2p0;
1390                                         *OutputRate = dm_output_rate_dp_rate_uhbr10;
1391                                 }
1392                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1393                                                 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1394                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1395                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1396                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1397                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1398                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1399
1400                                         if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1401                                                         ForcedOutputLinkBPP == 0) {
1402                                                 *RequiresDSC = true;
1403                                                 LinkDSCEnable = true;
1404                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1405                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1406                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1407                                                                 OutputFormat, DSCInputBitPerComponent,
1408                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1409                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410                                         }
1411                                         //OutputTypeAndRate = Output & " UHBR13p5";
1412                                         *OutputType = dm_output_type_dp2p0;
1413                                         *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1414                                 }
1415                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1416                                                 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1417                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1418                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1419                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1420                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1421                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1422                                         if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1423                                                 *RequiresDSC = true;
1424                                                 LinkDSCEnable = true;
1425                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1426                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1427                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1428                                                                 OutputFormat, DSCInputBitPerComponent,
1429                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1430                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1431                                         }
1432                                         //OutputTypeAndRate = Output & " UHBR20";
1433                                         *OutputType = dm_output_type_dp2p0;
1434                                         *OutputRate = dm_output_rate_dp_rate_uhbr20;
1435                                 }
1436                         } else {
1437                                 *OutBpp = 0;
1438                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1439                                                 PHYCLKPerState >= 270) {
1440                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1441                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1442                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1443                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1444                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1445                                         if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1446                                                         ForcedOutputLinkBPP == 0) {
1447                                                 *RequiresDSC = true;
1448                                                 LinkDSCEnable = true;
1449                                                 if (Output == dm_dp)
1450                                                         *RequiresFEC = true;
1451                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1452                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1453                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1454                                                                 OutputFormat, DSCInputBitPerComponent,
1455                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1456                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1457                                         }
1458                                         //OutputTypeAndRate = Output & " HBR";
1459                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1460                                         *OutputRate = dm_output_rate_dp_rate_hbr;
1461                                 }
1462                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1463                                                 *OutBpp == 0 && PHYCLKPerState >= 540) {
1464                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1465                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1466                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1467                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1468                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1469
1470                                         if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1471                                                         ForcedOutputLinkBPP == 0) {
1472                                                 *RequiresDSC = true;
1473                                                 LinkDSCEnable = true;
1474                                                 if (Output == dm_dp)
1475                                                         *RequiresFEC = true;
1476
1477                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1478                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1479                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1480                                                                 OutputFormat, DSCInputBitPerComponent,
1481                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1482                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1483                                         }
1484                                         //OutputTypeAndRate = Output & " HBR2";
1485                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1486                                         *OutputRate = dm_output_rate_dp_rate_hbr2;
1487                                 }
1488                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1489                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1490                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1491                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output,
1492                                                         OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1493                                                         AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1494                                                         RequiredSlots);
1495
1496                                         if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1497                                                 *RequiresDSC = true;
1498                                                 LinkDSCEnable = true;
1499                                                 if (Output == dm_dp)
1500                                                         *RequiresFEC = true;
1501
1502                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1503                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1504                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1505                                                                 OutputFormat, DSCInputBitPerComponent,
1506                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1507                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1508                                         }
1509                                         //OutputTypeAndRate = Output & " HBR3";
1510                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1511                                         *OutputRate = dm_output_rate_dp_rate_hbr3;
1512                                 }
1513                         }
1514                 }
1515         }
1516 }
1517
/*
 * Derive the global DPP clock and the per-surface DPP clocks.
 *
 * Pass 1: each surface's clock is DPPCLKUsingSingleDPP[k] / DPPPerSurface[k],
 *         scaled by (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100).
 * The largest of these (via dml_max, starting from 0) is rounded up with
 * dml32_RoundToDFSGranularity() and stored in *GlobalDPPCLK.
 * Pass 2: each surface's clock is re-expressed as
 *         (*GlobalDPPCLK / 255) * dml_ceil(clock * 255 / *GlobalDPPCLK, 1.0).
 *
 * With NumberOfActiveSurfaces == 0 only *GlobalDPPCLK is written.
 *
 * NOTE(review): nothing here guards against DPPPerSurface[k] == 0 or a
 * resulting *GlobalDPPCLK of 0 in pass 2 - presumably callers guarantee
 * neither occurs; confirm.
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double DownSpreadScale = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double HighestDppclk = 0;
	double DppclkStep;
	unsigned int Surface;

	for (Surface = 0; Surface < NumberOfActiveSurfaces; Surface++) {
		Dppclk[Surface] = DPPCLKUsingSingleDPP[Surface] / DPPPerSurface[Surface] * DownSpreadScale;
		HighestDppclk = dml_max(HighestDppclk, Dppclk[Surface]);
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(HighestDppclk, 1, DISPCLKDPPCLKVCOSpeed);

	/* Per-surface clocks are expressed in steps of 1/255 of the global clock. */
	DppclkStep = *GlobalDPPCLK / 255;

	for (Surface = 0; Surface < NumberOfActiveSurfaces; Surface++)
		Dppclk[Surface] = DppclkStep * dml_ceil(Dppclk[Surface] * 255.0 / *GlobalDPPCLK, 1.0);
}
1539
1540 double dml32_TruncToValidBPP(
1541                 double LinkBitRate,
1542                 unsigned int Lanes,
1543                 unsigned int HTotal,
1544                 unsigned int HActive,
1545                 double PixelClock,
1546                 double DesiredBPP,
1547                 bool DSCEnable,
1548                 enum output_encoder_class Output,
1549                 enum output_format_class Format,
1550                 unsigned int DSCInputBitPerComponent,
1551                 unsigned int DSCSlices,
1552                 unsigned int AudioRate,
1553                 unsigned int AudioLayout,
1554                 enum odm_combine_mode ODMModeNoDSC,
1555                 enum odm_combine_mode ODMModeDSC,
1556                 /* Output */
1557                 unsigned int *RequiredSlots)
1558 {
1559         double    MaxLinkBPP;
1560         unsigned int   MinDSCBPP;
1561         double    MaxDSCBPP;
1562         unsigned int   NonDSCBPP0;
1563         unsigned int   NonDSCBPP1;
1564         unsigned int   NonDSCBPP2;
1565         unsigned int   NonDSCBPP3;
1566
1567         if (Format == dm_420) {
1568                 NonDSCBPP0 = 12;
1569                 NonDSCBPP1 = 15;
1570                 NonDSCBPP2 = 18;
1571                 MinDSCBPP = 6;
1572                 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1573         } else if (Format == dm_444) {
1574                 NonDSCBPP0 = 18;
1575                 NonDSCBPP1 = 24;
1576                 NonDSCBPP2 = 30;
1577                 NonDSCBPP3 = 36;
1578                 MinDSCBPP = 8;
1579                 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1580         } else {
1581                 if (Output == dm_hdmi) {
1582                         NonDSCBPP0 = 24;
1583                         NonDSCBPP1 = 24;
1584                         NonDSCBPP2 = 24;
1585                 } else {
1586                         NonDSCBPP0 = 16;
1587                         NonDSCBPP1 = 20;
1588                         NonDSCBPP2 = 24;
1589                 }
1590                 if (Format == dm_n422) {
1591                         MinDSCBPP = 7;
1592                         MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1593                 } else {
1594                         MinDSCBPP = 8;
1595                         MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1596                 }
1597         }
1598         if (Output == dm_dp2p0) {
1599                 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1600         } else if (DSCEnable && Output == dm_dp) {
1601                 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1602         } else {
1603                 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1604         }
1605
1606         if (DSCEnable) {
1607                 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1608                         MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1609                 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1610                         MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1611                 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1612                         MaxLinkBPP = 2 * MaxLinkBPP;
1613         } else {
1614                 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1615                         MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1616                 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1617                         MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1618                 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1619                         MaxLinkBPP = 2 * MaxLinkBPP;
1620         }
1621
1622         if (DesiredBPP == 0) {
1623                 if (DSCEnable) {
1624                         if (MaxLinkBPP < MinDSCBPP)
1625                                 return BPP_INVALID;
1626                         else if (MaxLinkBPP >= MaxDSCBPP)
1627                                 return MaxDSCBPP;
1628                         else
1629                                 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1630                 } else {
1631                         if (MaxLinkBPP >= NonDSCBPP3)
1632                                 return NonDSCBPP3;
1633                         else if (MaxLinkBPP >= NonDSCBPP2)
1634                                 return NonDSCBPP2;
1635                         else if (MaxLinkBPP >= NonDSCBPP1)
1636                                 return NonDSCBPP1;
1637                         else if (MaxLinkBPP >= NonDSCBPP0)
1638                                 return 16.0;
1639                         else
1640                                 return BPP_INVALID;
1641                 }
1642         } else {
1643                 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1644                                 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1645                                 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1646                         return BPP_INVALID;
1647                 else
1648                         return DesiredBPP;
1649         }
1650
1651         *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1652
1653         return BPP_INVALID;
1654 } // TruncToValidBPP
1655
1656 double dml32_RequiredDTBCLK(
1657                 bool              DSCEnable,
1658                 double               PixelClock,
1659                 enum output_format_class  OutputFormat,
1660                 double               OutputBpp,
1661                 unsigned int              DSCSlices,
1662                 unsigned int                 HTotal,
1663                 unsigned int                 HActive,
1664                 unsigned int              AudioRate,
1665                 unsigned int              AudioLayout)
1666 {
1667         double PixelWordRate;
1668         double HCActive;
1669         double HCBlank;
1670         double AverageTribyteRate;
1671         double HActiveTribyteRate;
1672
1673         if (DSCEnable != true)
1674                 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1675
1676         PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1677         HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1678                         dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1679         HCBlank = 64 + 32 *
1680                         dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1681         AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1682         HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1683         return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1684 }
1685
1686 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1687                 enum odm_combine_mode ODMMode,
1688                 unsigned int DSCInputBitPerComponent,
1689                 double OutputBpp,
1690                 unsigned int HActive,
1691                 unsigned int HTotal,
1692                 unsigned int NumberOfDSCSlices,
1693                 enum output_format_class  OutputFormat,
1694                 enum output_encoder_class Output,
1695                 double PixelClock,
1696                 double PixelClockBackEnd)
1697 {
1698         unsigned int DSCDelayRequirement_val;
1699
1700         if (DSCEnabled == true && OutputBpp != 0) {
1701                 if (ODMMode == dm_odm_combine_mode_4to1) {
1702                         DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1703                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1704                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1705                 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1706                         DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1707                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1708                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1709                 } else {
1710                         DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1711                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1712                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1713                 }
1714
1715                 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1716                                 dml_ceil(DSCDelayRequirement_val / HActive, 1);
1717
1718                 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1719
1720         } else {
1721                 DSCDelayRequirement_val = 0;
1722         }
1723
1724 #ifdef __DML_VBA_DEBUG__
1725         dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1726         dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1727         dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1728         dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1729         dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1730         dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1731         dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1732 #endif
1733
1734         return DSCDelayRequirement_val;
1735 }
1736
1737 void dml32_CalculateSurfaceSizeInMall(
1738                 unsigned int NumberOfActiveSurfaces,
1739                 unsigned int MALLAllocatedForDCN,
1740                 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1741                 bool DCCEnable[],
1742                 bool ViewportStationary[],
1743                 unsigned int ViewportXStartY[],
1744                 unsigned int ViewportYStartY[],
1745                 unsigned int ViewportXStartC[],
1746                 unsigned int ViewportYStartC[],
1747                 unsigned int ViewportWidthY[],
1748                 unsigned int ViewportHeightY[],
1749                 unsigned int BytesPerPixelY[],
1750                 unsigned int ViewportWidthC[],
1751                 unsigned int ViewportHeightC[],
1752                 unsigned int BytesPerPixelC[],
1753                 unsigned int SurfaceWidthY[],
1754                 unsigned int SurfaceWidthC[],
1755                 unsigned int SurfaceHeightY[],
1756                 unsigned int SurfaceHeightC[],
1757                 unsigned int Read256BytesBlockWidthY[],
1758                 unsigned int Read256BytesBlockWidthC[],
1759                 unsigned int Read256BytesBlockHeightY[],
1760                 unsigned int Read256BytesBlockHeightC[],
1761                 unsigned int ReadBlockWidthY[],
1762                 unsigned int ReadBlockWidthC[],
1763                 unsigned int ReadBlockHeightY[],
1764                 unsigned int ReadBlockHeightC[],
1765
1766                 /* Output */
1767                 unsigned int    SurfaceSizeInMALL[],
1768                 bool *ExceededMALLSize)
1769 {
1770         unsigned int TotalSurfaceSizeInMALL  = 0;
1771         unsigned int k;
1772
1773         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1774                 if (ViewportStationary[k]) {
1775                         SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1776                                         dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1777                                                 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1778                                                 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1779                                                 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1780                                                 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1781                                                 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1782
1783                         if (ReadBlockWidthC[k] > 0) {
1784                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1785                                                 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1786                                                         dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1787                                                         ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1788                                                         dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1789                                                         dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1790                                                         dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1791                                                         ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1792                                                         dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1793                                                         BytesPerPixelC[k];
1794                         }
1795                         if (DCCEnable[k] == true) {
1796                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1797                                                 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1798                                                         dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1799                                                         Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1800                                                         - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1801                                                         * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1802                                                         Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1803                                                         ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1804                                                         Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1805                                                         * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1806                                 if (Read256BytesBlockWidthC[k] > 0) {
1807                                         SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1808                                                         dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1809                                                                 Read256BytesBlockWidthC[k]),
1810                                                                 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1811                                                                 * Read256BytesBlockWidthC[k] - 1, 8 *
1812                                                                 Read256BytesBlockWidthC[k]) -
1813                                                                 dml_floor(ViewportXStartC[k], 8 *
1814                                                                 Read256BytesBlockWidthC[k])) *
1815                                                                 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1816                                                                 Read256BytesBlockHeightC[k]),
1817                                                                 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1818                                                                 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1819                                                                 Read256BytesBlockHeightC[k]) -
1820                                                                 dml_floor(ViewportYStartC[k], 8 *
1821                                                                 Read256BytesBlockHeightC[k])) *
1822                                                                 BytesPerPixelC[k] / 256;
1823                                 }
1824                         }
1825                 } else {
1826                         SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1827                                         ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1828                                         dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1829                                                         ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1830                                                         BytesPerPixelY[k];
1831                         if (ReadBlockWidthC[k] > 0) {
1832                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1833                                                 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1834                                                                 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1835                                                 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1836                                                                 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1837                                                                 BytesPerPixelC[k];
1838                         }
1839                         if (DCCEnable[k] == true) {
1840                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1841                                                 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1842                                                                 Read256BytesBlockWidthY[k] - 1), 8 *
1843                                                                 Read256BytesBlockWidthY[k]) *
1844                                                 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1845                                                                 Read256BytesBlockHeightY[k] - 1), 8 *
1846                                                                 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1847
1848                                 if (Read256BytesBlockWidthC[k] > 0) {
1849                                         SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1850                                                         dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1851                                                                         Read256BytesBlockWidthC[k] - 1), 8 *
1852                                                                         Read256BytesBlockWidthC[k]) *
1853                                                         dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1854                                                                         Read256BytesBlockHeightC[k] - 1), 8 *
1855                                                                         Read256BytesBlockHeightC[k]) *
1856                                                                         BytesPerPixelC[k] / 256;
1857                                 }
1858                         }
1859                 }
1860         }
1861
1862         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1863                 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1864                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1865         }
1866         *ExceededMALLSize =  (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true);
1867 } // CalculateSurfaceSizeInMall
1868
1869 void dml32_CalculateVMRowAndSwath(
1870                 struct dml32_CalculateVMRowAndSwath *st_vars,
1871                 unsigned int NumberOfActiveSurfaces,
1872                 DmlPipe myPipe[],
1873                 unsigned int SurfaceSizeInMALL[],
1874                 unsigned int PTEBufferSizeInRequestsLuma,
1875                 unsigned int PTEBufferSizeInRequestsChroma,
1876                 unsigned int DCCMetaBufferSizeBytes,
1877                 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1878                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1879                 unsigned int MALLAllocatedForDCN,
1880                 double SwathWidthY[],
1881                 double SwathWidthC[],
1882                 bool GPUVMEnable,
1883                 bool HostVMEnable,
1884                 unsigned int HostVMMaxNonCachedPageTableLevels,
1885                 unsigned int GPUVMMaxPageTableLevels,
1886                 unsigned int GPUVMMinPageSizeKBytes[],
1887                 unsigned int HostVMMinPageSize,
1888
1889                 /* Output */
1890                 bool PTEBufferSizeNotExceeded[],
1891                 bool DCCMetaBufferSizeNotExceeded[],
1892                 unsigned int dpte_row_width_luma_ub[],
1893                 unsigned int dpte_row_width_chroma_ub[],
1894                 unsigned int dpte_row_height_luma[],
1895                 unsigned int dpte_row_height_chroma[],
1896                 unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
1897                 unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
1898                 unsigned int meta_req_width[],
1899                 unsigned int meta_req_width_chroma[],
1900                 unsigned int meta_req_height[],
1901                 unsigned int meta_req_height_chroma[],
1902                 unsigned int meta_row_width[],
1903                 unsigned int meta_row_width_chroma[],
1904                 unsigned int meta_row_height[],
1905                 unsigned int meta_row_height_chroma[],
1906                 unsigned int vm_group_bytes[],
1907                 unsigned int dpte_group_bytes[],
1908                 unsigned int PixelPTEReqWidthY[],
1909                 unsigned int PixelPTEReqHeightY[],
1910                 unsigned int PTERequestSizeY[],
1911                 unsigned int PixelPTEReqWidthC[],
1912                 unsigned int PixelPTEReqHeightC[],
1913                 unsigned int PTERequestSizeC[],
1914                 unsigned int dpde0_bytes_per_frame_ub_l[],
1915                 unsigned int meta_pte_bytes_per_frame_ub_l[],
1916                 unsigned int dpde0_bytes_per_frame_ub_c[],
1917                 unsigned int meta_pte_bytes_per_frame_ub_c[],
1918                 double PrefetchSourceLinesY[],
1919                 double PrefetchSourceLinesC[],
1920                 double VInitPreFillY[],
1921                 double VInitPreFillC[],
1922                 unsigned int MaxNumSwathY[],
1923                 unsigned int MaxNumSwathC[],
1924                 double meta_row_bw[],
1925                 double dpte_row_bw[],
1926                 double PixelPTEBytesPerRow[],
1927                 double PDEAndMetaPTEBytesFrame[],
1928                 double MetaRowByte[],
1929                 bool use_one_row_for_frame[],
1930                 bool use_one_row_for_frame_flip[],
1931                 bool UsesMALLForStaticScreen[],
1932                 bool PTE_BUFFER_MODE[],
1933                 unsigned int BIGK_FRAGMENT_SIZE[])
1934 {
1935         unsigned int k;
1936
1937         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1938                 if (HostVMEnable == true) {
1939                         vm_group_bytes[k] = 512;
1940                         dpte_group_bytes[k] = 512;
1941                 } else if (GPUVMEnable == true) {
1942                         vm_group_bytes[k] = 2048;
1943                         if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1944                                 dpte_group_bytes[k] = 512;
1945                         else
1946                                 dpte_group_bytes[k] = 2048;
1947                 } else {
1948                         vm_group_bytes[k] = 0;
1949                         dpte_group_bytes[k] = 0;
1950                 }
1951
1952                 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
1953                                 myPipe[k].SourcePixelFormat == dm_420_12 ||
1954                                 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
1955                         if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
1956                                         !IsVertical(myPipe[k].SourceRotation)) {
1957                                 st_vars->PTEBufferSizeInRequestsForLuma[k] =
1958                                                 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
1959                                 st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k];
1960                         } else {
1961                                 st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
1962                                 st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
1963                         }
1964
1965                         st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
1966                                         myPipe[k].ViewportStationary,
1967                                         myPipe[k].DCCEnable,
1968                                         myPipe[k].DPPPerSurface,
1969                                         myPipe[k].BlockHeight256BytesC,
1970                                         myPipe[k].BlockWidth256BytesC,
1971                                         myPipe[k].SourcePixelFormat,
1972                                         myPipe[k].SurfaceTiling,
1973                                         myPipe[k].BytePerPixelC,
1974                                         myPipe[k].SourceRotation,
1975                                         SwathWidthC[k],
1976                                         myPipe[k].ViewportHeightChroma,
1977                                         myPipe[k].ViewportXStartC,
1978                                         myPipe[k].ViewportYStartC,
1979                                         GPUVMEnable,
1980                                         HostVMEnable,
1981                                         HostVMMaxNonCachedPageTableLevels,
1982                                         GPUVMMaxPageTableLevels,
1983                                         GPUVMMinPageSizeKBytes[k],
1984                                         HostVMMinPageSize,
1985                                         st_vars->PTEBufferSizeInRequestsForChroma[k],
1986                                         myPipe[k].PitchC,
1987                                         myPipe[k].DCCMetaPitchC,
1988                                         myPipe[k].BlockWidthC,
1989                                         myPipe[k].BlockHeightC,
1990
1991                                         /* Output */
1992                                         &st_vars->MetaRowByteC[k],
1993                                         &st_vars->PixelPTEBytesPerRowC[k],
1994                                         &dpte_row_width_chroma_ub[k],
1995                                         &dpte_row_height_chroma[k],
1996                                         &dpte_row_height_linear_chroma[k],
1997                                         &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k],
1998                                         &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k],
1999                                         &st_vars->dpte_row_height_chroma_one_row_per_frame[k],
2000                                         &meta_req_width_chroma[k],
2001                                         &meta_req_height_chroma[k],
2002                                         &meta_row_width_chroma[k],
2003                                         &meta_row_height_chroma[k],
2004                                         &PixelPTEReqWidthC[k],
2005                                         &PixelPTEReqHeightC[k],
2006                                         &PTERequestSizeC[k],
2007                                         &dpde0_bytes_per_frame_ub_c[k],
2008                                         &meta_pte_bytes_per_frame_ub_c[k]);
2009
2010                         PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2011                                         myPipe[k].VRatioChroma,
2012                                         myPipe[k].VTapsChroma,
2013                                         myPipe[k].InterlaceEnable,
2014                                         myPipe[k].ProgressiveToInterlaceUnitInOPP,
2015                                         myPipe[k].SwathHeightC,
2016                                         myPipe[k].SourceRotation,
2017                                         myPipe[k].ViewportStationary,
2018                                         SwathWidthC[k],
2019                                         myPipe[k].ViewportHeightChroma,
2020                                         myPipe[k].ViewportXStartC,
2021                                         myPipe[k].ViewportYStartC,
2022
2023                                         /* Output */
2024                                         &VInitPreFillC[k],
2025                                         &MaxNumSwathC[k]);
2026                 } else {
2027                         st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2028                         st_vars->PTEBufferSizeInRequestsForChroma[k] = 0;
2029                         st_vars->PixelPTEBytesPerRowC[k] = 0;
2030                         st_vars->PDEAndMetaPTEBytesFrameC = 0;
2031                         st_vars->MetaRowByteC[k] = 0;
2032                         MaxNumSwathC[k] = 0;
2033                         PrefetchSourceLinesC[k] = 0;
2034                         st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0;
2035                         st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2036                         st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2037                 }
2038
2039                 st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2040                                 myPipe[k].ViewportStationary,
2041                                 myPipe[k].DCCEnable,
2042                                 myPipe[k].DPPPerSurface,
2043                                 myPipe[k].BlockHeight256BytesY,
2044                                 myPipe[k].BlockWidth256BytesY,
2045                                 myPipe[k].SourcePixelFormat,
2046                                 myPipe[k].SurfaceTiling,
2047                                 myPipe[k].BytePerPixelY,
2048                                 myPipe[k].SourceRotation,
2049                                 SwathWidthY[k],
2050                                 myPipe[k].ViewportHeight,
2051                                 myPipe[k].ViewportXStart,
2052                                 myPipe[k].ViewportYStart,
2053                                 GPUVMEnable,
2054                                 HostVMEnable,
2055                                 HostVMMaxNonCachedPageTableLevels,
2056                                 GPUVMMaxPageTableLevels,
2057                                 GPUVMMinPageSizeKBytes[k],
2058                                 HostVMMinPageSize,
2059                                 st_vars->PTEBufferSizeInRequestsForLuma[k],
2060                                 myPipe[k].PitchY,
2061                                 myPipe[k].DCCMetaPitchY,
2062                                 myPipe[k].BlockWidthY,
2063                                 myPipe[k].BlockHeightY,
2064
2065                                 /* Output */
2066                                 &st_vars->MetaRowByteY[k],
2067                                 &st_vars->PixelPTEBytesPerRowY[k],
2068                                 &dpte_row_width_luma_ub[k],
2069                                 &dpte_row_height_luma[k],
2070                                 &dpte_row_height_linear_luma[k],
2071                                 &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k],
2072                                 &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k],
2073                                 &st_vars->dpte_row_height_luma_one_row_per_frame[k],
2074                                 &meta_req_width[k],
2075                                 &meta_req_height[k],
2076                                 &meta_row_width[k],
2077                                 &meta_row_height[k],
2078                                 &PixelPTEReqWidthY[k],
2079                                 &PixelPTEReqHeightY[k],
2080                                 &PTERequestSizeY[k],
2081                                 &dpde0_bytes_per_frame_ub_l[k],
2082                                 &meta_pte_bytes_per_frame_ub_l[k]);
2083
2084                 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2085                                 myPipe[k].VRatio,
2086                                 myPipe[k].VTaps,
2087                                 myPipe[k].InterlaceEnable,
2088                                 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2089                                 myPipe[k].SwathHeightY,
2090                                 myPipe[k].SourceRotation,
2091                                 myPipe[k].ViewportStationary,
2092                                 SwathWidthY[k],
2093                                 myPipe[k].ViewportHeight,
2094                                 myPipe[k].ViewportXStart,
2095                                 myPipe[k].ViewportYStart,
2096
2097                                 /* Output */
2098                                 &VInitPreFillY[k],
2099                                 &MaxNumSwathY[k]);
2100
2101                 PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC;
2102                 MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k];
2103
2104                 if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] &&
2105                                 st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) {
2106                         PTEBufferSizeNotExceeded[k] = true;
2107                 } else {
2108                         PTEBufferSizeNotExceeded[k] = false;
2109                 }
2110
2111                 st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2112                         st_vars->PTEBufferSizeInRequestsForLuma[k] &&
2113                         st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]);
2114         }
2115
2116         dml32_CalculateMALLUseForStaticScreen(
2117                         NumberOfActiveSurfaces,
2118                         MALLAllocatedForDCN,
2119                         UseMALLForStaticScreen,   // mode
2120                         SurfaceSizeInMALL,
2121                         st_vars->one_row_per_frame_fits_in_buffer,
2122                         /* Output */
2123                         UsesMALLForStaticScreen); // boolen
2124
2125         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2126                 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2127                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2128                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2129                                 (GPUVMMinPageSizeKBytes[k] > 64);
2130                 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2131         }
2132
2133         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2134 #ifdef __DML_VBA_DEBUG__
2135                 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
2136                 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2137 #endif
2138                 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2139                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2140                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2141                                 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2142
2143                 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2144                                 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2145
2146                 if (use_one_row_for_frame[k]) {
2147                         dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k];
2148                         dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k];
2149                         st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k];
2150                         dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k];
2151                         dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k];
2152                         st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k];
2153                         PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k];
2154                 }
2155
2156                 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2157                         DCCMetaBufferSizeNotExceeded[k] = true;
2158                 else
2159                         DCCMetaBufferSizeNotExceeded[k] = false;
2160
2161                 PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k];
2162                 if (use_one_row_for_frame[k])
2163                         PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2164
2165                 dml32_CalculateRowBandwidth(
2166                                 GPUVMEnable,
2167                                 myPipe[k].SourcePixelFormat,
2168                                 myPipe[k].VRatio,
2169                                 myPipe[k].VRatioChroma,
2170                                 myPipe[k].DCCEnable,
2171                                 myPipe[k].HTotal / myPipe[k].PixelClock,
2172                                 st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k],
2173                                 meta_row_height[k],
2174                                 meta_row_height_chroma[k],
2175                                 st_vars->PixelPTEBytesPerRowY[k],
2176                                 st_vars->PixelPTEBytesPerRowC[k],
2177                                 dpte_row_height_luma[k],
2178                                 dpte_row_height_chroma[k],
2179
2180                                 /* Output */
2181                                 &meta_row_bw[k],
2182                                 &dpte_row_bw[k]);
2183 #ifdef __DML_VBA_DEBUG__
2184                 dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
2185                 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
2186                                 __func__, k, use_one_row_for_frame_flip[k]);
2187                 dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
2188                                 __func__, k, UseMALLForPStateChange[k]);
2189                 dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
2190                 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
2191                                 __func__, k, dpte_row_width_luma_ub[k]);
2192                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, st_vars->PixelPTEBytesPerRowY[k]);
2193                 dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
2194                                 __func__, k, dpte_row_height_chroma[k]);
2195                 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
2196                                 __func__, k, dpte_row_width_chroma_ub[k]);
2197                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, st_vars->PixelPTEBytesPerRowC[k]);
2198                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
2199                 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
2200                                 __func__, k, PTEBufferSizeNotExceeded[k]);
2201                 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2202                 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2203 #endif
2204         }
2205 } // CalculateVMRowAndSwath
2206
2207 unsigned int dml32_CalculateVMAndRowBytes(
2208                 bool ViewportStationary,
2209                 bool DCCEnable,
2210                 unsigned int NumberOfDPPs,
2211                 unsigned int BlockHeight256Bytes,
2212                 unsigned int BlockWidth256Bytes,
2213                 enum source_format_class SourcePixelFormat,
2214                 unsigned int SurfaceTiling,
2215                 unsigned int BytePerPixel,
2216                 enum dm_rotation_angle SourceRotation,
2217                 double SwathWidth,
2218                 unsigned int ViewportHeight,
2219                 unsigned int    ViewportXStart,
2220                 unsigned int    ViewportYStart,
2221                 bool GPUVMEnable,
2222                 bool HostVMEnable,
2223                 unsigned int HostVMMaxNonCachedPageTableLevels,
2224                 unsigned int GPUVMMaxPageTableLevels,
2225                 unsigned int GPUVMMinPageSizeKBytes,
2226                 unsigned int HostVMMinPageSize,
2227                 unsigned int PTEBufferSizeInRequests,
2228                 unsigned int Pitch,
2229                 unsigned int DCCMetaPitch,
2230                 unsigned int MacroTileWidth,
2231                 unsigned int MacroTileHeight,
2232
2233                 /* Output */
2234                 unsigned int *MetaRowByte,
2235                 unsigned int *PixelPTEBytesPerRow,
2236                 unsigned int    *dpte_row_width_ub,
2237                 unsigned int *dpte_row_height,
2238                 unsigned int *dpte_row_height_linear,
2239                 unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2240                 unsigned int    *dpte_row_width_ub_one_row_per_frame,
2241                 unsigned int    *dpte_row_height_one_row_per_frame,
2242                 unsigned int *MetaRequestWidth,
2243                 unsigned int *MetaRequestHeight,
2244                 unsigned int *meta_row_width,
2245                 unsigned int *meta_row_height,
2246                 unsigned int *PixelPTEReqWidth,
2247                 unsigned int *PixelPTEReqHeight,
2248                 unsigned int *PTERequestSize,
2249                 unsigned int    *DPDE0BytesFrame,
2250                 unsigned int    *MetaPTEBytesFrame)
2251 {
2252         unsigned int MPDEBytesFrame;
2253         unsigned int DCCMetaSurfaceBytes;
2254         unsigned int ExtraDPDEBytesFrame;
2255         unsigned int PDEAndMetaPTEBytesFrame;
2256         unsigned int HostVMDynamicLevels = 0;
2257         unsigned int    MacroTileSizeBytes;
2258         unsigned int    vp_height_meta_ub;
2259         unsigned int    vp_height_dpte_ub;
2260         unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2261
2262         if (GPUVMEnable == true && HostVMEnable == true) {
2263                 if (HostVMMinPageSize < 2048)
2264                         HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2265                 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2266                         HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2267                 else
2268                         HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2269         }
2270
2271         *MetaRequestHeight = 8 * BlockHeight256Bytes;
2272         *MetaRequestWidth = 8 * BlockWidth256Bytes;
2273         if (SurfaceTiling == dm_sw_linear) {
2274                 *meta_row_height = 32;
2275                 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2276                                 - dml_floor(ViewportXStart, *MetaRequestWidth);
2277         } else if (!IsVertical(SourceRotation)) {
2278                 *meta_row_height = *MetaRequestHeight;
2279                 if (ViewportStationary && NumberOfDPPs == 1) {
2280                         *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2281                                         *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2282                 } else {
2283                         *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2284                 }
2285                 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2286         } else {
2287                 *meta_row_height = *MetaRequestWidth;
2288                 if (ViewportStationary && NumberOfDPPs == 1) {
2289                         *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2290                                         *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2291                 } else {
2292                         *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2293                 }
2294                 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2295         }
2296
2297         if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2298                 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2299                                 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2300         } else if (!IsVertical(SourceRotation)) {
2301                 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2302         } else {
2303                 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2304         }
2305
2306         DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2307
2308         if (GPUVMEnable == true) {
2309                 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2310                                 (8 * 4.0 * 1024), 1) + 1) * 64;
2311                 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2312         } else {
2313                 *MetaPTEBytesFrame = 0;
2314                 MPDEBytesFrame = 0;
2315         }
2316
2317         if (DCCEnable != true) {
2318                 *MetaPTEBytesFrame = 0;
2319                 MPDEBytesFrame = 0;
2320                 *MetaRowByte = 0;
2321         }
2322
2323         MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2324
2325         if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2326                 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2327                         vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2328                                         MacroTileHeight - 1, MacroTileHeight) -
2329                                         dml_floor(ViewportYStart, MacroTileHeight);
2330                 } else if (!IsVertical(SourceRotation)) {
2331                         vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2332                 } else {
2333                         vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2334                 }
2335                 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2336                                 (8 * 2097152), 1) + 1);
2337                 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2338         } else {
2339                 *DPDE0BytesFrame = 0;
2340                 ExtraDPDEBytesFrame = 0;
2341                 vp_height_dpte_ub = 0;
2342         }
2343
2344         PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2345
2346 #ifdef __DML_VBA_DEBUG__
2347         dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2348         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2349         dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2350         dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2351         dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2352         dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2353         dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2354         dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2355         dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2356         dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2357         dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2358         dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2359         dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2360         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2361         dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2362         dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2363         dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2364 #endif
2365
2366         if (HostVMEnable == true)
2367                 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2368
2369         if (SurfaceTiling == dm_sw_linear) {
2370                 *PixelPTEReqHeight = 1;
2371                 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2372                 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2373                 *PTERequestSize = 64;
2374         } else if (GPUVMMinPageSizeKBytes == 4) {
2375                 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2376                 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2377                 *PTERequestSize = 128;
2378         } else {
2379                 *PixelPTEReqHeight = MacroTileHeight;
2380                 *PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2381                 *PTERequestSize = 64;
2382         }
2383 #ifdef __DML_VBA_DEBUG__
2384         dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2385         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2386         dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2387         dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2388         dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2389         dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2390         dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2391 #endif
2392
2393         *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2394         *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2395                         (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2396                                         (double) *PixelPTEReqWidth;
2397         *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2398                         *PTERequestSize;
2399
2400         if (SurfaceTiling == dm_sw_linear) {
2401                 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2402                                 *PixelPTEReqWidth / Pitch), 1));
2403 #ifdef __DML_VBA_DEBUG__
2404                 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2405                                 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2406                 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2407                                 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2408                 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2409                                 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2410                 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2411                                 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2412                                                 *PixelPTEReqWidth / Pitch), 1));
2413                 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2414 #endif
2415                 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2416                                 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2417                 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2418
2419                 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2420                 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2421                                 PixelPTEReqWidth_linear / Pitch), 1);
2422                 if (*dpte_row_height_linear > 128)
2423                         *dpte_row_height_linear = 128;
2424
2425         } else if (!IsVertical(SourceRotation)) {
2426                 *dpte_row_height = *PixelPTEReqHeight;
2427
2428                 if (GPUVMMinPageSizeKBytes > 64) {
2429                         *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2430                                         *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2431                 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2432                         *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2433                                         *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2434                                         dml_floor(ViewportXStart, *PixelPTEReqWidth);
2435                 } else {
2436                         *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2437                                         *PixelPTEReqWidth;
2438                 }
2439
2440                 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2441         } else {
2442                 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2443
2444                 if (ViewportStationary && (NumberOfDPPs == 1)) {
2445                         *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2446                                         *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2447                 } else {
2448                         *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2449                                         * *PixelPTEReqHeight;
2450                 }
2451
2452                 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2453         }
2454
2455         if (GPUVMEnable != true)
2456                 *PixelPTEBytesPerRow = 0;
2457         if (HostVMEnable == true)
2458                 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2459
2460 #ifdef __DML_VBA_DEBUG__
2461         dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2462         dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2463         dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2464         dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2465         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2466         dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2467         dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2468         dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2469                         __func__, *dpte_row_width_ub_one_row_per_frame);
2470         dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2471                         __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2472         dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2473                         *MetaPTEBytesFrame);
2474 #endif
2475
2476         return PDEAndMetaPTEBytesFrame;
2477 } // CalculateVMAndRowBytes
2478
2479 double dml32_CalculatePrefetchSourceLines(
2480                 double VRatio,
2481                 unsigned int VTaps,
2482                 bool Interlace,
2483                 bool ProgressiveToInterlaceUnitInOPP,
2484                 unsigned int SwathHeight,
2485                 enum dm_rotation_angle SourceRotation,
2486                 bool ViewportStationary,
2487                 double SwathWidth,
2488                 unsigned int ViewportHeight,
2489                 unsigned int ViewportXStart,
2490                 unsigned int ViewportYStart,
2491
2492                 /* Output */
2493                 double *VInitPreFill,
2494                 unsigned int *MaxNumSwath)
2495 {
2496
2497         unsigned int vp_start_rot;
2498         unsigned int sw0_tmp;
2499         unsigned int MaxPartialSwath;
2500         double numLines;
2501
2502 #ifdef __DML_VBA_DEBUG__
2503         dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2504         dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2505         dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2506         dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2507         dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2508         dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2509 #endif
2510         if (ProgressiveToInterlaceUnitInOPP)
2511                 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2512         else
2513                 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2514
2515         if (ViewportStationary) {
2516                 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2517                         vp_start_rot = SwathHeight -
2518                                         (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2519                 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2520                         vp_start_rot = ViewportXStart;
2521                 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2522                         vp_start_rot = SwathHeight -
2523                                         (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2524                 } else {
2525                         vp_start_rot = ViewportYStart;
2526                 }
2527                 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2528                 if (sw0_tmp < *VInitPreFill)
2529                         *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2530                 else
2531                         *MaxNumSwath = 1;
2532                 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2533         } else {
2534                 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2535                 if (*VInitPreFill > 1)
2536                         MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2537                 else
2538                         MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2539         }
2540         numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2541
2542 #ifdef __DML_VBA_DEBUG__
2543         dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2544         dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2545         dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2546         dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2547         dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2548 #endif
2549         return numLines;
2550
2551 } // CalculatePrefetchSourceLines
2552
2553 void dml32_CalculateMALLUseForStaticScreen(
2554                 unsigned int NumberOfActiveSurfaces,
2555                 unsigned int MALLAllocatedForDCNFinal,
2556                 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2557                 unsigned int SurfaceSizeInMALL[],
2558                 bool one_row_per_frame_fits_in_buffer[],
2559
2560                 /* output */
2561                 bool UsesMALLForStaticScreen[])
2562 {
2563         unsigned int k;
2564         unsigned int SurfaceToAddToMALL;
2565         bool CanAddAnotherSurfaceToMALL;
2566         unsigned int TotalSurfaceSizeInMALL;
2567
2568         TotalSurfaceSizeInMALL = 0;
2569         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2570                 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2571                 if (UsesMALLForStaticScreen[k])
2572                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2573 #ifdef __DML_VBA_DEBUG__
2574                 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2575                 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2576 #endif
2577         }
2578
2579         SurfaceToAddToMALL = 0;
2580         CanAddAnotherSurfaceToMALL = true;
2581         while (CanAddAnotherSurfaceToMALL) {
2582                 CanAddAnotherSurfaceToMALL = false;
2583                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2584                         if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2585                                         !UsesMALLForStaticScreen[k] &&
2586                                         UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2587                                         one_row_per_frame_fits_in_buffer[k] &&
2588                                         (!CanAddAnotherSurfaceToMALL ||
2589                                         SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2590                                 CanAddAnotherSurfaceToMALL = true;
2591                                 SurfaceToAddToMALL = k;
2592 #ifdef __DML_VBA_DEBUG__
2593                                 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2594                                                 __func__, k, UseMALLForStaticScreen[k]);
2595 #endif
2596                         }
2597                 }
2598                 if (CanAddAnotherSurfaceToMALL) {
2599                         UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2600                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2601
2602 #ifdef __DML_VBA_DEBUG__
2603                         dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2604                         dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2605 #endif
2606
2607                 }
2608         }
2609 }
2610
2611 void dml32_CalculateRowBandwidth(
2612                 bool GPUVMEnable,
2613                 enum source_format_class SourcePixelFormat,
2614                 double VRatio,
2615                 double VRatioChroma,
2616                 bool DCCEnable,
2617                 double LineTime,
2618                 unsigned int MetaRowByteLuma,
2619                 unsigned int MetaRowByteChroma,
2620                 unsigned int meta_row_height_luma,
2621                 unsigned int meta_row_height_chroma,
2622                 unsigned int PixelPTEBytesPerRowLuma,
2623                 unsigned int PixelPTEBytesPerRowChroma,
2624                 unsigned int dpte_row_height_luma,
2625                 unsigned int dpte_row_height_chroma,
2626                 /* Output */
2627                 double *meta_row_bw,
2628                 double *dpte_row_bw)
2629 {
2630         if (DCCEnable != true) {
2631                 *meta_row_bw = 0;
2632         } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2633                         SourcePixelFormat == dm_rgbe_alpha) {
2634                 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2635                                 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2636         } else {
2637                 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2638         }
2639
2640         if (GPUVMEnable != true) {
2641                 *dpte_row_bw = 0;
2642         } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2643                         SourcePixelFormat == dm_rgbe_alpha) {
2644                 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2645                                 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2646         } else {
2647                 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2648         }
2649 }
2650
/*
 * Return the urgent latency: dml_max3() of the three supplied latencies.
 * When DoUrgentLatencyAdjustment is set, the result is increased by
 *     UrgentLatencyAdjustmentFabricClockComponent *
 *     (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
                double UrgentLatencyPixelDataOnly,
                double UrgentLatencyPixelMixedWithVMData,
                double UrgentLatencyVMDataOnly,
                bool   DoUrgentLatencyAdjustment,
                double UrgentLatencyAdjustmentFabricClockComponent,
                double UrgentLatencyAdjustmentFabricClockReference,
                double FabricClock)
{
        const double worst_latency = dml_max3(UrgentLatencyPixelDataOnly,
                        UrgentLatencyPixelMixedWithVMData,
                        UrgentLatencyVMDataOnly);

        if (!DoUrgentLatencyAdjustment)
                return worst_latency;

        return worst_latency + UrgentLatencyAdjustmentFabricClockComponent *
                        (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2669
2670 void dml32_CalculateUrgentBurstFactor(
2671                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2672                 unsigned int    swath_width_luma_ub,
2673                 unsigned int    swath_width_chroma_ub,
2674                 unsigned int SwathHeightY,
2675                 unsigned int SwathHeightC,
2676                 double  LineTime,
2677                 double  UrgentLatency,
2678                 double  CursorBufferSize,
2679                 unsigned int CursorWidth,
2680                 unsigned int CursorBPP,
2681                 double  VRatio,
2682                 double  VRatioC,
2683                 double  BytePerPixelInDETY,
2684                 double  BytePerPixelInDETC,
2685                 unsigned int    DETBufferSizeY,
2686                 unsigned int    DETBufferSizeC,
2687                 /* Output */
2688                 double *UrgentBurstFactorCursor,
2689                 double *UrgentBurstFactorLuma,
2690                 double *UrgentBurstFactorChroma,
2691                 bool   *NotEnoughUrgentLatencyHiding)
2692 {
2693         double       LinesInDETLuma;
2694         double       LinesInDETChroma;
2695         unsigned int LinesInCursorBuffer;
2696         double       CursorBufferSizeInTime;
2697         double       DETBufferSizeInTimeLuma;
2698         double       DETBufferSizeInTimeChroma;
2699
2700         *NotEnoughUrgentLatencyHiding = 0;
2701
2702         if (CursorWidth > 0) {
2703                 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2704                                 (CursorWidth * CursorBPP / 8.0)), 1.0);
2705                 if (VRatio > 0) {
2706                         CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2707                         if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2708                                 *NotEnoughUrgentLatencyHiding = 1;
2709                                 *UrgentBurstFactorCursor = 0;
2710                         } else {
2711                                 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2712                                                 (CursorBufferSizeInTime - UrgentLatency);
2713                         }
2714                 } else {
2715                         *UrgentBurstFactorCursor = 1;
2716                 }
2717         }
2718
2719         LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2720                         DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2721
2722         if (VRatio > 0) {
2723                 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2724                 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2725                         *NotEnoughUrgentLatencyHiding = 1;
2726                         *UrgentBurstFactorLuma = 0;
2727                 } else {
2728                         *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2729                 }
2730         } else {
2731                 *UrgentBurstFactorLuma = 1;
2732         }
2733
2734         if (BytePerPixelInDETC > 0) {
2735                 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2736                                         1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2737                                         / swath_width_chroma_ub;
2738
2739                 if (VRatio > 0) {
2740                         DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2741                         if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2742                                 *NotEnoughUrgentLatencyHiding = 1;
2743                                 *UrgentBurstFactorChroma = 0;
2744                         } else {
2745                                 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2746                                                 / (DETBufferSizeInTimeChroma - UrgentLatency);
2747                         }
2748                 } else {
2749                         *UrgentBurstFactorChroma = 1;
2750                 }
2751         }
2752 } // CalculateUrgentBurstFactor
2753
2754 void dml32_CalculateDCFCLKDeepSleep(
2755                 unsigned int NumberOfActiveSurfaces,
2756                 unsigned int BytePerPixelY[],
2757                 unsigned int BytePerPixelC[],
2758                 double VRatio[],
2759                 double VRatioChroma[],
2760                 double SwathWidthY[],
2761                 double SwathWidthC[],
2762                 unsigned int DPPPerSurface[],
2763                 double HRatio[],
2764                 double HRatioChroma[],
2765                 double PixelClock[],
2766                 double PSCL_THROUGHPUT[],
2767                 double PSCL_THROUGHPUT_CHROMA[],
2768                 double Dppclk[],
2769                 double ReadBandwidthLuma[],
2770                 double ReadBandwidthChroma[],
2771                 unsigned int ReturnBusWidth,
2772
2773                 /* Output */
2774                 double *DCFClkDeepSleep)
2775 {
2776         unsigned int k;
2777         double   DisplayPipeLineDeliveryTimeLuma;
2778         double   DisplayPipeLineDeliveryTimeChroma;
2779         double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2780         double ReadBandwidth = 0.0;
2781
2782         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2783
2784                 if (VRatio[k] <= 1) {
2785                         DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2786                                         / PixelClock[k];
2787                 } else {
2788                         DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2789                 }
2790                 if (BytePerPixelC[k] == 0) {
2791                         DisplayPipeLineDeliveryTimeChroma = 0;
2792                 } else {
2793                         if (VRatioChroma[k] <= 1) {
2794                                 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2795                                                 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2796                         } else {
2797                                 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2798                                                 / Dppclk[k];
2799                         }
2800                 }
2801
2802                 if (BytePerPixelC[k] > 0) {
2803                         DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2804                                         BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2805                                         __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2806                                         32.0 / DisplayPipeLineDeliveryTimeChroma);
2807                 } else {
2808                         DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2809                                         64.0 / DisplayPipeLineDeliveryTimeLuma;
2810                 }
2811                 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2812
2813 #ifdef __DML_VBA_DEBUG__
2814                 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2815                 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2816 #endif
2817         }
2818
2819         for (k = 0; k < NumberOfActiveSurfaces; ++k)
2820                 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2821
2822         *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2823
2824 #ifdef __DML_VBA_DEBUG__
2825         dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2826         dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2827         dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2828         dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2829 #endif
2830
2831         for (k = 0; k < NumberOfActiveSurfaces; ++k)
2832                 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2833 #ifdef __DML_VBA_DEBUG__
2834         dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2835 #endif
2836 } // CalculateDCFCLKDeepSleep
2837
2838 double dml32_CalculateWriteBackDelay(
2839                 enum source_format_class WritebackPixelFormat,
2840                 double WritebackHRatio,
2841                 double WritebackVRatio,
2842                 unsigned int WritebackVTaps,
2843                 unsigned int         WritebackDestinationWidth,
2844                 unsigned int         WritebackDestinationHeight,
2845                 unsigned int         WritebackSourceHeight,
2846                 unsigned int HTotal)
2847 {
2848         double CalculateWriteBackDelay;
2849         double Line_length;
2850         double Output_lines_last_notclamped;
2851         double WritebackVInit;
2852
2853         WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2854         Line_length = dml_max((double) WritebackDestinationWidth,
2855                         dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2856         Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2857                         dml_ceil(((double)WritebackSourceHeight -
2858                                         (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2859         if (Output_lines_last_notclamped < 0) {
2860                 CalculateWriteBackDelay = 0;
2861         } else {
2862                 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2863                                 (HTotal - WritebackDestinationWidth) + 80;
2864         }
2865         return CalculateWriteBackDelay;
2866 }
2867
2868 void dml32_UseMinimumDCFCLK(
2869                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2870                 bool DRRDisplay[],
2871                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2872                 unsigned int MaxInterDCNTileRepeaters,
2873                 unsigned int MaxPrefetchMode,
2874                 double DRAMClockChangeLatencyFinal,
2875                 double FCLKChangeLatency,
2876                 double SREnterPlusExitTime,
2877                 unsigned int ReturnBusWidth,
2878                 unsigned int RoundTripPingLatencyCycles,
2879                 unsigned int ReorderingBytes,
2880                 unsigned int PixelChunkSizeInKByte,
2881                 unsigned int MetaChunkSize,
2882                 bool GPUVMEnable,
2883                 unsigned int GPUVMMaxPageTableLevels,
2884                 bool HostVMEnable,
2885                 unsigned int NumberOfActiveSurfaces,
2886                 double HostVMMinPageSize,
2887                 unsigned int HostVMMaxNonCachedPageTableLevels,
2888                 bool DynamicMetadataVMEnabled,
2889                 bool ImmediateFlipRequirement,
2890                 bool ProgressiveToInterlaceUnitInOPP,
2891                 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2892                 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2893                 unsigned int VTotal[],
2894                 unsigned int VActive[],
2895                 unsigned int DynamicMetadataTransmittedBytes[],
2896                 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2897                 bool Interlace[],
2898                 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2899                 double RequiredDISPCLK[][2],
2900                 double UrgLatency[],
2901                 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2902                 double ProjectedDCFClkDeepSleep[][2],
2903                 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2904                 unsigned int TotalNumberOfActiveDPP[][2],
2905                 unsigned int TotalNumberOfDCCActiveDPP[][2],
2906                 unsigned int dpte_group_bytes[],
2907                 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2908                 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2909                 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2910                 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2911                 unsigned int BytePerPixelY[],
2912                 unsigned int BytePerPixelC[],
2913                 unsigned int HTotal[],
2914                 double PixelClock[],
2915                 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2916                 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2917                 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2918                 bool DynamicMetadataEnable[],
2919                 double ReadBandwidthLuma[],
2920                 double ReadBandwidthChroma[],
2921                 double DCFCLKPerState[],
2922                 /* Output */
2923                 double DCFCLKState[][2])
2924 {
2925         unsigned int i, j, k;
2926         unsigned int     dummy1;
2927         double dummy2, dummy3;
2928         double   NormalEfficiency;
2929         double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2930
2931         NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2932         for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2933                 for  (j = 0; j <= 1; ++j) {
2934                         double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2935                         double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2936                         double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2937                         double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2938                         double MinimumTWait = 0.0;
2939                         double DPTEBandwidth;
2940                         double DCFCLKRequiredForAverageBandwidth;
2941                         unsigned int ExtraLatencyBytes;
2942                         double ExtraLatencyCycles;
2943                         double DCFCLKRequiredForPeakBandwidth;
2944                         unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2945                         double MinimumTvmPlus2Tr0;
2946
2947                         TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2948                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2949                                 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2950                                                 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2951                                                                 / (15.75 * HTotal[k] / PixelClock[k]);
2952                         }
2953
2954                         for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
2955                                 NoOfDPPState[k] = NoOfDPP[i][j][k];
2956
2957                         DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
2958                         DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
2959
2960                         ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
2961                                         TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
2962                                         TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
2963                                         NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
2964                                         HostVMMaxNonCachedPageTableLevels);
2965                         ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
2966                                         + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
2967                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2968                                 double DCFCLKCyclesRequiredInPrefetch;
2969                                 double PrefetchTime;
2970
2971                                 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
2972                                                 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
2973                                                 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
2974                                                                 * BytePerPixelC[k]) / NormalEfficiency
2975                                                 / ReturnBusWidth;
2976                                 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
2977                                                 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
2978                                                                 / NormalEfficiency / ReturnBusWidth
2979                                                                 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
2980                                                 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
2981                                                                 / ReturnBusWidth
2982                                                 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
2983                                                 + PixelDCFCLKCyclesRequiredInPrefetch[k];
2984                                 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
2985                                                 * HTotal[k] / PixelClock[k];
2986                                 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
2987                                                 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
2988                                                 UrgLatency[i] * GPUVMMaxPageTableLevels *
2989                                                 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
2990
2991                                 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
2992                                                 UseMALLForPStateChange[k],
2993                                                 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2994                                                 DRRDisplay[k],
2995                                                 DRAMClockChangeLatencyFinal,
2996                                                 FCLKChangeLatency,
2997                                                 UrgLatency[i],
2998                                                 SREnterPlusExitTime);
2999
3000                                 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3001                                                 MinimumTWait - UrgLatency[i] *
3002                                                 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3003                                                 GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3004                                                 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3005                                                 DynamicMetadataVMExtraLatency[k];
3006
3007                                 if (PrefetchTime > 0) {
3008                                         double ExpectedVRatioPrefetch;
3009
3010                                         ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3011                                                         PixelDCFCLKCyclesRequiredInPrefetch[k] /
3012                                                         DCFCLKCyclesRequiredInPrefetch);
3013                                         DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3014                                                         PixelDCFCLKCyclesRequiredInPrefetch[k] /
3015                                                         PrefetchPixelLinesTime[k] *
3016                                                         dml_max(1.0, ExpectedVRatioPrefetch) *
3017                                                         dml_max(1.0, ExpectedVRatioPrefetch / 4);
3018                                         if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3019                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3020                                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3021                                                                 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3022                                                                 NormalEfficiency / ReturnBusWidth;
3023                                         }
3024                                 } else {
3025                                         DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3026                                 }
3027                                 if (DynamicMetadataEnable[k] == true) {
3028                                         double TSetupPipe;
3029                                         double TdmbfPipe;
3030                                         double TdmsksPipe;
3031                                         double TdmecPipe;
3032                                         double AllowedTimeForUrgentExtraLatency;
3033
3034                                         dml32_CalculateVUpdateAndDynamicMetadataParameters(
3035                                                         MaxInterDCNTileRepeaters,
3036                                                         RequiredDPPCLKPerSurface[i][j][k],
3037                                                         RequiredDISPCLK[i][j],
3038                                                         ProjectedDCFClkDeepSleep[i][j],
3039                                                         PixelClock[k],
3040                                                         HTotal[k],
3041                                                         VTotal[k] - VActive[k],
3042                                                         DynamicMetadataTransmittedBytes[k],
3043                                                         DynamicMetadataLinesBeforeActiveRequired[k],
3044                                                         Interlace[k],
3045                                                         ProgressiveToInterlaceUnitInOPP,
3046
3047                                                         /* output */
3048                                                         &TSetupPipe,
3049                                                         &TdmbfPipe,
3050                                                         &TdmecPipe,
3051                                                         &TdmsksPipe,
3052                                                         &dummy1,
3053                                                         &dummy2,
3054                                                         &dummy3);
3055                                         AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3056                                                         PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3057                                                         TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3058                                         if (AllowedTimeForUrgentExtraLatency > 0)
3059                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3060                                                                 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3061                                                                 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3062                                         else
3063                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3064                                 }
3065                         }
3066                         DCFCLKRequiredForPeakBandwidth = 0;
3067                         for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3068                                 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3069                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3070                         }
3071                         MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3072                                         (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3073                                         (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3074                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3075                                 double MaximumTvmPlus2Tr0PlusTsw;
3076
3077                                 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3078                                                 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3079                                 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3080                                         DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3081                                 } else {
3082                                         DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3083                                                         2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3084                                                                 MinimumTvmPlus2Tr0 -
3085                                                                 PrefetchPixelLinesTime[k] / 4),
3086                                                         (2 * ExtraLatencyCycles +
3087                                                                 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3088                                                                 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3089                                 }
3090                         }
3091                         DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3092                                         dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3093                 }
3094         }
3095 }
3096
/*
 * Return the extra latency expressed in bytes.
 *
 * The base is ReorderingBytes plus 1024 times the pixel and meta chunk
 * totals of the active DPPs.  When GPUVMEnable is set, each surface adds
 *     NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) *
 *     HostVMInefficiencyFactor
 * where levels is 0 unless both GPUVMEnable and HostVMEnable are set; in
 * that case it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (floored at 0 through dml_max) for HostVMMinPageSize below 2048, below
 * 1048576, or anything else respectively.
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
                unsigned int TotalNumberOfActiveDPP,
                unsigned int PixelChunkSizeInKByte,
                unsigned int TotalNumberOfDCCActiveDPP,
                unsigned int MetaChunkSize,
                bool GPUVMEnable,
                bool HostVMEnable,
                unsigned int NumberOfActiveSurfaces,
                unsigned int NumberOfDPP[],
                unsigned int dpte_group_bytes[],
                double HostVMInefficiencyFactor,
                double HostVMMinPageSize,
                unsigned int HostVMMaxNonCachedPageTableLevels)
{
        unsigned int dynamic_levels = 0;
        unsigned int requests_per_group;
        unsigned int k;
        double total_bytes;

        if (GPUVMEnable == true && HostVMEnable == true) {
                if (HostVMMinPageSize < 2048) {
                        dynamic_levels = HostVMMaxNonCachedPageTableLevels;
                } else {
                        /* Larger host pages drop one or two levels. */
                        int dropped = (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) ? 1 : 2;

                        dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - dropped);
                }
        }

        total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
                        TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

        if (GPUVMEnable != true)
                return total_bytes;

        requests_per_group = 1 + 8 * dynamic_levels;
        for (k = 0; k < NumberOfActiveSurfaces; ++k)
                total_bytes = total_bytes + NumberOfDPP[k] * dpte_group_bytes[k] *
                                requests_per_group * HostVMInefficiencyFactor;

        return total_bytes;
}
3137
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - derive the VUpdate /
 * VReady pixel offsets and the dynamic metadata timing terms.
 *
 * Outputs (all written unconditionally):
 *  *VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk +
 *                      repeater delay) * PixelClock)
 *  *VReadyOffsetPix  - ceil(max(150 / Dppclk, repeater delay +
 *                      20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *  *VUpdateOffsetPix - ceil(HTotal / 4)
 *  *TSetup           - sum of the three pixel counts above divided by
 *                      PixelClock
 *  *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *  *Tdmec            - HTotal / PixelClock
 *  *Tdmsks           - half of VBlank lines when
 *                      DynamicMetadataLinesBeforeActiveRequired is 0,
 *                      otherwise that many lines; halved again for interlaced
 *                      output that is not produced by the OPP
 *                      progressive-to-interlace unit.
 *
 * "repeater delay" is MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 * The clocks are used as divisors without any zero check here.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix are doubles, so they need a
	 * floating-point conversion; passing a double to %d is undefined.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3192
3193 double dml32_CalculateTWait(
3194                 unsigned int PrefetchMode,
3195                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3196                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3197                 bool DRRDisplay,
3198                 double DRAMClockChangeLatency,
3199                 double FCLKChangeLatency,
3200                 double UrgentLatency,
3201                 double SREnterPlusExitTime)
3202 {
3203         double TWait = 0.0;
3204
3205         if (PrefetchMode == 0 &&
3206                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3207                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3208                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3209                         !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3210                 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3211         } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3212                 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3213         } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3214                 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3215         } else {
3216                 TWait = UrgentLatency;
3217         }
3218
3219 #ifdef __DML_VBA_DEBUG__
3220         dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3221         dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3222 #endif
3223         return TWait;
3224 } // CalculateTWait
3225
3226 // Function: get_return_bw_mbps
3227 // Megabyte per second
3228 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3229                 const int VoltageLevel,
3230                 const bool HostVMEnable,
3231                 const double DCFCLK,
3232                 const double FabricClock,
3233                 const double DRAMSpeed)
3234 {
3235         double ReturnBW = 0.;
3236         double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3237         double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3238         double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3239         double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3240                         IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3241                         IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3242                                         soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3243         double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3244                         IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3245                         IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3246                                         soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3247
3248         if (HostVMEnable != true)
3249                 ReturnBW = PixelDataOnlyReturnBW;
3250         else
3251                 ReturnBW = PixelMixedWithVMDataReturnBW;
3252
3253 #ifdef __DML_VBA_DEBUG__
3254         dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3255         dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3256         dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3257         dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3258         dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3259         dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3260         dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3261         dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3262         dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3263         dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3264         dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3265 #endif
3266         return ReturnBW;
3267 }
3268
3269 // Function: get_return_bw_mbps_vm_only
3270 // Megabyte per second
3271 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3272                 const int VoltageLevel,
3273                 const double DCFCLK,
3274                 const double FabricClock,
3275                 const double DRAMSpeed)
3276 {
3277         double VMDataOnlyReturnBW = dml_min3(
3278                         soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3279                         FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3280                                         * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3281                         DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3282                                         * (VoltageLevel < 2 ?
3283                                                         soc->pct_ideal_dram_bw_after_urgent_strobe :
3284                                                         soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3285 #ifdef __DML_VBA_DEBUG__
3286         dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3287         dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3288         dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3289         dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3290         dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3291 #endif
3292         return VMDataOnlyReturnBW;
3293 }
3294
3295 double dml32_CalculateExtraLatency(
3296                 unsigned int RoundTripPingLatencyCycles,
3297                 unsigned int ReorderingBytes,
3298                 double DCFCLK,
3299                 unsigned int TotalNumberOfActiveDPP,
3300                 unsigned int PixelChunkSizeInKByte,
3301                 unsigned int TotalNumberOfDCCActiveDPP,
3302                 unsigned int MetaChunkSize,
3303                 double ReturnBW,
3304                 bool GPUVMEnable,
3305                 bool HostVMEnable,
3306                 unsigned int NumberOfActiveSurfaces,
3307                 unsigned int NumberOfDPP[],
3308                 unsigned int dpte_group_bytes[],
3309                 double HostVMInefficiencyFactor,
3310                 double HostVMMinPageSize,
3311                 unsigned int HostVMMaxNonCachedPageTableLevels)
3312 {
3313         double ExtraLatencyBytes;
3314         double ExtraLatency;
3315
3316         ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3317                         ReorderingBytes,
3318                         TotalNumberOfActiveDPP,
3319                         PixelChunkSizeInKByte,
3320                         TotalNumberOfDCCActiveDPP,
3321                         MetaChunkSize,
3322                         GPUVMEnable,
3323                         HostVMEnable,
3324                         NumberOfActiveSurfaces,
3325                         NumberOfDPP,
3326                         dpte_group_bytes,
3327                         HostVMInefficiencyFactor,
3328                         HostVMMinPageSize,
3329                         HostVMMaxNonCachedPageTableLevels);
3330
3331         ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3332
3333 #ifdef __DML_VBA_DEBUG__
3334         dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3335         dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3336         dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3337         dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3338         dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3339 #endif
3340
3341         return ExtraLatency;
3342 } // CalculateExtraLatency
3343
3344 bool dml32_CalculatePrefetchSchedule(
3345                 double HostVMInefficiencyFactor,
3346                 DmlPipe *myPipe,
3347                 unsigned int DSCDelay,
3348                 double DPPCLKDelaySubtotalPlusCNVCFormater,
3349                 double DPPCLKDelaySCL,
3350                 double DPPCLKDelaySCLLBOnly,
3351                 double DPPCLKDelayCNVCCursor,
3352                 double DISPCLKDelaySubtotal,
3353                 unsigned int DPP_RECOUT_WIDTH,
3354                 enum output_format_class OutputFormat,
3355                 unsigned int MaxInterDCNTileRepeaters,
3356                 unsigned int VStartup,
3357                 unsigned int MaxVStartup,
3358                 unsigned int GPUVMPageTableLevels,
3359                 bool GPUVMEnable,
3360                 bool HostVMEnable,
3361                 unsigned int HostVMMaxNonCachedPageTableLevels,
3362                 double HostVMMinPageSize,
3363                 bool DynamicMetadataEnable,
3364                 bool DynamicMetadataVMEnabled,
3365                 int DynamicMetadataLinesBeforeActiveRequired,
3366                 unsigned int DynamicMetadataTransmittedBytes,
3367                 double UrgentLatency,
3368                 double UrgentExtraLatency,
3369                 double TCalc,
3370                 unsigned int PDEAndMetaPTEBytesFrame,
3371                 unsigned int MetaRowByte,
3372                 unsigned int PixelPTEBytesPerRow,
3373                 double PrefetchSourceLinesY,
3374                 unsigned int SwathWidthY,
3375                 unsigned int VInitPreFillY,
3376                 unsigned int MaxNumSwathY,
3377                 double PrefetchSourceLinesC,
3378                 unsigned int SwathWidthC,
3379                 unsigned int VInitPreFillC,
3380                 unsigned int MaxNumSwathC,
3381                 unsigned int swath_width_luma_ub,
3382                 unsigned int swath_width_chroma_ub,
3383                 unsigned int SwathHeightY,
3384                 unsigned int SwathHeightC,
3385                 double TWait,
3386                 /* Output */
3387                 double   *DSTXAfterScaler,
3388                 double   *DSTYAfterScaler,
3389                 double *DestinationLinesForPrefetch,
3390                 double *PrefetchBandwidth,
3391                 double *DestinationLinesToRequestVMInVBlank,
3392                 double *DestinationLinesToRequestRowInVBlank,
3393                 double *VRatioPrefetchY,
3394                 double *VRatioPrefetchC,
3395                 double *RequiredPrefetchPixDataBWLuma,
3396                 double *RequiredPrefetchPixDataBWChroma,
3397                 bool   *NotEnoughTimeForDynamicMetadata,
3398                 double *Tno_bw,
3399                 double *prefetch_vmrow_bw,
3400                 double *Tdmdl_vm,
3401                 double *Tdmdl,
3402                 double *TSetup,
3403                 unsigned int   *VUpdateOffsetPix,
3404                 double   *VUpdateWidthPix,
3405                 double   *VReadyOffsetPix)
3406 {
3407         bool MyError = false;
3408         unsigned int DPPCycles, DISPCLKCycles;
3409         double DSTTotalPixelsAfterScaler;
3410         double LineTime;
3411         double dst_y_prefetch_equ;
3412         double prefetch_bw_oto;
3413         double Tvm_oto;
3414         double Tr0_oto;
3415         double Tvm_oto_lines;
3416         double Tr0_oto_lines;
3417         double dst_y_prefetch_oto;
3418         double TimeForFetchingMetaPTE = 0;
3419         double TimeForFetchingRowInVBlank = 0;
3420         double LinesToRequestPrefetchPixelData = 0;
3421         unsigned int HostVMDynamicLevelsTrips;
3422         double  trip_to_mem;
3423         double  Tvm_trips;
3424         double  Tr0_trips;
3425         double  Tvm_trips_rounded;
3426         double  Tr0_trips_rounded;
3427         double  Lsw_oto;
3428         double  Tpre_rounded;
3429         double  prefetch_bw_equ;
3430         double  Tvm_equ;
3431         double  Tr0_equ;
3432         double  Tdmbf;
3433         double  Tdmec;
3434         double  Tdmsks;
3435         double  prefetch_sw_bytes;
3436         double  bytes_pp;
3437         double  dep_bytes;
3438         unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3439         double  min_Lsw;
3440         double  Tsw_est1 = 0;
3441         double  Tsw_est3 = 0;
3442
3443         if (GPUVMEnable == true && HostVMEnable == true)
3444                 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3445         else
3446                 HostVMDynamicLevelsTrips = 0;
3447 #ifdef __DML_VBA_DEBUG__
3448         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
3449         dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
3450         dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3451         dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3452                         __func__, HostVMEnable, HostVMInefficiencyFactor);
3453 #endif
3454         dml32_CalculateVUpdateAndDynamicMetadataParameters(
3455                         MaxInterDCNTileRepeaters,
3456                         myPipe->Dppclk,
3457                         myPipe->Dispclk,
3458                         myPipe->DCFClkDeepSleep,
3459                         myPipe->PixelClock,
3460                         myPipe->HTotal,
3461                         myPipe->VBlank,
3462                         DynamicMetadataTransmittedBytes,
3463                         DynamicMetadataLinesBeforeActiveRequired,
3464                         myPipe->InterlaceEnable,
3465                         myPipe->ProgressiveToInterlaceUnitInOPP,
3466                         TSetup,
3467
3468                         /* output */
3469                         &Tdmbf,
3470                         &Tdmec,
3471                         &Tdmsks,
3472                         VUpdateOffsetPix,
3473                         VUpdateWidthPix,
3474                         VReadyOffsetPix);
3475
3476         LineTime = myPipe->HTotal / myPipe->PixelClock;
3477         trip_to_mem = UrgentLatency;
3478         Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3479
3480         if (DynamicMetadataVMEnabled == true)
3481                 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
3482         else
3483                 *Tdmdl = TWait + UrgentExtraLatency;
3484
3485 #ifdef __DML_VBA_ALLOW_DELTA__
3486         if (DynamicMetadataEnable == false)
3487                 *Tdmdl = 0.0;
3488 #endif
3489
3490         if (DynamicMetadataEnable == true) {
3491                 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3492                         *NotEnoughTimeForDynamicMetadata = true;
3493 #ifdef __DML_VBA_DEBUG__
3494                         dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3495                         dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3496                                         __func__, Tdmbf);
3497                         dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3498                         dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3499                                         __func__, Tdmsks);
3500                         dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3501                                         __func__, *Tdmdl);
3502 #endif
3503                 } else {
3504                         *NotEnoughTimeForDynamicMetadata = false;
3505                 }
3506         } else {
3507                 *NotEnoughTimeForDynamicMetadata = false;
3508         }
3509
3510         *Tdmdl_vm =  (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
3511                         GPUVMEnable == true ? TWait + Tvm_trips : 0);
3512
3513         if (myPipe->ScalerEnabled)
3514                 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
3515         else
3516                 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
3517
3518         DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
3519
3520         DISPCLKCycles = DISPCLKDelaySubtotal;
3521
3522         if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3523                 return true;
3524
3525         *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3526                         myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3527
3528         *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3529                         + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3530                         + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3531                                         myPipe->HActive / 2 : 0)
3532                         + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3533
3534 #ifdef __DML_VBA_DEBUG__
3535         dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3536         dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3537         dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3538         dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3539         dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3540         dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3541         dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3542         dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3543         dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3544 #endif
3545
3546         if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3547                 *DSTYAfterScaler = 1;
3548         else
3549                 *DSTYAfterScaler = 0;
3550
3551         DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3552         *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3553         *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3554 #ifdef __DML_VBA_DEBUG__
3555         dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3556         dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3557 #endif
3558
3559         MyError = false;
3560
3561         Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3562
3563         if (GPUVMEnable == true) {
3564                 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3565                 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3566                 if (GPUVMPageTableLevels >= 3) {
3567                         *Tno_bw = UrgentExtraLatency + trip_to_mem *
3568                                         (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3569                 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
3570                         Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3571                                         4.0 * LineTime; // VBA_ERROR
3572                         *Tno_bw = UrgentExtraLatency;
3573                 } else {
3574                         *Tno_bw = 0;
3575                 }
3576         } else if (myPipe->DCCEnable == true) {
3577                 Tvm_trips_rounded = LineTime / 4.0;
3578                 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3579                 *Tno_bw = 0;
3580         } else {
3581                 Tvm_trips_rounded = LineTime / 4.0;
3582                 Tr0_trips_rounded = LineTime / 2.0;
3583                 *Tno_bw = 0;
3584         }
3585         Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3586         Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3587
3588         if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3589                         || myPipe->SourcePixelFormat == dm_420_12) {
3590                 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3591         } else {
3592                 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3593         }
3594
3595         prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3596                         + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3597         prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3598                         prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3599
3600         min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3601         min_Lsw = dml_max(min_Lsw, 1.0);
3602         Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3603
3604         if (GPUVMEnable == true) {
3605                 Tvm_oto = dml_max3(
3606                                 Tvm_trips,
3607                                 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3608                                 LineTime / 4.0);
3609         } else
3610                 Tvm_oto = LineTime / 4.0;
3611
3612         if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3613                 Tr0_oto = dml_max4(
3614                                 Tr0_trips,
3615                                 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3616                                 (LineTime - Tvm_oto)/2.0,
3617                                 LineTime / 4.0);
3618 #ifdef __DML_VBA_DEBUG__
3619                 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3620                                 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3621                 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3622                 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3623                 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3624 #endif
3625         } else
3626                 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3627
3628         Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3629         Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3630         dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3631
3632         dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3633                         (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3634
3635 #ifdef __DML_VBA_DEBUG__
3636         dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3637         dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3638         dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3639         dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3640         dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3641         dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3642         dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3643         dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3644         dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3645         dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3646         dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3647         dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3648         dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3649         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3650         dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3651         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3652         dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3653         dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3654         dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3655         dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3656         dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3657         dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3658         dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3659         dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3660         dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3661         dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3662         dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3663 #endif
3664
3665         dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3666         Tpre_rounded = dst_y_prefetch_equ * LineTime;
3667 #ifdef __DML_VBA_DEBUG__
3668         dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3669         dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3670         dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3671         dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3672                         __func__, VStartup * LineTime);
3673         dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3674         dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3675         dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3676         dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3677         dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3678         dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3679         dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3680                         __func__, *DSTYAfterScaler);
3681 #endif
3682         dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3683                         MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3684
3685         if (prefetch_sw_bytes < dep_bytes)
3686                 prefetch_sw_bytes = 2 * dep_bytes;
3687
3688         *PrefetchBandwidth = 0;
3689         *DestinationLinesToRequestVMInVBlank = 0;
3690         *DestinationLinesToRequestRowInVBlank = 0;
3691         *VRatioPrefetchY = 0;
3692         *VRatioPrefetchC = 0;
3693         *RequiredPrefetchPixDataBWLuma = 0;
3694         if (dst_y_prefetch_equ > 1) {
3695                 double PrefetchBandwidth1;
3696                 double PrefetchBandwidth2;
3697                 double PrefetchBandwidth3;
3698                 double PrefetchBandwidth4;
3699
3700                 if (Tpre_rounded - *Tno_bw > 0) {
3701                         PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3702                                         + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3703                                         + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3704                         Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3705                 } else
3706                         PrefetchBandwidth1 = 0;
3707
3708                 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3709                                 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3710                         PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3711                                         + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3712                                         / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3713                 }
3714
3715                 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3716                         PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3717                         (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3718                 else
3719                         PrefetchBandwidth2 = 0;
3720
3721                 if (Tpre_rounded - Tvm_trips_rounded > 0) {
3722                         PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3723                                         + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3724                         Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3725                 } else
3726                         PrefetchBandwidth3 = 0;
3727
3728
3729                 if (VStartup == MaxVStartup &&
3730                                 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3731                                 LineTime - Tvm_trips_rounded > 0) {
3732                         PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3733                                         / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3734                 }
3735
3736                 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3737                         PrefetchBandwidth4 = prefetch_sw_bytes /
3738                                         (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3739                 } else {
3740                         PrefetchBandwidth4 = 0;
3741                 }
3742
3743 #ifdef __DML_VBA_DEBUG__
3744                 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3745                 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3746                 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3747                 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3748                 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3749                 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3750                 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3751                 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3752                 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3753 #endif
3754                 {
3755                         bool Case1OK;
3756                         bool Case2OK;
3757                         bool Case3OK;
3758
3759                         if (PrefetchBandwidth1 > 0) {
3760                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3761                                                 >= Tvm_trips_rounded
3762                                                 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3763                                                                 / PrefetchBandwidth1 >= Tr0_trips_rounded) {
3764                                         Case1OK = true;
3765                                 } else {
3766                                         Case1OK = false;
3767                                 }
3768                         } else {
3769                                 Case1OK = false;
3770                         }
3771
3772                         if (PrefetchBandwidth2 > 0) {
3773                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3774                                                 >= Tvm_trips_rounded
3775                                                 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3776                                                 / PrefetchBandwidth2 < Tr0_trips_rounded) {
3777                                         Case2OK = true;
3778                                 } else {
3779                                         Case2OK = false;
3780                                 }
3781                         } else {
3782                                 Case2OK = false;
3783                         }
3784
3785                         if (PrefetchBandwidth3 > 0) {
3786                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3787                                                 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3788                                                                 HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3789                                                                 Tr0_trips_rounded) {
3790                                         Case3OK = true;
3791                                 } else {
3792                                         Case3OK = false;
3793                                 }
3794                         } else {
3795                                 Case3OK = false;
3796                         }
3797
3798                         if (Case1OK)
3799                                 prefetch_bw_equ = PrefetchBandwidth1;
3800                         else if (Case2OK)
3801                                 prefetch_bw_equ = PrefetchBandwidth2;
3802                         else if (Case3OK)
3803                                 prefetch_bw_equ = PrefetchBandwidth3;
3804                         else
3805                                 prefetch_bw_equ = PrefetchBandwidth4;
3806
3807 #ifdef __DML_VBA_DEBUG__
3808                         dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3809                         dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3810                         dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3811                         dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3812 #endif
3813
3814                         if (prefetch_bw_equ > 0) {
3815                                 if (GPUVMEnable == true) {
3816                                         Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3817                                                         HostVMInefficiencyFactor / prefetch_bw_equ,
3818                                                         Tvm_trips, LineTime / 4);
3819                                 } else {
3820                                         Tvm_equ = LineTime / 4;
3821                                 }
3822
3823                                 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3824                                         Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3825                                                         HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3826                                                         (LineTime - Tvm_equ) / 2, LineTime / 4);
3827                                 } else {
3828                                         Tr0_equ = (LineTime - Tvm_equ) / 2;
3829                                 }
3830                         } else {
3831                                 Tvm_equ = 0;
3832                                 Tr0_equ = 0;
3833 #ifdef __DML_VBA_DEBUG__
3834                                 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3835 #endif
3836                         }
3837                 }
3838
3839                 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3840                         *DestinationLinesForPrefetch = dst_y_prefetch_oto;
3841                         TimeForFetchingMetaPTE = Tvm_oto;
3842                         TimeForFetchingRowInVBlank = Tr0_oto;
3843                         *PrefetchBandwidth = prefetch_bw_oto;
3844                 } else {
3845                         *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3846                         TimeForFetchingMetaPTE = Tvm_equ;
3847                         TimeForFetchingRowInVBlank = Tr0_equ;
3848                         *PrefetchBandwidth = prefetch_bw_equ;
3849                 }
3850
3851                 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3852
3853                 *DestinationLinesToRequestRowInVBlank =
3854                                 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3855
3856                 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3857                                 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3858
3859 #ifdef __DML_VBA_DEBUG__
3860                 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3861                 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3862                                 __func__, *DestinationLinesToRequestVMInVBlank);
3863                 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3864                 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3865                 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3866                                 __func__, *DestinationLinesToRequestRowInVBlank);
3867                 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3868                 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3869 #endif
3870
3871                 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3872                         *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3873                         *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3874 #ifdef __DML_VBA_DEBUG__
3875                         dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3876                         dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3877                         dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3878 #endif
3879                         if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3880                                 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3881                                         *VRatioPrefetchY =
3882                                                         dml_max((double) PrefetchSourceLinesY /
3883                                                                         LinesToRequestPrefetchPixelData,
3884                                                                         (double) MaxNumSwathY * SwathHeightY /
3885                                                                         (LinesToRequestPrefetchPixelData -
3886                                                                         (VInitPreFillY - 3.0) / 2.0));
3887                                         *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3888                                 } else {
3889                                         MyError = true;
3890                                         *VRatioPrefetchY = 0;
3891                                 }
3892 #ifdef __DML_VBA_DEBUG__
3893                                 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3894                                 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3895                                 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3896 #endif
3897                         }
3898
3899                         *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3900                         *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3901
3902 #ifdef __DML_VBA_DEBUG__
3903                         dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3904                         dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3905                         dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3906 #endif
3907                         if ((SwathHeightC > 4)) {
3908                                 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3909                                         *VRatioPrefetchC =
3910                                                 dml_max(*VRatioPrefetchC,
3911                                                         (double) MaxNumSwathC * SwathHeightC /
3912                                                         (LinesToRequestPrefetchPixelData -
3913                                                         (VInitPreFillC - 3.0) / 2.0));
3914                                         *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3915                                 } else {
3916                                         MyError = true;
3917                                         *VRatioPrefetchC = 0;
3918                                 }
3919 #ifdef __DML_VBA_DEBUG__
3920                                 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3921                                 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3922                                 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3923 #endif
3924                         }
3925
3926                         *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3927                                         / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3928                                         / LineTime;
3929
3930 #ifdef __DML_VBA_DEBUG__
3931                         dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3932                         dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3933                         dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3934                         dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3935                                         __func__, *RequiredPrefetchPixDataBWLuma);
3936 #endif
3937                         *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3938                                         LinesToRequestPrefetchPixelData
3939                                         * myPipe->BytePerPixelC
3940                                         * swath_width_chroma_ub / LineTime;
3941                 } else {
3942                         MyError = true;
3943 #ifdef __DML_VBA_DEBUG__
3944                         dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3945                                         __func__, LinesToRequestPrefetchPixelData);
3946 #endif
3947                         *VRatioPrefetchY = 0;
3948                         *VRatioPrefetchC = 0;
3949                         *RequiredPrefetchPixDataBWLuma = 0;
3950                         *RequiredPrefetchPixDataBWChroma = 0;
3951                 }
3952 #ifdef __DML_VBA_DEBUG__
3953                 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3954                         (double)LinesToRequestPrefetchPixelData * LineTime +
3955                         2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
3956                 dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
3957                 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
3958                         (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
3959                 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
3960                 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
3961                         TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
3962                         ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
3963                 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
3964                                 PixelPTEBytesPerRow);
3965 #endif
3966         } else {
3967                 MyError = true;
3968 #ifdef __DML_VBA_DEBUG__
3969                 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
3970                                 __func__, dst_y_prefetch_equ);
3971 #endif
3972         }
3973
3974         {
3975                 double prefetch_vm_bw;
3976                 double prefetch_row_bw;
3977
3978                 if (PDEAndMetaPTEBytesFrame == 0) {
3979                         prefetch_vm_bw = 0;
3980                 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
3981 #ifdef __DML_VBA_DEBUG__
3982                         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3983                         dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3984                         dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3985                                         __func__, *DestinationLinesToRequestVMInVBlank);
3986                         dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3987 #endif
3988                         prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
3989                                         (*DestinationLinesToRequestVMInVBlank * LineTime);
3990 #ifdef __DML_VBA_DEBUG__
3991                         dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
3992 #endif
3993                 } else {
3994                         prefetch_vm_bw = 0;
3995                         MyError = true;
3996 #ifdef __DML_VBA_DEBUG__
3997                         dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
3998                                         __func__, *DestinationLinesToRequestVMInVBlank);
3999 #endif
4000                 }
4001
4002                 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4003                         prefetch_row_bw = 0;
4004                 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
4005                         prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4006                                         (*DestinationLinesToRequestRowInVBlank * LineTime);
4007
4008 #ifdef __DML_VBA_DEBUG__
4009                         dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4010                         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4011                         dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4012                                         __func__, *DestinationLinesToRequestRowInVBlank);
4013                         dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4014 #endif
4015                 } else {
4016                         prefetch_row_bw = 0;
4017                         MyError = true;
4018 #ifdef __DML_VBA_DEBUG__
4019                         dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4020                                         __func__, *DestinationLinesToRequestRowInVBlank);
4021 #endif
4022                 }
4023
4024                 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4025         }
4026
4027         if (MyError) {
4028                 *PrefetchBandwidth = 0;
4029                 TimeForFetchingMetaPTE = 0;
4030                 TimeForFetchingRowInVBlank = 0;
4031                 *DestinationLinesToRequestVMInVBlank = 0;
4032                 *DestinationLinesToRequestRowInVBlank = 0;
4033                 *DestinationLinesForPrefetch = 0;
4034                 LinesToRequestPrefetchPixelData = 0;
4035                 *VRatioPrefetchY = 0;
4036                 *VRatioPrefetchC = 0;
4037                 *RequiredPrefetchPixDataBWLuma = 0;
4038                 *RequiredPrefetchPixDataBWChroma = 0;
4039         }
4040
4041         return MyError;
4042 } // CalculatePrefetchSchedule
4043
4044 void dml32_CalculateFlipSchedule(
4045                 double HostVMInefficiencyFactor,
4046                 double UrgentExtraLatency,
4047                 double UrgentLatency,
4048                 unsigned int GPUVMMaxPageTableLevels,
4049                 bool HostVMEnable,
4050                 unsigned int HostVMMaxNonCachedPageTableLevels,
4051                 bool GPUVMEnable,
4052                 double HostVMMinPageSize,
4053                 double PDEAndMetaPTEBytesPerFrame,
4054                 double MetaRowBytes,
4055                 double DPTEBytesPerRow,
4056                 double BandwidthAvailableForImmediateFlip,
4057                 unsigned int TotImmediateFlipBytes,
4058                 enum source_format_class SourcePixelFormat,
4059                 double LineTime,
4060                 double VRatio,
4061                 double VRatioChroma,
4062                 double Tno_bw,
4063                 bool DCCEnable,
4064                 unsigned int dpte_row_height,
4065                 unsigned int meta_row_height,
4066                 unsigned int dpte_row_height_chroma,
4067                 unsigned int meta_row_height_chroma,
4068                 bool    use_one_row_for_frame_flip,
4069
4070                 /* Output */
4071                 double *DestinationLinesToRequestVMInImmediateFlip,
4072                 double *DestinationLinesToRequestRowInImmediateFlip,
4073                 double *final_flip_bw,
4074                 bool *ImmediateFlipSupportedForPipe)
4075 {
4076         double min_row_time = 0.0;
4077         unsigned int HostVMDynamicLevelsTrips;
4078         double TimeForFetchingMetaPTEImmediateFlip;
4079         double TimeForFetchingRowInVBlankImmediateFlip;
4080         double ImmediateFlipBW;
4081
4082         if (GPUVMEnable == true && HostVMEnable == true)
4083                 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4084         else
4085                 HostVMDynamicLevelsTrips = 0;
4086
4087 #ifdef __DML_VBA_DEBUG__
4088         dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4089         dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4090 #endif
4091
4092         if (TotImmediateFlipBytes > 0) {
4093                 if (use_one_row_for_frame_flip) {
4094                         ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4095                                         BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4096                 } else {
4097                         ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4098                                         BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4099                 }
4100                 if (GPUVMEnable == true) {
4101                         TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4102                                         HostVMInefficiencyFactor / ImmediateFlipBW,
4103                                         UrgentExtraLatency + UrgentLatency *
4104                                         (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4105                                         LineTime / 4.0);
4106                 } else {
4107                         TimeForFetchingMetaPTEImmediateFlip = 0;
4108                 }
4109                 if ((GPUVMEnable == true || DCCEnable == true)) {
4110                         TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4111                                         (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4112                                         UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4113                 } else {
4114                         TimeForFetchingRowInVBlankImmediateFlip = 0;
4115                 }
4116
4117                 *DestinationLinesToRequestVMInImmediateFlip =
4118                                 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4119                 *DestinationLinesToRequestRowInImmediateFlip =
4120                                 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4121
4122                 if (GPUVMEnable == true) {
4123                         *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4124                                         (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4125                                         (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4126                                         (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4127                 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4128                         *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4129                                         (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4130                 } else {
4131                         *final_flip_bw = 0;
4132                 }
4133         } else {
4134                 TimeForFetchingMetaPTEImmediateFlip = 0;
4135                 TimeForFetchingRowInVBlankImmediateFlip = 0;
4136                 *DestinationLinesToRequestVMInImmediateFlip = 0;
4137                 *DestinationLinesToRequestRowInImmediateFlip = 0;
4138                 *final_flip_bw = 0;
4139         }
4140
4141         if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4142                 if (GPUVMEnable == true && DCCEnable != true) {
4143                         min_row_time = dml_min(dpte_row_height *
4144                                         LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4145                 } else if (GPUVMEnable != true && DCCEnable == true) {
4146                         min_row_time = dml_min(meta_row_height *
4147                                         LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4148                 } else {
4149                         min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4150                                         LineTime / VRatio, dpte_row_height_chroma * LineTime /
4151                                         VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4152                 }
4153         } else {
4154                 if (GPUVMEnable == true && DCCEnable != true) {
4155                         min_row_time = dpte_row_height * LineTime / VRatio;
4156                 } else if (GPUVMEnable != true && DCCEnable == true) {
4157                         min_row_time = meta_row_height * LineTime / VRatio;
4158                 } else {
4159                         min_row_time =
4160                                 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4161                 }
4162         }
4163
4164         if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4165                         || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4166                                         > min_row_time) {
4167                 *ImmediateFlipSupportedForPipe = false;
4168         } else {
4169                 *ImmediateFlipSupportedForPipe = true;
4170         }
4171
4172 #ifdef __DML_VBA_DEBUG__
4173         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4174         dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4175         dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4176                         __func__, *DestinationLinesToRequestVMInImmediateFlip);
4177         dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4178                         __func__, *DestinationLinesToRequestRowInImmediateFlip);
4179         dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4180         dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4181                         __func__, TimeForFetchingRowInVBlankImmediateFlip);
4182         dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4183         dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4184 #endif
4185 } // CalculateFlipSchedule
4186
4187 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4188                 struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
4189                 bool USRRetrainingRequiredFinal,
4190                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4191                 unsigned int PrefetchMode,
4192                 unsigned int NumberOfActiveSurfaces,
4193                 unsigned int MaxLineBufferLines,
4194                 unsigned int LineBufferSize,
4195                 unsigned int WritebackInterfaceBufferSize,
4196                 double DCFCLK,
4197                 double ReturnBW,
4198                 bool SynchronizeTimingsFinal,
4199                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4200                 bool DRRDisplay[],
4201                 unsigned int dpte_group_bytes[],
4202                 unsigned int meta_row_height[],
4203                 unsigned int meta_row_height_chroma[],
4204                 SOCParametersList mmSOCParameters,
4205                 unsigned int WritebackChunkSize,
4206                 double SOCCLK,
4207                 double DCFClkDeepSleep,
4208                 unsigned int DETBufferSizeY[],
4209                 unsigned int DETBufferSizeC[],
4210                 unsigned int SwathHeightY[],
4211                 unsigned int SwathHeightC[],
4212                 unsigned int LBBitPerPixel[],
4213                 double SwathWidthY[],
4214                 double SwathWidthC[],
4215                 double HRatio[],
4216                 double HRatioChroma[],
4217                 unsigned int VTaps[],
4218                 unsigned int VTapsChroma[],
4219                 double VRatio[],
4220                 double VRatioChroma[],
4221                 unsigned int HTotal[],
4222                 unsigned int VTotal[],
4223                 unsigned int VActive[],
4224                 double PixelClock[],
4225                 unsigned int BlendingAndTiming[],
4226                 unsigned int DPPPerSurface[],
4227                 double BytePerPixelDETY[],
4228                 double BytePerPixelDETC[],
4229                 double DSTXAfterScaler[],
4230                 double DSTYAfterScaler[],
4231                 bool WritebackEnable[],
4232                 enum source_format_class WritebackPixelFormat[],
4233                 double WritebackDestinationWidth[],
4234                 double WritebackDestinationHeight[],
4235                 double WritebackSourceHeight[],
4236                 bool UnboundedRequestEnabled,
4237                 unsigned int CompressedBufferSizeInkByte,
4238
4239                 /* Output */
4240                 Watermarks *Watermark,
4241                 enum clock_change_support *DRAMClockChangeSupport,
4242                 double MaxActiveDRAMClockChangeLatencySupported[],
4243                 unsigned int SubViewportLinesNeededInMALL[],
4244                 enum dm_fclock_change_support *FCLKChangeSupport,
4245                 double *MinActiveFCLKChangeLatencySupported,
4246                 bool *USRRetrainingSupport,
4247                 double ActiveDRAMClockChangeLatencyMargin[])
4248 {
4249         unsigned int i, j, k;
4250
4251         st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0;
4252         st_vars->DRAMClockChangeSupportNumber = 0;
4253         st_vars->DRAMClockChangeMethod = 0;
4254         st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4255         st_vars->MinActiveFCLKChangeMargin = 0.;
4256         st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4257         st_vars->TotalPixelBW = 0.0;
4258         st_vars->TotalActiveWriteback = 0;
4259
4260         Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4261         Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4262                         + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4263         Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
4264         Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
4265         Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4266                         + 10 / DCFClkDeepSleep;
4267         Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4268                         + 10 / DCFClkDeepSleep;
4269         Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4270                         + 10 / DCFClkDeepSleep;
4271         Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4272                         + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4273
4274 #ifdef __DML_VBA_DEBUG__
4275         dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4276         dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4277         dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4278         dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
4279         dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
4280         dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
4281         dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
4282         dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
4283         dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
4284         dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
4285         dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4286                         __func__, Watermark->Z8StutterEnterPlusExitWatermark);
4287 #endif
4288
4289
4290         st_vars->TotalActiveWriteback = 0;
4291         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4292                 if (WritebackEnable[k] == true)
4293                         st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1;
4294         }
4295
4296         if (st_vars->TotalActiveWriteback <= 1) {
4297                 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4298         } else {
4299                 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4300                                 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4301         }
4302         if (USRRetrainingRequiredFinal)
4303                 Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
4304                                 + mmSOCParameters.USRRetrainingLatency;
4305
4306         if (st_vars->TotalActiveWriteback <= 1) {
4307                 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4308                                 + mmSOCParameters.WritebackLatency;
4309                 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4310                                 + mmSOCParameters.WritebackLatency;
4311         } else {
4312                 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4313                                 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4314                 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4315                                 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
4316         }
4317
4318         if (USRRetrainingRequiredFinal)
4319                 Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
4320                                 + mmSOCParameters.USRRetrainingLatency;
4321
4322         if (USRRetrainingRequiredFinal)
4323                 Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
4324                                 + mmSOCParameters.USRRetrainingLatency;
4325
4326 #ifdef __DML_VBA_DEBUG__
4327         dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4328                         __func__, Watermark->WritebackDRAMClockChangeWatermark);
4329         dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
4330         dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
4331         dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
4332         dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4333 #endif
4334
4335         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4336                 st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
4337                                 SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
4338         }
4339
4340         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4341
4342                 st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
4343                 st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
4344
4345
4346 #ifdef __DML_VBA_DEBUG__
4347                 dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
4348                 dml_print("DML::%s: k=%d, LineBufferSize     = %d\n", __func__, k, LineBufferSize);
4349                 dml_print("DML::%s: k=%d, LBBitPerPixel      = %d\n", __func__, k, LBBitPerPixel[k]);
4350                 dml_print("DML::%s: k=%d, HRatio             = %f\n", __func__, k, HRatio[k]);
4351                 dml_print("DML::%s: k=%d, VTaps              = %d\n", __func__, k, VTaps[k]);
4352 #endif
4353
4354                 st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
4355                 st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
4356                 st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k];
4357
4358                 if (UnboundedRequestEnabled) {
4359                         st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY
4360                                         + CompressedBufferSizeInkByte * 1024
4361                                                         * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
4362                                                         / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW;
4363                 }
4364
4365                 st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4366                 st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]);
4367                 st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
4368
4369                 st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY
4370                                 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
4371
4372                 if (NumberOfActiveSurfaces > 1) {
4373                         st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY
4374                                         - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
4375                                                         / PixelClock[k] / VRatio[k];
4376                 }
4377
4378                 if (BytePerPixelDETC[k] > 0) {
4379                         st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4380                         st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]);
4381                         st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
4382                                         / VRatioChroma[k];
4383                         st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC
4384                                         - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
4385                                                         / PixelClock[k];
4386                         if (NumberOfActiveSurfaces > 1) {
4387                                 st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC
4388                                                 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
4389                                                                 / PixelClock[k] / VRatioChroma[k];
4390                         }
4391                         st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY,
4392                                         st_vars->ActiveClockChangeLatencyHidingC);
4393                 } else {
4394                         st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY;
4395                 }
4396
4397                 ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4398                                 - Watermark->DRAMClockChangeWatermark;
4399                 st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4400                                 - Watermark->FCLKChangeWatermark;
4401                 st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
4402
4403                 if (WritebackEnable[k]) {
4404                         st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
4405                                         / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
4406                                                         / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
4407                         if (WritebackPixelFormat[k] == dm_444_64)
4408                                 st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2;
4409
4410                         st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding
4411                                         - Watermark->WritebackDRAMClockChangeWatermark;
4412
4413                         st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding
4414                                         - Watermark->WritebackFCLKChangeWatermark;
4415
4416                         ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4417                                         st_vars->WritebackFCLKChangeLatencyMargin);
4418                         st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k],
4419                                         st_vars->WritebackDRAMClockChangeLatencyMargin);
4420                 }
4421                 MaxActiveDRAMClockChangeLatencySupported[k] =
4422                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4423                                                 0 :
4424                                                 (ActiveDRAMClockChangeLatencyMargin[k]
4425                                                                 + mmSOCParameters.DRAMClockChangeLatency);
4426         }
4427
4428         for (i = 0; i < NumberOfActiveSurfaces; ++i) {
4429                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
4430                         if (i == j ||
4431                                         (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
4432                                         (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
4433                                         (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
4434                                         (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
4435                                         HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
4436                                         VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4437                                         (DRRDisplay[i] || DRRDisplay[j]))) {
4438                                 st_vars->SynchronizedSurfaces[i][j] = true;
4439                         } else {
4440                                 st_vars->SynchronizedSurfaces[i][j] = false;
4441                         }
4442                 }
4443         }
4444
4445         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4446                 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4447                                 (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4448                                 st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) {
4449                         st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4450                         st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k];
4451                         st_vars->SurfaceWithMinActiveFCLKChangeMargin = k;
4452                 }
4453         }
4454
4455         *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4456
4457         st_vars->SameTimingForFCLKChange = true;
4458         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4459                 if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) {
4460                         if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4461                                         (st_vars->SameTimingForFCLKChange ||
4462                                         st_vars->ActiveFCLKChangeLatencyMargin[k] <
4463                                         st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4464                                 st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k];
4465                         }
4466                         st_vars->SameTimingForFCLKChange = false;
4467                 }
4468         }
4469
4470         if (st_vars->MinActiveFCLKChangeMargin > 0) {
4471                 *FCLKChangeSupport = dm_fclock_change_vactive;
4472         } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4473                         (PrefetchMode <= 1)) {
4474                 *FCLKChangeSupport = dm_fclock_change_vblank;
4475         } else {
4476                 *FCLKChangeSupport = dm_fclock_change_unsupported;
4477         }
4478
4479         *USRRetrainingSupport = true;
4480         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4481                 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4482                                 (st_vars->USRRetrainingLatencyMargin[k] < 0)) {
4483                         *USRRetrainingSupport = false;
4484                 }
4485         }
4486
4487         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4488                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4489                                 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4490                                 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4491                                 ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4492                         if (PrefetchMode > 0) {
4493                                 st_vars->DRAMClockChangeSupportNumber = 2;
4494                         } else if (st_vars->DRAMClockChangeSupportNumber == 0) {
4495                                 st_vars->DRAMClockChangeSupportNumber = 1;
4496                                 st_vars->LastSurfaceWithoutMargin = k;
4497                         } else if (st_vars->DRAMClockChangeSupportNumber == 1 &&
4498                                         !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) {
4499                                 st_vars->DRAMClockChangeSupportNumber = 2;
4500                         }
4501                 }
4502         }
4503
4504         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4505                 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4506                         st_vars->DRAMClockChangeMethod = 1;
4507                 else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4508                         st_vars->DRAMClockChangeMethod = 2;
4509         }
4510
4511         if (st_vars->DRAMClockChangeMethod == 0) {
4512                 if (st_vars->DRAMClockChangeSupportNumber == 0)
4513                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4514                 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4515                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4516                 else
4517                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4518         } else if (st_vars->DRAMClockChangeMethod == 1) {
4519                 if (st_vars->DRAMClockChangeSupportNumber == 0)
4520                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4521                 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4522                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4523                 else
4524                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4525         } else {
4526                 if (st_vars->DRAMClockChangeSupportNumber == 0)
4527                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4528                 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4529                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4530                 else
4531                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4532         }
4533
4534         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4535                 unsigned int dst_y_pstate;
4536                 unsigned int src_y_pstate_l;
4537                 unsigned int src_y_pstate_c;
4538                 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4539
4540                 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
4541                 src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
4542                 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k];
4543                 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
4544
4545 #ifdef __DML_VBA_DEBUG__
4546 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4547 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4548 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4549 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4550 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]);
4551 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4552 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4553 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4554 dml_print("DML::%s: k=%d, meta_row_height   = %d\n", __func__, k, meta_row_height[k]);
4555 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4556 #endif
4557                 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4558
4559                 if (BytePerPixelDETC[k] > 0) {
4560                         src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
4561                         src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k];
4562                         sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
4563                         SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4564
4565 #ifdef __DML_VBA_DEBUG__
4566 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4567 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4568 dml_print("DML::%s: k=%d, meta_row_height_chroma    = %d\n", __func__, k, meta_row_height_chroma[k]);
4569 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4570 #endif
4571                 }
4572         }
4573 #ifdef __DML_VBA_DEBUG__
4574         dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4575         dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4576         dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4577                         __func__, *MinActiveFCLKChangeLatencySupported);
4578         dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4579 #endif
4580 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4581
4582 double dml32_CalculateWriteBackDISPCLK(
4583                 enum source_format_class WritebackPixelFormat,
4584                 double PixelClock,
4585                 double WritebackHRatio,
4586                 double WritebackVRatio,
4587                 unsigned int WritebackHTaps,
4588                 unsigned int WritebackVTaps,
4589                 unsigned int   WritebackSourceWidth,
4590                 unsigned int   WritebackDestinationWidth,
4591                 unsigned int HTotal,
4592                 unsigned int WritebackLineBufferSize,
4593                 double DISPCLKDPPCLKVCOSpeed)
4594 {
4595         double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4596
4597         DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4598         DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4599         DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4600                         WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4601         return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4602 }
4603
4604 void dml32_CalculateMinAndMaxPrefetchMode(
4605                 enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4606                 unsigned int             *MinPrefetchMode,
4607                 unsigned int             *MaxPrefetchMode)
4608 {
4609         if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4610                 *MinPrefetchMode = 3;
4611                 *MaxPrefetchMode = 3;
4612         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4613                 *MinPrefetchMode = 2;
4614                 *MaxPrefetchMode = 2;
4615         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4616                 *MinPrefetchMode = 1;
4617                 *MaxPrefetchMode = 1;
4618         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4619                 *MinPrefetchMode = 0;
4620                 *MaxPrefetchMode = 0;
4621         } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4622                         dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4623                 *MinPrefetchMode = 0;
4624                 *MaxPrefetchMode = 3;
4625         } else {
4626                 *MinPrefetchMode = 0;
4627                 *MaxPrefetchMode = 3;
4628         }
4629 } // CalculateMinAndMaxPrefetchMode
4630
4631 void dml32_CalculatePixelDeliveryTimes(
4632                 unsigned int             NumberOfActiveSurfaces,
4633                 double              VRatio[],
4634                 double              VRatioChroma[],
4635                 double              VRatioPrefetchY[],
4636                 double              VRatioPrefetchC[],
4637                 unsigned int             swath_width_luma_ub[],
4638                 unsigned int             swath_width_chroma_ub[],
4639                 unsigned int             DPPPerSurface[],
4640                 double              HRatio[],
4641                 double              HRatioChroma[],
4642                 double              PixelClock[],
4643                 double              PSCL_THROUGHPUT[],
4644                 double              PSCL_THROUGHPUT_CHROMA[],
4645                 double              Dppclk[],
4646                 unsigned int             BytePerPixelC[],
4647                 enum dm_rotation_angle   SourceRotation[],
4648                 unsigned int             NumberOfCursors[],
4649                 unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
4650                 unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
4651                 unsigned int             BlockWidth256BytesY[],
4652                 unsigned int             BlockHeight256BytesY[],
4653                 unsigned int             BlockWidth256BytesC[],
4654                 unsigned int             BlockHeight256BytesC[],
4655
4656                 /* Output */
4657                 double              DisplayPipeLineDeliveryTimeLuma[],
4658                 double              DisplayPipeLineDeliveryTimeChroma[],
4659                 double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
4660                 double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
4661                 double              DisplayPipeRequestDeliveryTimeLuma[],
4662                 double              DisplayPipeRequestDeliveryTimeChroma[],
4663                 double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4664                 double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4665                 double              CursorRequestDeliveryTime[],
4666                 double              CursorRequestDeliveryTimePrefetch[])
4667 {
4668         double   req_per_swath_ub;
4669         unsigned int k;
4670
4671         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4672
4673 #ifdef __DML_VBA_DEBUG__
4674                 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4675                 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4676                 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4677                 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4678                 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4679                 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4680                 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4681                 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4682                 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4683                 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4684                 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4685 #endif
4686
4687                 if (VRatio[k] <= 1) {
4688                         DisplayPipeLineDeliveryTimeLuma[k] =
4689                                         swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4690                 } else {
4691                         DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4692                 }
4693
4694                 if (BytePerPixelC[k] == 0) {
4695                         DisplayPipeLineDeliveryTimeChroma[k] = 0;
4696                 } else {
4697                         if (VRatioChroma[k] <= 1) {
4698                                 DisplayPipeLineDeliveryTimeChroma[k] =
4699                                         swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4700                         } else {
4701                                 DisplayPipeLineDeliveryTimeChroma[k] =
4702                                         swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4703                         }
4704                 }
4705
4706                 if (VRatioPrefetchY[k] <= 1) {
4707                         DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4708                                         swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4709                 } else {
4710                         DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4711                                         swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4712                 }
4713
4714                 if (BytePerPixelC[k] == 0) {
4715                         DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4716                 } else {
4717                         if (VRatioPrefetchC[k] <= 1) {
4718                                 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4719                                                 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4720                         } else {
4721                                 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4722                                                 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4723                         }
4724                 }
4725 #ifdef __DML_VBA_DEBUG__
4726                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4727                                 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4728                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4729                                 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4730                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4731                                 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4732                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4733                                 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4734 #endif
4735         }
4736
4737         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4738                 if (!IsVertical(SourceRotation[k]))
4739                         req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4740                 else
4741                         req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4742 #ifdef __DML_VBA_DEBUG__
4743                 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4744 #endif
4745
4746                 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4747                 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4748                                 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4749                 if (BytePerPixelC[k] == 0) {
4750                         DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4751                         DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4752                 } else {
4753                         if (!IsVertical(SourceRotation[k]))
4754                                 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4755                         else
4756                                 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4757 #ifdef __DML_VBA_DEBUG__
4758                         dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4759 #endif
4760                         DisplayPipeRequestDeliveryTimeChroma[k] =
4761                                         DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4762                         DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4763                                         DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4764                 }
4765 #ifdef __DML_VBA_DEBUG__
4766                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4767                                 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4768                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4769                                 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4770                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4771                                 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4772                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4773                                 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4774 #endif
4775         }
4776
4777         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4778                 unsigned int cursor_req_per_width;
4779
4780                 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4781                                 256.0 / 8.0, 1.0);
4782                 if (NumberOfCursors[k] > 0) {
4783                         if (VRatio[k] <= 1) {
4784                                 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4785                                                 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4786                         } else {
4787                                 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4788                                                 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4789                         }
4790                         if (VRatioPrefetchY[k] <= 1) {
4791                                 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4792                                                 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4793                         } else {
4794                                 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4795                                                 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4796                         }
4797                 } else {
4798                         CursorRequestDeliveryTime[k] = 0;
4799                         CursorRequestDeliveryTimePrefetch[k] = 0;
4800                 }
4801 #ifdef __DML_VBA_DEBUG__
4802                 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4803                                 __func__, k, NumberOfCursors[k]);
4804                 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4805                                 __func__, k, CursorRequestDeliveryTime[k]);
4806                 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4807                                 __func__, k, CursorRequestDeliveryTimePrefetch[k]);
4808 #endif
4809         }
4810 } // CalculatePixelDeliveryTimes
4811
4812 void dml32_CalculateMetaAndPTETimes(
4813                 bool use_one_row_for_frame[],
4814                 unsigned int NumberOfActiveSurfaces,
4815                 bool GPUVMEnable,
4816                 unsigned int MetaChunkSize,
4817                 unsigned int MinMetaChunkSizeBytes,
4818                 unsigned int    HTotal[],
4819                 double  VRatio[],
4820                 double  VRatioChroma[],
4821                 double  DestinationLinesToRequestRowInVBlank[],
4822                 double  DestinationLinesToRequestRowInImmediateFlip[],
4823                 bool DCCEnable[],
4824                 double  PixelClock[],
4825                 unsigned int BytePerPixelY[],
4826                 unsigned int BytePerPixelC[],
4827                 enum dm_rotation_angle SourceRotation[],
4828                 unsigned int dpte_row_height[],
4829                 unsigned int dpte_row_height_chroma[],
4830                 unsigned int meta_row_width[],
4831                 unsigned int meta_row_width_chroma[],
4832                 unsigned int meta_row_height[],
4833                 unsigned int meta_row_height_chroma[],
4834                 unsigned int meta_req_width[],
4835                 unsigned int meta_req_width_chroma[],
4836                 unsigned int meta_req_height[],
4837                 unsigned int meta_req_height_chroma[],
4838                 unsigned int dpte_group_bytes[],
4839                 unsigned int    PTERequestSizeY[],
4840                 unsigned int    PTERequestSizeC[],
4841                 unsigned int    PixelPTEReqWidthY[],
4842                 unsigned int    PixelPTEReqHeightY[],
4843                 unsigned int    PixelPTEReqWidthC[],
4844                 unsigned int    PixelPTEReqHeightC[],
4845                 unsigned int    dpte_row_width_luma_ub[],
4846                 unsigned int    dpte_row_width_chroma_ub[],
4847
4848                 /* Output */
4849                 double DST_Y_PER_PTE_ROW_NOM_L[],
4850                 double DST_Y_PER_PTE_ROW_NOM_C[],
4851                 double DST_Y_PER_META_ROW_NOM_L[],
4852                 double DST_Y_PER_META_ROW_NOM_C[],
4853                 double TimePerMetaChunkNominal[],
4854                 double TimePerChromaMetaChunkNominal[],
4855                 double TimePerMetaChunkVBlank[],
4856                 double TimePerChromaMetaChunkVBlank[],
4857                 double TimePerMetaChunkFlip[],
4858                 double TimePerChromaMetaChunkFlip[],
4859                 double time_per_pte_group_nom_luma[],
4860                 double time_per_pte_group_vblank_luma[],
4861                 double time_per_pte_group_flip_luma[],
4862                 double time_per_pte_group_nom_chroma[],
4863                 double time_per_pte_group_vblank_chroma[],
4864                 double time_per_pte_group_flip_chroma[])
4865 {
4866         unsigned int   meta_chunk_width;
4867         unsigned int   min_meta_chunk_width;
4868         unsigned int   meta_chunk_per_row_int;
4869         unsigned int   meta_row_remainder;
4870         unsigned int   meta_chunk_threshold;
4871         unsigned int   meta_chunks_per_row_ub;
4872         unsigned int   meta_chunk_width_chroma;
4873         unsigned int   min_meta_chunk_width_chroma;
4874         unsigned int   meta_chunk_per_row_int_chroma;
4875         unsigned int   meta_row_remainder_chroma;
4876         unsigned int   meta_chunk_threshold_chroma;
4877         unsigned int   meta_chunks_per_row_ub_chroma;
4878         unsigned int   dpte_group_width_luma;
4879         unsigned int   dpte_groups_per_row_luma_ub;
4880         unsigned int   dpte_group_width_chroma;
4881         unsigned int   dpte_groups_per_row_chroma_ub;
4882         unsigned int k;
4883
4884         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4885                 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4886                 if (BytePerPixelC[k] == 0)
4887                         DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4888                 else
4889                         DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4890                 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4891                 if (BytePerPixelC[k] == 0)
4892                         DST_Y_PER_META_ROW_NOM_C[k] = 0;
4893                 else
4894                         DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4895         }
4896
4897         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4898                 if (DCCEnable[k] == true) {
4899                         meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4900                         min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4901                         meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4902                         meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4903                         if (!IsVertical(SourceRotation[k]))
4904                                 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4905                         else
4906                                 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4907
4908                         if (meta_row_remainder <= meta_chunk_threshold)
4909                                 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4910                         else
4911                                 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4912
4913                         TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4914                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4915                         TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4916                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4917                         TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4918                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4919                         if (BytePerPixelC[k] == 0) {
4920                                 TimePerChromaMetaChunkNominal[k] = 0;
4921                                 TimePerChromaMetaChunkVBlank[k] = 0;
4922                                 TimePerChromaMetaChunkFlip[k] = 0;
4923                         } else {
4924                                 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4925                                                 meta_row_height_chroma[k];
4926                                 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4927                                                 meta_row_height_chroma[k];
4928                                 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4929                                                 meta_chunk_width_chroma;
4930                                 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4931                                 if (!IsVertical(SourceRotation[k])) {
4932                                         meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4933                                                         meta_req_width_chroma[k];
4934                                 } else {
4935                                         meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4936                                                         meta_req_height_chroma[k];
4937                                 }
4938                                 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4939                                         meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4940                                 else
4941                                         meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4942
4943                                 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4944                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4945                                 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4946                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4947                                 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4948                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4949                         }
4950                 } else {
4951                         TimePerMetaChunkNominal[k] = 0;
4952                         TimePerMetaChunkVBlank[k] = 0;
4953                         TimePerMetaChunkFlip[k] = 0;
4954                         TimePerChromaMetaChunkNominal[k] = 0;
4955                         TimePerChromaMetaChunkVBlank[k] = 0;
4956                         TimePerChromaMetaChunkFlip[k] = 0;
4957                 }
4958         }
4959
4960         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4961                 if (GPUVMEnable == true) {
4962                         if (!IsVertical(SourceRotation[k])) {
4963                                 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4964                                                 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
4965                         } else {
4966                                 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4967                                                 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
4968                         }
4969
4970                         if (use_one_row_for_frame[k]) {
4971                                 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4972                                                 (double) dpte_group_width_luma / 2.0, 1.0);
4973                         } else {
4974                                 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4975                                                 (double) dpte_group_width_luma, 1.0);
4976                         }
4977 #ifdef __DML_VBA_DEBUG__
4978                         dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
4979                                         __func__, k, use_one_row_for_frame[k]);
4980                         dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
4981                                         __func__, k, dpte_group_bytes[k]);
4982                         dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
4983                                         __func__, k, PTERequestSizeY[k]);
4984                         dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
4985                                         __func__, k, PixelPTEReqWidthY[k]);
4986                         dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
4987                                         __func__, k, PixelPTEReqHeightY[k]);
4988                         dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
4989                                         __func__, k, dpte_row_width_luma_ub[k]);
4990                         dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
4991                                         __func__, k, dpte_group_width_luma);
4992                         dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
4993                                         __func__, k, dpte_groups_per_row_luma_ub);
4994 #endif
4995
4996                         time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
4997                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
4998                         time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
4999                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5000                         time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5001                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5002                         if (BytePerPixelC[k] == 0) {
5003                                 time_per_pte_group_nom_chroma[k] = 0;
5004                                 time_per_pte_group_vblank_chroma[k] = 0;
5005                                 time_per_pte_group_flip_chroma[k] = 0;
5006                         } else {
5007                                 if (!IsVertical(SourceRotation[k])) {
5008                                         dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5009                                                         (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5010                                 } else {
5011                                         dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5012                                                         (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5013                                 }
5014
5015                                 if (use_one_row_for_frame[k]) {
5016                                         dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5017                                                         (double) dpte_group_width_chroma / 2.0, 1.0);
5018                                 } else {
5019                                         dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5020                                                         (double) dpte_group_width_chroma, 1.0);
5021                                 }
5022 #ifdef __DML_VBA_DEBUG__
5023                                 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5024                                                 __func__, k, dpte_row_width_chroma_ub[k]);
5025                                 dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5026                                                 __func__, k, dpte_group_width_chroma);
5027                                 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5028                                                 __func__, k, dpte_groups_per_row_chroma_ub);
5029 #endif
5030                                 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5031                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5032                                 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5033                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5034                                 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5035                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5036                         }
5037                 } else {
5038                         time_per_pte_group_nom_luma[k] = 0;
5039                         time_per_pte_group_vblank_luma[k] = 0;
5040                         time_per_pte_group_flip_luma[k] = 0;
5041                         time_per_pte_group_nom_chroma[k] = 0;
5042                         time_per_pte_group_vblank_chroma[k] = 0;
5043                         time_per_pte_group_flip_chroma[k] = 0;
5044                 }
5045 #ifdef __DML_VBA_DEBUG__
5046                 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5047                                 __func__, k, DestinationLinesToRequestRowInVBlank[k]);
5048                 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5049                                 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5050                 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5051                                 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5052                 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5053                                 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5054                 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5055                                 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5056                 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5057                                 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5058                 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5059                                 __func__, k, TimePerMetaChunkNominal[k]);
5060                 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5061                                 __func__, k, TimePerMetaChunkVBlank[k]);
5062                 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5063                                 __func__, k, TimePerMetaChunkFlip[k]);
5064                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5065                                 __func__, k, TimePerChromaMetaChunkNominal[k]);
5066                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5067                                 __func__, k, TimePerChromaMetaChunkVBlank[k]);
5068                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5069                                 __func__, k, TimePerChromaMetaChunkFlip[k]);
5070                 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5071                                 __func__, k, time_per_pte_group_nom_luma[k]);
5072                 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5073                                 __func__, k, time_per_pte_group_vblank_luma[k]);
5074                 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5075                                 __func__, k, time_per_pte_group_flip_luma[k]);
5076                 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5077                                 __func__, k, time_per_pte_group_nom_chroma[k]);
5078                 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5079                                 __func__, k, time_per_pte_group_vblank_chroma[k]);
5080                 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5081                                 __func__, k, time_per_pte_group_flip_chroma[k]);
5082 #endif
5083         }
5084 } // CalculateMetaAndPTETimes
5085
5086 void dml32_CalculateVMGroupAndRequestTimes(
5087                 unsigned int     NumberOfActiveSurfaces,
5088                 bool     GPUVMEnable,
5089                 unsigned int     GPUVMMaxPageTableLevels,
5090                 unsigned int     HTotal[],
5091                 unsigned int     BytePerPixelC[],
5092                 double      DestinationLinesToRequestVMInVBlank[],
5093                 double      DestinationLinesToRequestVMInImmediateFlip[],
5094                 bool     DCCEnable[],
5095                 double      PixelClock[],
5096                 unsigned int        dpte_row_width_luma_ub[],
5097                 unsigned int        dpte_row_width_chroma_ub[],
5098                 unsigned int     vm_group_bytes[],
5099                 unsigned int     dpde0_bytes_per_frame_ub_l[],
5100                 unsigned int     dpde0_bytes_per_frame_ub_c[],
5101                 unsigned int        meta_pte_bytes_per_frame_ub_l[],
5102                 unsigned int        meta_pte_bytes_per_frame_ub_c[],
5103
5104                 /* Output */
5105                 double      TimePerVMGroupVBlank[],
5106                 double      TimePerVMGroupFlip[],
5107                 double      TimePerVMRequestVBlank[],
5108                 double      TimePerVMRequestFlip[])
5109 {
5110         unsigned int k;
5111         unsigned int   num_group_per_lower_vm_stage;
5112         unsigned int   num_req_per_lower_vm_stage;
5113
5114 #ifdef __DML_VBA_DEBUG__
5115         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
5116         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
5117 #endif
5118         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5119
5120 #ifdef __DML_VBA_DEBUG__
5121                 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
5122                 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
5123                 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
5124                                 __func__, k, dpde0_bytes_per_frame_ub_l[k]);
5125                 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
5126                                 __func__, k, dpde0_bytes_per_frame_ub_c[k]);
5127                 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
5128                                 __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
5129                 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
5130                                 __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
5131 #endif
5132
5133                 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
5134                         if (DCCEnable[k] == false) {
5135                                 if (BytePerPixelC[k] > 0) {
5136                                         num_group_per_lower_vm_stage = dml_ceil(
5137                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5138                                                         (double) (vm_group_bytes[k]), 1.0) +
5139                                                         dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5140                                                         (double) (vm_group_bytes[k]), 1.0);
5141                                 } else {
5142                                         num_group_per_lower_vm_stage = dml_ceil(
5143                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5144                                                         (double) (vm_group_bytes[k]), 1.0);
5145                                 }
5146                         } else {
5147                                 if (GPUVMMaxPageTableLevels == 1) {
5148                                         if (BytePerPixelC[k] > 0) {
5149                                                 num_group_per_lower_vm_stage = dml_ceil(
5150                                                         (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5151                                                         (double) (vm_group_bytes[k]), 1.0) +
5152                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5153                                                         (double) (vm_group_bytes[k]), 1.0);
5154                                         } else {
5155                                                 num_group_per_lower_vm_stage = dml_ceil(
5156                                                                 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5157                                                                 (double) (vm_group_bytes[k]), 1.0);
5158                                         }
5159                                 } else {
5160                                         if (BytePerPixelC[k] > 0) {
5161                                                 num_group_per_lower_vm_stage = 2 + dml_ceil(
5162                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5163                                                         (double) (vm_group_bytes[k]), 1) +
5164                                                         dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5165                                                         (double) (vm_group_bytes[k]), 1) +
5166                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
5167                                                         (double) (vm_group_bytes[k]), 1) +
5168                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5169                                                         (double) (vm_group_bytes[k]), 1);
5170                                         } else {
5171                                                 num_group_per_lower_vm_stage = 1 + dml_ceil(
5172                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5173                                                         (double) (vm_group_bytes[k]), 1) + dml_ceil(
5174                                                         (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5175                                                         (double) (vm_group_bytes[k]), 1);
5176                                         }
5177                                 }
5178                         }
5179
5180                         if (DCCEnable[k] == false) {
5181                                 if (BytePerPixelC[k] > 0) {
5182                                         num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
5183                                                         dpde0_bytes_per_frame_ub_c[k] / 64;
5184                                 } else {
5185                                         num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
5186                                 }
5187                         } else {
5188                                 if (GPUVMMaxPageTableLevels == 1) {
5189                                         if (BytePerPixelC[k] > 0) {
5190                                                 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
5191                                                                 meta_pte_bytes_per_frame_ub_c[k] / 64;
5192                                         } else {
5193                                                 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
5194                                         }
5195                                 } else {
5196                                         if (BytePerPixelC[k] > 0) {
5197                                                 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5198                                                                 64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
5199                                                                 meta_pte_bytes_per_frame_ub_l[k] / 64 +
5200                                                                 meta_pte_bytes_per_frame_ub_c[k] / 64;
5201                                         } else {
5202                                                 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5203                                                                 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
5204                                         }
5205                                 }
5206                         }
5207
5208                         TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5209                                         HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5210                         TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5211                                         HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5212                         TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5213                                         HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5214                         TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5215                                         HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5216
5217                         if (GPUVMMaxPageTableLevels > 2) {
5218                                 TimePerVMGroupVBlank[k]    = TimePerVMGroupVBlank[k] / 2;
5219                                 TimePerVMGroupFlip[k]      = TimePerVMGroupFlip[k] / 2;
5220                                 TimePerVMRequestVBlank[k]  = TimePerVMRequestVBlank[k] / 2;
5221                                 TimePerVMRequestFlip[k]    = TimePerVMRequestFlip[k] / 2;
5222                         }
5223
5224                 } else {
5225                         TimePerVMGroupVBlank[k] = 0;
5226                         TimePerVMGroupFlip[k] = 0;
5227                         TimePerVMRequestVBlank[k] = 0;
5228                         TimePerVMRequestFlip[k] = 0;
5229                 }
5230
5231 #ifdef __DML_VBA_DEBUG__
5232                 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
5233                 dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
5234                 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
5235                 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
5236 #endif
5237         }
5238 } // CalculateVMGroupAndRequestTimes
5239
5240 void dml32_CalculateDCCConfiguration(
5241                 bool             DCCEnabled,
5242                 bool             DCCProgrammingAssumesScanDirectionUnknown,
5243                 enum source_format_class SourcePixelFormat,
5244                 unsigned int             SurfaceWidthLuma,
5245                 unsigned int             SurfaceWidthChroma,
5246                 unsigned int             SurfaceHeightLuma,
5247                 unsigned int             SurfaceHeightChroma,
5248                 unsigned int                nomDETInKByte,
5249                 unsigned int             RequestHeight256ByteLuma,
5250                 unsigned int             RequestHeight256ByteChroma,
5251                 enum dm_swizzle_mode     TilingFormat,
5252                 unsigned int             BytePerPixelY,
5253                 unsigned int             BytePerPixelC,
5254                 double              BytePerPixelDETY,
5255                 double              BytePerPixelDETC,
5256                 enum dm_rotation_angle   SourceRotation,
5257                 /* Output */
5258                 unsigned int        *MaxUncompressedBlockLuma,
5259                 unsigned int        *MaxUncompressedBlockChroma,
5260                 unsigned int        *MaxCompressedBlockLuma,
5261                 unsigned int        *MaxCompressedBlockChroma,
5262                 unsigned int        *IndependentBlockLuma,
5263                 unsigned int        *IndependentBlockChroma)
5264 {
5265         typedef enum {
5266                 REQ_256Bytes,
5267                 REQ_128BytesNonContiguous,
5268                 REQ_128BytesContiguous,
5269                 REQ_NA
5270         } RequestType;
5271
5272         RequestType   RequestLuma;
5273         RequestType   RequestChroma;
5274
5275         unsigned int   segment_order_horz_contiguous_luma;
5276         unsigned int   segment_order_horz_contiguous_chroma;
5277         unsigned int   segment_order_vert_contiguous_luma;
5278         unsigned int   segment_order_vert_contiguous_chroma;
5279         unsigned int req128_horz_wc_l;
5280         unsigned int req128_horz_wc_c;
5281         unsigned int req128_vert_wc_l;
5282         unsigned int req128_vert_wc_c;
5283         unsigned int MAS_vp_horz_limit;
5284         unsigned int MAS_vp_vert_limit;
5285         unsigned int max_vp_horz_width;
5286         unsigned int max_vp_vert_height;
5287         unsigned int eff_surf_width_l;
5288         unsigned int eff_surf_width_c;
5289         unsigned int eff_surf_height_l;
5290         unsigned int eff_surf_height_c;
5291         unsigned int full_swath_bytes_horz_wc_l;
5292         unsigned int full_swath_bytes_horz_wc_c;
5293         unsigned int full_swath_bytes_vert_wc_l;
5294         unsigned int full_swath_bytes_vert_wc_c;
5295         unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5296
5297         unsigned int   yuv420;
5298         unsigned int   horz_div_l;
5299         unsigned int   horz_div_c;
5300         unsigned int   vert_div_l;
5301         unsigned int   vert_div_c;
5302
5303         unsigned int     swath_buf_size;
5304         double   detile_buf_vp_horz_limit;
5305         double   detile_buf_vp_vert_limit;
5306
5307         yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5308                         SourcePixelFormat == dm_420_12) ? 1 : 0);
5309         horz_div_l = 1;
5310         horz_div_c = 1;
5311         vert_div_l = 1;
5312         vert_div_c = 1;
5313
5314         if (BytePerPixelY == 1)
5315                 vert_div_l = 0;
5316         if (BytePerPixelC == 1)
5317                 vert_div_c = 0;
5318
5319         if (BytePerPixelC == 0) {
5320                 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5321                 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5322                                 BytePerPixelY / (1 + horz_div_l));
5323                 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5324                                 (1 + vert_div_l));
5325         } else {
5326                 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5327                 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5328                                 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5329                                 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5330                 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5331                                 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5332                                 (1 + vert_div_c) / (1 + yuv420));
5333         }
5334
5335         if (SourcePixelFormat == dm_420_10) {
5336                 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5337                 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5338         }
5339
5340         detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5341         detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5342
5343         MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5344         MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5345         max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5346         max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5347         eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5348         eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5349         eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5350         eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5351
5352         full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5353         full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5354         if (BytePerPixelC > 0) {
5355                 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5356                 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5357         } else {
5358                 full_swath_bytes_horz_wc_c = 0;
5359                 full_swath_bytes_vert_wc_c = 0;
5360         }
5361
5362         if (SourcePixelFormat == dm_420_10) {
5363                 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5364                 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5365                 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5366                 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5367         }
5368
5369         if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5370                 req128_horz_wc_l = 0;
5371                 req128_horz_wc_c = 0;
5372         } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5373                         full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5374                 req128_horz_wc_l = 0;
5375                 req128_horz_wc_c = 1;
5376         } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5377                         full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5378                 req128_horz_wc_l = 1;
5379                 req128_horz_wc_c = 0;
5380         } else {
5381                 req128_horz_wc_l = 1;
5382                 req128_horz_wc_c = 1;
5383         }
5384
5385         if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5386                 req128_vert_wc_l = 0;
5387                 req128_vert_wc_c = 0;
5388         } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5389                         full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5390                 req128_vert_wc_l = 0;
5391                 req128_vert_wc_c = 1;
5392         } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5393                         full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5394                 req128_vert_wc_l = 1;
5395                 req128_vert_wc_c = 0;
5396         } else {
5397                 req128_vert_wc_l = 1;
5398                 req128_vert_wc_c = 1;
5399         }
5400
5401         if (BytePerPixelY == 2) {
5402                 segment_order_horz_contiguous_luma = 0;
5403                 segment_order_vert_contiguous_luma = 1;
5404         } else {
5405                 segment_order_horz_contiguous_luma = 1;
5406                 segment_order_vert_contiguous_luma = 0;
5407         }
5408
5409         if (BytePerPixelC == 2) {
5410                 segment_order_horz_contiguous_chroma = 0;
5411                 segment_order_vert_contiguous_chroma = 1;
5412         } else {
5413                 segment_order_horz_contiguous_chroma = 1;
5414                 segment_order_vert_contiguous_chroma = 0;
5415         }
5416 #ifdef __DML_VBA_DEBUG__
5417         dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5418         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5419         dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5420         dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5421         dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5422         dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5423         dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5424         dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5425         dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5426                         __func__, segment_order_horz_contiguous_chroma);
5427 #endif
5428
5429         if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5430                 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5431                         RequestLuma = REQ_256Bytes;
5432                 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5433                                 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5434                         RequestLuma = REQ_128BytesNonContiguous;
5435                 else
5436                         RequestLuma = REQ_128BytesContiguous;
5437
5438                 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5439                         RequestChroma = REQ_256Bytes;
5440                 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5441                                 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5442                         RequestChroma = REQ_128BytesNonContiguous;
5443                 else
5444                         RequestChroma = REQ_128BytesContiguous;
5445
5446         } else if (!IsVertical(SourceRotation)) {
5447                 if (req128_horz_wc_l == 0)
5448                         RequestLuma = REQ_256Bytes;
5449                 else if (segment_order_horz_contiguous_luma == 0)
5450                         RequestLuma = REQ_128BytesNonContiguous;
5451                 else
5452                         RequestLuma = REQ_128BytesContiguous;
5453
5454                 if (req128_horz_wc_c == 0)
5455                         RequestChroma = REQ_256Bytes;
5456                 else if (segment_order_horz_contiguous_chroma == 0)
5457                         RequestChroma = REQ_128BytesNonContiguous;
5458                 else
5459                         RequestChroma = REQ_128BytesContiguous;
5460
5461         } else {
5462                 if (req128_vert_wc_l == 0)
5463                         RequestLuma = REQ_256Bytes;
5464                 else if (segment_order_vert_contiguous_luma == 0)
5465                         RequestLuma = REQ_128BytesNonContiguous;
5466                 else
5467                         RequestLuma = REQ_128BytesContiguous;
5468
5469                 if (req128_vert_wc_c == 0)
5470                         RequestChroma = REQ_256Bytes;
5471                 else if (segment_order_vert_contiguous_chroma == 0)
5472                         RequestChroma = REQ_128BytesNonContiguous;
5473                 else
5474                         RequestChroma = REQ_128BytesContiguous;
5475         }
5476
5477         if (RequestLuma == REQ_256Bytes) {
5478                 *MaxUncompressedBlockLuma = 256;
5479                 *MaxCompressedBlockLuma = 256;
5480                 *IndependentBlockLuma = 0;
5481         } else if (RequestLuma == REQ_128BytesContiguous) {
5482                 *MaxUncompressedBlockLuma = 256;
5483                 *MaxCompressedBlockLuma = 128;
5484                 *IndependentBlockLuma = 128;
5485         } else {
5486                 *MaxUncompressedBlockLuma = 256;
5487                 *MaxCompressedBlockLuma = 64;
5488                 *IndependentBlockLuma = 64;
5489         }
5490
5491         if (RequestChroma == REQ_256Bytes) {
5492                 *MaxUncompressedBlockChroma = 256;
5493                 *MaxCompressedBlockChroma = 256;
5494                 *IndependentBlockChroma = 0;
5495         } else if (RequestChroma == REQ_128BytesContiguous) {
5496                 *MaxUncompressedBlockChroma = 256;
5497                 *MaxCompressedBlockChroma = 128;
5498                 *IndependentBlockChroma = 128;
5499         } else {
5500                 *MaxUncompressedBlockChroma = 256;
5501                 *MaxCompressedBlockChroma = 64;
5502                 *IndependentBlockChroma = 64;
5503         }
5504
5505         if (DCCEnabled != true || BytePerPixelC == 0) {
5506                 *MaxUncompressedBlockChroma = 0;
5507                 *MaxCompressedBlockChroma = 0;
5508                 *IndependentBlockChroma = 0;
5509         }
5510
5511         if (DCCEnabled != true) {
5512                 *MaxUncompressedBlockLuma = 0;
5513                 *MaxCompressedBlockLuma = 0;
5514                 *IndependentBlockLuma = 0;
5515         }
5516
5517 #ifdef __DML_VBA_DEBUG__
5518         dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5519         dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5520         dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5521         dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5522         dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5523         dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5524 #endif
5525
5526 } // CalculateDCCConfiguration
5527
5528 void dml32_CalculateStutterEfficiency(
5529                 unsigned int      CompressedBufferSizeInkByte,
5530                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5531                 bool   UnboundedRequestEnabled,
5532                 unsigned int      MetaFIFOSizeInKEntries,
5533                 unsigned int      ZeroSizeBufferEntries,
5534                 unsigned int      PixelChunkSizeInKByte,
5535                 unsigned int   NumberOfActiveSurfaces,
5536                 unsigned int      ROBBufferSizeInKByte,
5537                 double    TotalDataReadBandwidth,
5538                 double    DCFCLK,
5539                 double    ReturnBW,
5540                 unsigned int      CompbufReservedSpace64B,
5541                 unsigned int      CompbufReservedSpaceZs,
5542                 double    SRExitTime,
5543                 double    SRExitZ8Time,
5544                 bool   SynchronizeTimingsFinal,
5545                 unsigned int   BlendingAndTiming[],
5546                 double    StutterEnterPlusExitWatermark,
5547                 double    Z8StutterEnterPlusExitWatermark,
5548                 bool   ProgressiveToInterlaceUnitInOPP,
5549                 bool   Interlace[],
5550                 double    MinTTUVBlank[],
5551                 unsigned int   DPPPerSurface[],
5552                 unsigned int      DETBufferSizeY[],
5553                 unsigned int   BytePerPixelY[],
5554                 double    BytePerPixelDETY[],
5555                 double      SwathWidthY[],
5556                 unsigned int   SwathHeightY[],
5557                 unsigned int   SwathHeightC[],
5558                 double    NetDCCRateLuma[],
5559                 double    NetDCCRateChroma[],
5560                 double    DCCFractionOfZeroSizeRequestsLuma[],
5561                 double    DCCFractionOfZeroSizeRequestsChroma[],
5562                 unsigned int      HTotal[],
5563                 unsigned int      VTotal[],
5564                 double    PixelClock[],
5565                 double    VRatio[],
5566                 enum dm_rotation_angle SourceRotation[],
5567                 unsigned int   BlockHeight256BytesY[],
5568                 unsigned int   BlockWidth256BytesY[],
5569                 unsigned int   BlockHeight256BytesC[],
5570                 unsigned int   BlockWidth256BytesC[],
5571                 unsigned int   DCCYMaxUncompressedBlock[],
5572                 unsigned int   DCCCMaxUncompressedBlock[],
5573                 unsigned int      VActive[],
5574                 bool   DCCEnable[],
5575                 bool   WritebackEnable[],
5576                 double    ReadBandwidthSurfaceLuma[],
5577                 double    ReadBandwidthSurfaceChroma[],
5578                 double    meta_row_bw[],
5579                 double    dpte_row_bw[],
5580
5581                 /* Output */
5582                 double   *StutterEfficiencyNotIncludingVBlank,
5583                 double   *StutterEfficiency,
5584                 unsigned int     *NumberOfStutterBurstsPerFrame,
5585                 double   *Z8StutterEfficiencyNotIncludingVBlank,
5586                 double   *Z8StutterEfficiency,
5587                 unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5588                 double   *StutterPeriod,
5589                 bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5590 {
5591
5592         bool FoundCriticalSurface = false;
5593         unsigned int SwathSizeCriticalSurface = 0;
5594         unsigned int LastChunkOfSwathSize;
5595         unsigned int MissingPartOfLastSwathOfDETSize;
5596         double LastZ8StutterPeriod = 0.0;
5597         double LastStutterPeriod = 0.0;
5598         unsigned int TotalNumberOfActiveOTG = 0;
5599         double doublePixelClock;
5600         unsigned int doubleHTotal;
5601         unsigned int doubleVTotal;
5602         bool SameTiming = true;
5603         double DETBufferingTimeY;
5604         double SwathWidthYCriticalSurface = 0.0;
5605         double SwathHeightYCriticalSurface = 0.0;
5606         double VActiveTimeCriticalSurface = 0.0;
5607         double FrameTimeCriticalSurface = 0.0;
5608         unsigned int BytePerPixelYCriticalSurface = 0;
5609         double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5610         unsigned int DETBufferSizeYCriticalSurface = 0;
5611         double MinTTUVBlankCriticalSurface = 0.0;
5612         unsigned int BlockWidth256BytesYCriticalSurface = 0;
5613         bool doublePlaneCriticalSurface = 0;
5614         bool doublePipeCriticalSurface = 0;
5615         double TotalCompressedReadBandwidth;
5616         double TotalRowReadBandwidth;
5617         double AverageDCCCompressionRate;
5618         double EffectiveCompressedBufferSize;
5619         double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5620         double StutterBurstTime;
5621         unsigned int TotalActiveWriteback;
5622         double LinesInDETY;
5623         double LinesInDETYRoundedDownToSwath;
5624         double MaximumEffectiveCompressionLuma;
5625         double MaximumEffectiveCompressionChroma;
5626         double TotalZeroSizeRequestReadBandwidth;
5627         double TotalZeroSizeCompressedReadBandwidth;
5628         double AverageDCCZeroSizeFraction;
5629         double AverageZeroSizeCompressionRate;
5630         unsigned int k;
5631
5632         TotalZeroSizeRequestReadBandwidth = 0;
5633         TotalZeroSizeCompressedReadBandwidth = 0;
5634         TotalRowReadBandwidth = 0;
5635         TotalCompressedReadBandwidth = 0;
5636
5637         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5638                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5639                         if (DCCEnable[k] == true) {
5640                                 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5641                                                 || (!IsVertical(SourceRotation[k])
5642                                                                 && BlockHeight256BytesY[k] > SwathHeightY[k])
5643                                                 || DCCYMaxUncompressedBlock[k] < 256) {
5644                                         MaximumEffectiveCompressionLuma = 2;
5645                                 } else {
5646                                         MaximumEffectiveCompressionLuma = 4;
5647                                 }
5648                                 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5649                                                 + ReadBandwidthSurfaceLuma[k]
5650                                                                 / dml_min(NetDCCRateLuma[k],
5651                                                                                 MaximumEffectiveCompressionLuma);
5652 #ifdef __DML_VBA_DEBUG__
5653                                 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5654                                                 __func__, k, ReadBandwidthSurfaceLuma[k]);
5655                                 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5656                                                 __func__, k, NetDCCRateLuma[k]);
5657                                 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5658                                                 __func__, k, MaximumEffectiveCompressionLuma);
5659 #endif
5660                                 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5661                                                 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5662                                 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5663                                                 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5664                                                                 / MaximumEffectiveCompressionLuma;
5665
5666                                 if (ReadBandwidthSurfaceChroma[k] > 0) {
5667                                         if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5668                                                         || (!IsVertical(SourceRotation[k])
5669                                                                         && BlockHeight256BytesC[k] > SwathHeightC[k])
5670                                                         || DCCCMaxUncompressedBlock[k] < 256) {
5671                                                 MaximumEffectiveCompressionChroma = 2;
5672                                         } else {
5673                                                 MaximumEffectiveCompressionChroma = 4;
5674                                         }
5675                                         TotalCompressedReadBandwidth =
5676                                                         TotalCompressedReadBandwidth
5677                                                         + ReadBandwidthSurfaceChroma[k]
5678                                                         / dml_min(NetDCCRateChroma[k],
5679                                                         MaximumEffectiveCompressionChroma);
5680 #ifdef __DML_VBA_DEBUG__
5681                                         dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5682                                                         __func__, k, ReadBandwidthSurfaceChroma[k]);
5683                                         dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5684                                                         __func__, k, NetDCCRateChroma[k]);
5685                                         dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5686                                                         __func__, k, MaximumEffectiveCompressionChroma);
5687 #endif
5688                                         TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5689                                                         + ReadBandwidthSurfaceChroma[k]
5690                                                                         * DCCFractionOfZeroSizeRequestsChroma[k];
5691                                         TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5692                                                         + ReadBandwidthSurfaceChroma[k]
5693                                                                         * DCCFractionOfZeroSizeRequestsChroma[k]
5694                                                                         / MaximumEffectiveCompressionChroma;
5695                                 }
5696                         } else {
5697                                 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5698                                                 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5699                         }
5700                         TotalRowReadBandwidth = TotalRowReadBandwidth
5701                                         + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5702                 }
5703         }
5704
5705         AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5706         AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5707
5708 #ifdef __DML_VBA_DEBUG__
5709         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5710         dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5711         dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5712         dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5713                         __func__, TotalZeroSizeCompressedReadBandwidth);
5714         dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5715         dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5716         dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5717         dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5718         dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5719         dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5720         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5721 #endif
5722         if (AverageDCCZeroSizeFraction == 1) {
5723                 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5724                                 / TotalZeroSizeCompressedReadBandwidth;
5725                 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5726                                 * AverageZeroSizeCompressionRate
5727                                 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5728                                                 * AverageZeroSizeCompressionRate;
5729         } else if (AverageDCCZeroSizeFraction > 0) {
5730                 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5731                                 / TotalZeroSizeCompressedReadBandwidth;
5732                 EffectiveCompressedBufferSize = dml_min(
5733                                 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5734                                 (double) MetaFIFOSizeInKEntries * 1024 * 64
5735                                         / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5736                                         + 1 / AverageDCCCompressionRate))
5737                                         + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5738                                         * AverageDCCCompressionRate,
5739                                         ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5740                                         / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5741
5742 #ifdef __DML_VBA_DEBUG__
5743                 dml_print("DML::%s: min 1 = %f\n", __func__,
5744                                 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5745                 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5746                                 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5747                                                 AverageDCCCompressionRate));
5748                 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5749                                 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5750                 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5751                                 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5752 #endif
5753         } else {
5754                 EffectiveCompressedBufferSize = dml_min(
5755                                 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5756                                 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5757                                 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5758                                                 * AverageDCCCompressionRate;
5759
5760 #ifdef __DML_VBA_DEBUG__
5761                 dml_print("DML::%s: min 1 = %f\n", __func__,
5762                                 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5763                 dml_print("DML::%s: min 2 = %f\n", __func__,
5764                                 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5765 #endif
5766         }
5767
5768 #ifdef __DML_VBA_DEBUG__
5769         dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5770         dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5771         dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5772 #endif
5773
5774         *StutterPeriod = 0;
5775
5776         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5777                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5778                         LinesInDETY = ((double) DETBufferSizeY[k]
5779                                         + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5780                                                         * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5781                                         / BytePerPixelDETY[k] / SwathWidthY[k];
5782                         LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5783                         DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5784                                         / VRatio[k];
5785 #ifdef __DML_VBA_DEBUG__
5786                         dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5787                         dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5788                         dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5789                         dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5790                                         __func__, k, ReadBandwidthSurfaceLuma[k]);
5791                         dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5792                         dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5793                         dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5794                                         __func__, k, LinesInDETYRoundedDownToSwath);
5795                         dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5796                         dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5797                         dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5798                         dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5799                         dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5800 #endif
5801
5802                         if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5803                                 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5804
5805                                 FoundCriticalSurface = true;
5806                                 *StutterPeriod = DETBufferingTimeY;
5807                                 FrameTimeCriticalSurface = (
5808                                                 isInterlaceTiming ?
5809                                                                 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5810                                                 * (double) HTotal[k] / PixelClock[k];
5811                                 VActiveTimeCriticalSurface = (
5812                                                 isInterlaceTiming ?
5813                                                                 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5814                                                 * (double) HTotal[k] / PixelClock[k];
5815                                 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5816                                 SwathWidthYCriticalSurface = SwathWidthY[k];
5817                                 SwathHeightYCriticalSurface = SwathHeightY[k];
5818                                 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5819                                 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5820                                                 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5821                                 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5822                                 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5823                                 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5824                                 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5825
5826 #ifdef __DML_VBA_DEBUG__
5827                                 dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5828                                                 __func__, k, FoundCriticalSurface);
5829                                 dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5830                                                 __func__, k, *StutterPeriod);
5831                                 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5832                                                 __func__, k, MinTTUVBlankCriticalSurface);
5833                                 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5834                                                 __func__, k, FrameTimeCriticalSurface);
5835                                 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5836                                                 __func__, k, VActiveTimeCriticalSurface);
5837                                 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5838                                                 __func__, k, BytePerPixelYCriticalSurface);
5839                                 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5840                                                 __func__, k, SwathWidthYCriticalSurface);
5841                                 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5842                                                 __func__, k, SwathHeightYCriticalSurface);
5843                                 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5844                                                 __func__, k, BlockWidth256BytesYCriticalSurface);
5845                                 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5846                                                 __func__, k, doublePlaneCriticalSurface);
5847                                 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5848                                                 __func__, k, doublePipeCriticalSurface);
5849                                 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5850                                                 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5851 #endif
5852                         }
5853                 }
5854         }
5855
5856         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5857                         EffectiveCompressedBufferSize);
5858 #ifdef __DML_VBA_DEBUG__
5859         dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5860         dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5861         dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5862                         __func__, *StutterPeriod * TotalDataReadBandwidth);
5863         dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5864         dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5865                         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5866         dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5867         dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5868         dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5869         dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5870 #endif
5871
5872         StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5873                         / ReturnBW
5874                         + (*StutterPeriod * TotalDataReadBandwidth
5875                                         - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5876                         + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5877 #ifdef __DML_VBA_DEBUG__
5878         dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5879                         AverageDCCCompressionRate / ReturnBW);
5880         dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5881                         __func__, (*StutterPeriod * TotalDataReadBandwidth));
5882         dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5883                         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5884         dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5885         dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5886 #endif
5887         StutterBurstTime = dml_max(StutterBurstTime,
5888                         LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5889                                         * SwathWidthYCriticalSurface / ReturnBW);
5890
5891 #ifdef __DML_VBA_DEBUG__
5892         dml_print("DML::%s: Time to finish residue swath=%f\n",
5893                         __func__,
5894                         LinesToFinishSwathTransferStutterCriticalSurface *
5895                         BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5896 #endif
5897
5898         TotalActiveWriteback = 0;
5899         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5900                 if (WritebackEnable[k])
5901                         TotalActiveWriteback = TotalActiveWriteback + 1;
5902         }
5903
5904         if (TotalActiveWriteback == 0) {
5905 #ifdef __DML_VBA_DEBUG__
5906                 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5907                 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5908                 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5909                 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5910 #endif
5911                 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5912                                 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5913                 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5914                                 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5915                 *NumberOfStutterBurstsPerFrame = (
5916                                 *StutterEfficiencyNotIncludingVBlank > 0 ?
5917                                                 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5918                 *Z8NumberOfStutterBurstsPerFrame = (
5919                                 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5920                                                 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5921         } else {
5922                 *StutterEfficiencyNotIncludingVBlank = 0.;
5923                 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5924                 *NumberOfStutterBurstsPerFrame = 0;
5925                 *Z8NumberOfStutterBurstsPerFrame = 0;
5926         }
5927 #ifdef __DML_VBA_DEBUG__
5928         dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5929         dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5930                         __func__, *StutterEfficiencyNotIncludingVBlank);
5931         dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5932                         __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5933         dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5934         dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5935 #endif
5936
5937         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5938                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5939                         if (BlendingAndTiming[k] == k) {
5940                                 if (TotalNumberOfActiveOTG == 0) {
5941                                         doublePixelClock = PixelClock[k];
5942                                         doubleHTotal = HTotal[k];
5943                                         doubleVTotal = VTotal[k];
5944                                 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5945                                                 || doubleVTotal != VTotal[k]) {
5946                                         SameTiming = false;
5947                                 }
5948                                 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5949                         }
5950                 }
5951         }
5952
5953         if (*StutterEfficiencyNotIncludingVBlank > 0) {
5954                 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5955
5956                 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5957                                 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
5958                         *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
5959                                                 + StutterBurstTime * VActiveTimeCriticalSurface
5960                                                 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5961                 } else {
5962                         *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
5963                 }
5964         } else {
5965                 *StutterEfficiency = 0;
5966         }
5967
5968         if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
5969                 LastZ8StutterPeriod = VActiveTimeCriticalSurface
5970                                 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5971                 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
5972                                 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
5973                         *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
5974                                 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5975                 } else {
5976                         *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
5977                 }
5978         } else {
5979                 *Z8StutterEfficiency = 0.;
5980         }
5981
5982 #ifdef __DML_VBA_DEBUG__
5983         dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
5984         dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
5985         dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5986         dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5987         dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
5988         dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
5989         dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5990                         __func__, *StutterEfficiencyNotIncludingVBlank);
5991         dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5992 #endif
5993
5994         SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
5995                         * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
5996         LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
5997         MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
5998                         - DETBufferSizeYCriticalSurface;
5999
6000         *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6001                         && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6002                         && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6003                         && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6004
6005 #ifdef __DML_VBA_DEBUG__
6006         dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6007         dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6008         dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6009         dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6010 #endif
6011 } // CalculateStutterEfficiency
6012
/*
 * Derive the total DET budget, the nominal per-DPP DET size and the minimum
 * compressed buffer size from the return buffer sizes.
 *
 * Inputs:
 *   ConfigReturnBufferSizeInKByte  - config return buffer size, in KB
 *   ROBBufferSizeInKByte           - ROB buffer size, in KB
 *   MaxNumDPP                      - divisor used for the nominal DET size
 *   nomDETInKByteOverrideEnable    - when true, *nomDETInKByte is replaced
 *                                    by nomDETInKByteOverrideValue
 *   nomDETInKByteOverrideValue     - override value for *nomDETInKByte
 *
 * Outputs:
 *   *MaxTotalDETInKByte             - dml_ceil() of 4/5 of the two buffer
 *                                     sizes summed, with a granularity
 *                                     argument of 64
 *   *nomDETInKByte                  - dml_floor() of MaxTotalDET / MaxNumDPP
 *                                     with a granularity argument of 64,
 *                                     unless overridden
 *   *MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                     *MaxTotalDETInKByte (unsigned math)
 *
 * NOTE(review): MaxNumDPP is used as a divisor without a zero check; callers
 * are presumably expected to pass a non-zero value - confirm.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
                unsigned int    ConfigReturnBufferSizeInKByte,
                unsigned int    ROBBufferSizeInKByte,
                unsigned int MaxNumDPP,
                bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
                unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

                /* Output */
                unsigned int *MaxTotalDETInKByte,
                unsigned int *nomDETInKByte,
                unsigned int *MinCompressedBufferSizeInKByte)
{
        /* Combined size of both return buffers, kept in double for the scaling below. */
        const double total_return_kbytes =
                        (double) ConfigReturnBufferSizeInKByte + (double) ROBBufferSizeInKByte;

        *MaxTotalDETInKByte = dml_ceil(total_return_kbytes * 4.0 / 5.0, 64);
        *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
        *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
        dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
        dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
        dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
        dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
        dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
        dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

        /* The override is applied last so it wins over the computed nominal size. */
        if (nomDETInKByteOverrideEnable) {
                *nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
                dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
        }
} // CalculateMaxDETAndMinCompressedBufferSize
6049
/*
 * Decide whether the summed per-surface bandwidth fits within ReturnBW.
 *
 * For every surface k < NumberOfActiveSurfaces the following terms are
 * accumulated, in this order:
 *   ReadBandwidthLuma[k]   * UrgentBurstFactorLuma[k]
 *   ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 *   cursor_bw[k]           * UrgentBurstFactorCursor[k]
 *   NumberOfDPP[k]         * meta_row_bandwidth[k]
 *   NumberOfDPP[k]         * dpte_row_bandwidth[k]
 *
 * Returns true only when the accumulated total is <= ReturnBW and no entry
 * of NotUrgentLatencyHiding[] is set for the active surfaces.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
                double ReturnBW,
                bool NotUrgentLatencyHiding[],
                double ReadBandwidthLuma[],
                double ReadBandwidthChroma[],
                double cursor_bw[],
                double meta_row_bandwidth[],
                double dpte_row_bandwidth[],
                unsigned int NumberOfDPP[],
                double UrgentBurstFactorLuma[],
                double UrgentBurstFactorChroma[],
                double UrgentBurstFactorCursor[])
{
        unsigned int surf;
        bool any_surface_not_hiding = false;
        bool supported;
        double total_bw = 0;

        /* A single flagged surface is enough; no need to scan the rest. */
        for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
                if (NotUrgentLatencyHiding[surf]) {
                        any_surface_not_hiding = true;
                        break;
                }
        }

        /* Terms are added one at a time to keep the left-to-right summation order. */
        for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
                total_bw += ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf];
                total_bw += ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf];
                total_bw += cursor_bw[surf] * UrgentBurstFactorCursor[surf];
                total_bw += NumberOfDPP[surf] * meta_row_bandwidth[surf];
                total_bw += NumberOfDPP[surf] * dpte_row_bandwidth[surf];
        }

        supported = !any_surface_not_hiding && (total_bw <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
        dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, any_surface_not_hiding);
        dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, total_bw);
        dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
        dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
        return supported;
}
6088
/*
 * Accumulate the per-surface bandwidth and compare it against ReturnBW.
 *
 * For each surface k < NumberOfActiveSurfaces, the largest (via dml_max3())
 * of three candidates is added to *PrefetchBandwidth:
 *   1. NumberOfDPP[k] * prefetch_vmrow_bw[k]
 *   2. luma + chroma + cursor read bandwidth, each scaled by its
 *      UrgentBurstFactor*, plus NumberOfDPP[k] * (meta + dpte row bandwidth)
 *   3. NumberOfDPP[k] * (luma + chroma prefetch bandwidth, each scaled by
 *      its UrgentBurstFactor*Pre) plus cursor_bw_pre[k] scaled by
 *      UrgentBurstFactorCursorPre[k]
 *
 * Outputs:
 *   *PrefetchBandwidth         - the accumulated total
 *   *PrefetchBandwidthSupport  - true when the total is <= ReturnBW and no
 *                                NotUrgentLatencyHiding[] entry is set
 *   *FractionOfUrgentBandwidth - total divided by ReturnBW
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
                double ReturnBW,
                bool NotUrgentLatencyHiding[],
                double ReadBandwidthLuma[],
                double ReadBandwidthChroma[],
                double PrefetchBandwidthLuma[],
                double PrefetchBandwidthChroma[],
                double cursor_bw[],
                double meta_row_bandwidth[],
                double dpte_row_bandwidth[],
                double cursor_bw_pre[],
                double prefetch_vmrow_bw[],
                unsigned int NumberOfDPP[],
                double UrgentBurstFactorLuma[],
                double UrgentBurstFactorChroma[],
                double UrgentBurstFactorCursor[],
                double UrgentBurstFactorLumaPre[],
                double UrgentBurstFactorChromaPre[],
                double UrgentBurstFactorCursorPre[],

                /* output */
                double  *PrefetchBandwidth,
                double  *FractionOfUrgentBandwidth,
                bool *PrefetchBandwidthSupport)
{
        unsigned int surf;
        bool any_surface_not_hiding = false;

        /* A single flagged surface is enough; no need to scan the rest. */
        for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
                if (NotUrgentLatencyHiding[surf]) {
                        any_surface_not_hiding = true;
                        break;
                }
        }

        *PrefetchBandwidth = 0;
        for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
                /* Candidate 1: VM/row prefetch traffic across all DPPs of the surface. */
                double vm_row_bw = NumberOfDPP[surf] * prefetch_vmrow_bw[surf];
                /* Candidate 2: burst-factor scaled read traffic plus row fetches. */
                double active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
                                + ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
                                + cursor_bw[surf] * UrgentBurstFactorCursor[surf]
                                + NumberOfDPP[surf] * (meta_row_bandwidth[surf] + dpte_row_bandwidth[surf]);
                /* Candidate 3: burst-factor scaled prefetch traffic plus cursor prefetch. */
                double prefetch_bw = NumberOfDPP[surf]
                                * (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
                                + PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
                                + cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

                *PrefetchBandwidth += dml_max3(vm_row_bw, active_bw, prefetch_bw);
        }

        *PrefetchBandwidthSupport = !any_surface_not_hiding && (*PrefetchBandwidth <= ReturnBW);
        *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
}
6132
/*
 * Compute what is left of ReturnBW after subtracting each surface's demand.
 *
 * For each surface k < NumberOfActiveSurfaces, the larger (via dml_max()) of
 * the following two values is subtracted from the running total, which
 * starts at ReturnBW:
 *   - luma + chroma + cursor read bandwidth, each scaled by its
 *     UrgentBurstFactor*
 *   - NumberOfDPP[k] * (luma + chroma prefetch bandwidth, each scaled by its
 *     UrgentBurstFactor*Pre) plus cursor_bw_pre[k] scaled by
 *     UrgentBurstFactorCursorPre[k]
 *
 * Returns the remaining bandwidth; the result is not clamped, so it is
 * negative when the summed demand exceeds ReturnBW.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
                double ReturnBW,
                double ReadBandwidthLuma[],
                double ReadBandwidthChroma[],
                double PrefetchBandwidthLuma[],
                double PrefetchBandwidthChroma[],
                double cursor_bw[],
                double cursor_bw_pre[],
                unsigned int NumberOfDPP[],
                double UrgentBurstFactorLuma[],
                double UrgentBurstFactorChroma[],
                double UrgentBurstFactorCursor[],
                double UrgentBurstFactorLumaPre[],
                double UrgentBurstFactorChromaPre[],
                double UrgentBurstFactorCursorPre[])
{
        unsigned int surf;
        double remaining_bw = ReturnBW;

        for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
                /* Burst-factor scaled read traffic of this surface. */
                double active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
                                + ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
                                + cursor_bw[surf] * UrgentBurstFactorCursor[surf];
                /* Burst-factor scaled prefetch traffic of this surface. */
                double prefetch_bw = NumberOfDPP[surf]
                                * (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
                                + PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
                                + cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

                remaining_bw -= dml_max(active_bw, prefetch_bw);
        }

        return remaining_bw;
}
6159
6160 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6161                 double ReturnBW,
6162                 enum immediate_flip_requirement ImmediateFlipRequirement[],
6163                 double final_flip_bw[],
6164                 double ReadBandwidthLuma[],
6165                 double ReadBandwidthChroma[],
6166                 double PrefetchBandwidthLuma[],
6167                 double PrefetchBandwidthChroma[],
6168                 double cursor_bw[],
6169                 double meta_row_bandwidth[],
6170                 double dpte_row_bandwidth[],
6171                 double cursor_bw_pre[],
6172                 double prefetch_vmrow_bw[],
6173                 unsigned int NumberOfDPP[],
6174                 double UrgentBurstFactorLuma[],
6175                 double UrgentBurstFactorChroma[],
6176                 double UrgentBurstFactorCursor[],
6177                 double UrgentBurstFactorLumaPre[],
6178                 double UrgentBurstFactorChromaPre[],
6179                 double UrgentBurstFactorCursorPre[],
6180
6181                 /* output */
6182                 double  *TotalBandwidth,
6183                 double  *FractionOfUrgentBandwidth,
6184                 bool *ImmediateFlipBandwidthSupport)
6185 {
6186         unsigned int k;
6187         *TotalBandwidth = 0;
6188         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6189                 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6190                         *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6191                                         NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6192                                         NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6193                 } else {
6194                         *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6195                                         NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6196                                         NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6197                 }
6198         }
6199         *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6200         *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6201 }