2 * Copyright 2022 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
// dml32_dscceComputeDelay - estimate the delay, expressed in pixels, that the
// DSC encoder ("dscce" in the comments below) contributes.
//
// Parameters visible in this view:
//   sliceWidth  - number of pixels per slice line
//   numSlices   - number of slices in the horizontal direction per DSC engine
//   pixelFormat - output pixel format; drives the pixelsPerClock selection and
//                 the extra-cycle condition further down
//   Output      - output encoder class; in the visible lines it is only printed
// The body also reads bpc and BPP.
// NOTE(review): the declarations of bpc and BPP, the bodies of the branches
// and the return statement are not visible in this view; presumably the
// function returns "pixels" - confirm against the full file.
30 unsigned int dml32_dscceComputeDelay(
33 unsigned int sliceWidth,
34 unsigned int numSlices,
35 enum output_format_class pixelFormat,
36 enum output_encoder_class Output)
38 // valid bpc = source bits per component in the set of {8, 10, 12}
39 // valid bpp = increments of 1/16 of a bit
40 // min = 6/7/8 in N420/N422/444, respectively
41 // max = such that compression is 1:1
42 //valid sliceWidth = number of pixels per slice line,
43 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
44 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
45 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
// Rate-control model size used to derive the initial transmit delay below.
48 unsigned int rcModelSize = 8192;
50 // N422/N420 operate at 2 pixels per clock
51 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
// Select pixelsPerClock from the pixel format (assignments not visible here).
54 if (pixelFormat == dm_420)
56 else if (pixelFormat == dm_n422)
58 // #all other modes operate at 1 pixel per clock
62 //initial transmit delay as per PPS
// Half the rate-control model size, divided by bits per pixel and by
// pixels per clock, rounded with dml_round.
63 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
73 //divide by pixel per cycle to compute slice width as seen by DSC
// Both operands are unsigned int, so this is an integer division.
74 w = sliceWidth / pixelsPerClock;
76 //422 mode has an additional cycle of delay
// NOTE(review): the condition below names 420, 444 and n422; the branch
// bodies are not visible here, so confirm which side receives the extra cycle.
77 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
82 //main calculation for the dscce
83 ix = initalXmitDelay + 45;
// (a + 2) / 3 is an integer round-up of a / 3.
88 ax = (a + 2) / 3 + D + 6 + 1;
// Integer round-up of ax / wx.
89 L = (ax + wx - 1) / wx;
// NOTE(review): assumes w != 0, i.e. sliceWidth >= pixelsPerClock; otherwise
// the modulo below divides by zero.
90 if ((ix % w) == 0 && p != 0)
// Total delay in DSC cycles: per-slice term for all but one slice, plus the
// fixed terms.
94 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
96 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
97 pixels = Delay * 3 * pixelsPerClock;
99 #ifdef __DML_VBA_DEBUG__
100 dml_print("DML::%s: bpc: %d\n", __func__, bpc);
101 dml_print("DML::%s: BPP: %f\n", __func__, BPP);
102 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
103 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
104 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
105 dml_print("DML::%s: Output: %d\n", __func__, Output);
106 dml_print("DML::%s: pixels: %d\n", __func__, pixels);
// dml32_dscComputeDelay - accumulate a delay figure for the DSC block ("dscc"
// in the comments below) into Delay, which starts at 0.
//
// The visible structure has one branch for dm_420, one for every other format
// except dm_444, and a third group of stage comments after that. Each branch
// lists the same pipeline stages: input deserializer, input cdc fifo, cdc
// uncertainty, output cdc fifo and output serializer.
// NOTE(review): the per-stage additions, the final "else" and the return
// statement are not visible in this view; Output is not referenced in any
// visible line.
112 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
114 unsigned int Delay = 0;
116 if (pixelFormat == dm_420) {
121 // dscc - input deserializer
123 // dscc gets pixels every other cycle
125 // dscc - input cdc fifo
127 // dscc gets pixels every other cycle
129 // dscc - cdc uncertainty
131 // dscc - output cdc fifo
133 // dscc gets pixels every other cycle
135 // dscc - cdc uncertainty
137 // dscc - output serializer
// NOTE(review): "pixelFormat == dm_n422" is already implied by
// "pixelFormat != dm_444", so this branch is taken for every format other
// than dm_420 (handled above) and dm_444.
141 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
146 // dscc - input deserializer
148 // dscc - input cdc fifo
150 // dscc - cdc uncertainty
152 // dscc - output cdc fifo
154 // dscc - cdc uncertainty
156 // dscc - output serializer
// Presumably the remaining (dm_444) branch starts here - the "else" line is
// not visible in this view.
165 // dscc - input deserializer
167 // dscc - input cdc fifo
169 // dscc - cdc uncertainty
171 // dscc - output cdc fifo
173 // dscc - output serializer
175 // dscc - cdc uncertainty
// IsVertical - classify a rotation angle as vertical or not.
// The result variable defaults to false and the test below matches the 90 and
// 270 degree rotations together with their "m" variants.
// NOTE(review): the assignment inside the if and the return statement are not
// visible in this view; presumably is_vert is set to true and returned.
185 bool IsVertical(enum dm_rotation_angle Scan)
187 bool is_vert = false;
189 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
// dml32_CalculateSinglePipeDPPCLKAndSCLThroughput - derive the scaler (PSCL)
// throughput for luma and chroma and the DPP clock needed when a single DPP
// is used.
//
// Outputs (written through pointers):
//   PSCL_THROUGHPUT        - luma throughput, capped by MaxDCHUBToPSCLThroughput
//   PSCL_THROUGHPUT_CHROMA - chroma throughput; 0 when the source format is
//                            none of dm_420_8/10/12 or dm_rgbe_alpha
//   DPPCLKUsingSingleDPP   - luma requirement, or the max of luma and chroma
//                            requirements when a chroma plane is present
// NOTE(review): several parameter declarations (HRatio, VRatio, HRatioChroma,
// VRatioChroma, PixelClock, HTaps, VTaps) and the if/else lines that select
// between the two throughput formulas are not visible in this view.
196 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
201 double MaxDCHUBToPSCLThroughput,
202 double MaxPSCLToLBThroughput,
204 enum source_format_class SourcePixelFormat,
206 unsigned int HTapsChroma,
208 unsigned int VTapsChroma,
211 double *PSCL_THROUGHPUT,
212 double *PSCL_THROUGHPUT_CHROMA,
213 double *DPPCLKUsingSingleDPP)
215 double DPPCLKUsingSingleDPPLuma;
216 double DPPCLKUsingSingleDPPChroma;
// Luma throughput, first form: scaled by HRatio and by the number of 6-tap
// groups needed horizontally. Presumably guarded by an HRatio > 1 test like
// the chroma path below - the guarding line is not visible here.
219 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
220 dml_ceil((double) HTaps / 6.0, 1.0));
// Luma throughput, second form: the smaller of the two maxima.
222 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
// Luma DPP clock: pixel clock times the largest of the three terms.
225 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
226 *PSCL_THROUGHPUT, 1);
// More than 6 taps in either direction forces at least twice the pixel clock.
228 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
229 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
// Formats without a second plane: no chroma throughput, luma clock is final.
231 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
232 SourcePixelFormat != dm_rgbe_alpha)) {
233 *PSCL_THROUGHPUT_CHROMA = 0;
234 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
236 if (HRatioChroma > 1) {
237 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
238 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
240 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
// NOTE(review): VTapsChroma is unsigned int, so "VTapsChroma / 6" is an
// integer division that truncates before the multiply; confirm that this is
// intended rather than a division by 6.0.
242 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
243 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
244 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
245 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
// The single-DPP clock must satisfy both planes.
246 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
// dml32_CalculateBytePerPixelAndBlockSizes - from the source pixel format and
// the surface tiling mode, fill in bytes-per-pixel values, the 256-byte block
// dimensions and the macro tile dimensions for the luma (Y) and chroma (C)
// planes. All results are returned through the pointer parameters.
//
// Steps visible below:
//   1. BytePerPixelDETY / BytePerPixelDETC chosen per pixel format.
//   2. 256-byte block height chosen per format and tiling; block width is
//      256 / bytes-per-pixel / block height.
//   3. Macro tile height is a multiple of the block height (1x for linear,
//      16x for the listed 64KB swizzles, 32x in the last group); width is
//      derived from a 256, 65536 or 65536 * 4 byte budget respectively.
// NOTE(review): the assignments to *BytePerPixelY and *BytePerPixelC, and the
// "else" / closing-brace lines, are not visible in this view.
250 void dml32_CalculateBytePerPixelAndBlockSizes(
251 enum source_format_class SourcePixelFormat,
252 enum dm_swizzle_mode SurfaceTiling,
255 unsigned int *BytePerPixelY,
256 unsigned int *BytePerPixelC,
257 double *BytePerPixelDETY,
258 double *BytePerPixelDETC,
259 unsigned int *BlockHeight256BytesY,
260 unsigned int *BlockHeight256BytesC,
261 unsigned int *BlockWidth256BytesY,
262 unsigned int *BlockWidth256BytesC,
263 unsigned int *MacroTileHeightY,
264 unsigned int *MacroTileHeightC,
265 unsigned int *MacroTileWidthY,
266 unsigned int *MacroTileWidthC)
// Step 1: per-format DET bytes per pixel; DETC of 0 means no second plane.
268 if (SourcePixelFormat == dm_444_64) {
269 *BytePerPixelDETY = 8;
270 *BytePerPixelDETC = 0;
273 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
274 *BytePerPixelDETY = 4;
275 *BytePerPixelDETC = 0;
278 } else if (SourcePixelFormat == dm_444_16) {
279 *BytePerPixelDETY = 2;
280 *BytePerPixelDETC = 0;
283 } else if (SourcePixelFormat == dm_444_8) {
284 *BytePerPixelDETY = 1;
285 *BytePerPixelDETC = 0;
288 } else if (SourcePixelFormat == dm_rgbe_alpha) {
289 *BytePerPixelDETY = 4;
290 *BytePerPixelDETC = 1;
293 } else if (SourcePixelFormat == dm_420_8) {
294 *BytePerPixelDETY = 1;
295 *BytePerPixelDETC = 2;
298 } else if (SourcePixelFormat == dm_420_12) {
299 *BytePerPixelDETY = 2;
300 *BytePerPixelDETC = 4;
// Presumably the fall-through branch for the remaining formats (the "else"
// line is not visible here): fractional bytes per pixel.
304 *BytePerPixelDETY = 4.0 / 3;
305 *BytePerPixelDETC = 8.0 / 3;
309 #ifdef __DML_VBA_DEBUG__
310 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
311 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
312 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
313 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
314 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
// Step 2a: formats handled as a single plane - the chroma block dimensions
// are set to 0.
316 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
317 || SourcePixelFormat == dm_444_16
318 || SourcePixelFormat == dm_444_8
319 || SourcePixelFormat == dm_mono_16
320 || SourcePixelFormat == dm_mono_8
321 || SourcePixelFormat == dm_rgbe)) {
322 if (SurfaceTiling == dm_sw_linear)
323 *BlockHeight256BytesY = 1;
324 else if (SourcePixelFormat == dm_444_64)
325 *BlockHeight256BytesY = 4;
326 else if (SourcePixelFormat == dm_444_8)
327 *BlockHeight256BytesY = 16;
329 *BlockHeight256BytesY = 8;
// Width follows from a 256-byte block: 256 / bytes per pixel / height.
331 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
332 *BlockHeight256BytesC = 0;
333 *BlockWidth256BytesC = 0;
// Step 2b: remaining formats - both planes get non-zero block heights.
335 if (SurfaceTiling == dm_sw_linear) {
336 *BlockHeight256BytesY = 1;
337 *BlockHeight256BytesC = 1;
338 } else if (SourcePixelFormat == dm_rgbe_alpha) {
339 *BlockHeight256BytesY = 8;
340 *BlockHeight256BytesC = 16;
341 } else if (SourcePixelFormat == dm_420_8) {
342 *BlockHeight256BytesY = 16;
343 *BlockHeight256BytesC = 8;
345 *BlockHeight256BytesY = 8;
346 *BlockHeight256BytesC = 8;
// NOTE(review): assumes *BytePerPixelC is non-zero on this path; its
// assignment is not visible here.
348 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
349 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
351 #ifdef __DML_VBA_DEBUG__
352 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
353 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
354 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
355 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
// Step 3: macro tile dimensions. In every group the chroma width is forced to
// 0 when the chroma height is 0, which avoids dividing by zero.
358 if (SurfaceTiling == dm_sw_linear) {
359 *MacroTileHeightY = *BlockHeight256BytesY;
360 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
361 *MacroTileHeightC = *BlockHeight256BytesC;
362 if (*MacroTileHeightC == 0)
363 *MacroTileWidthC = 0;
365 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
366 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
367 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
368 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
369 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
370 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
371 if (*MacroTileHeightC == 0)
372 *MacroTileWidthC = 0;
374 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
// Presumably the branch for all other tiling modes (the "else" line is not
// visible here): 32x block height and a 65536 * 4 byte budget.
376 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
377 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
378 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
379 if (*MacroTileHeightC == 0)
380 *MacroTileWidthC = 0;
382 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 #ifdef __DML_VBA_DEBUG__
386 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
387 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
388 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
389 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
391 } // CalculateBytePerPixelAndBlockSizes
// dml32_CalculateSwathAndDETConfiguration - for every active surface, choose
// the swath heights, split the DET buffer between luma and chroma, and report
// whether the viewport size is supported.
//
// Sequence visible below:
//   1. dml32_CalculateSwathWidth() fills the swath widths and maximum swath
//      heights.
//   2. The maximum swath size in bytes is computed per surface and plane.
//   3. The active DPPs are counted and chroma-carrying surfaces detected.
//   4. The compressed-buffer reserved space is set and possibly adjusted.
//   5. dml32_UnboundedRequest() and dml32_CalculateDETBufferSize() are called.
//   6. Per surface: pick full or half swath heights so the swaths fit in half
//      of the DET, flag unsupported viewports, and split the DET per plane.
// st_vars supplies scratch storage for the intermediate values.
// NOTE(review): several parameter, argument, "else", "#endif" and
// closing-brace lines are not visible in this view.
393 void dml32_CalculateSwathAndDETConfiguration(
394 struct dml32_CalculateSwathAndDETConfiguration *st_vars,
395 unsigned int DETSizeOverride[],
396 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
397 unsigned int ConfigReturnBufferSizeInKByte,
398 unsigned int MaxTotalDETInKByte,
399 unsigned int MinCompressedBufferSizeInKByte,
400 double ForceSingleDPP,
401 unsigned int NumberOfActiveSurfaces,
402 unsigned int nomDETInKByte,
403 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
404 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
405 unsigned int PixelChunkSizeKBytes,
406 unsigned int ROBSizeKBytes,
407 unsigned int CompressedBufferSegmentSizeInkByteFinal,
408 enum output_encoder_class Output[],
409 double ReadBandwidthLuma[],
410 double ReadBandwidthChroma[],
411 double MaximumSwathWidthLuma[],
412 double MaximumSwathWidthChroma[],
413 enum dm_rotation_angle SourceRotation[],
414 bool ViewportStationary[],
415 enum source_format_class SourcePixelFormat[],
416 enum dm_swizzle_mode SurfaceTiling[],
417 unsigned int ViewportWidth[],
418 unsigned int ViewportHeight[],
419 unsigned int ViewportXStart[],
420 unsigned int ViewportYStart[],
421 unsigned int ViewportXStartC[],
422 unsigned int ViewportYStartC[],
423 unsigned int SurfaceWidthY[],
424 unsigned int SurfaceWidthC[],
425 unsigned int SurfaceHeightY[],
426 unsigned int SurfaceHeightC[],
427 unsigned int Read256BytesBlockHeightY[],
428 unsigned int Read256BytesBlockHeightC[],
429 unsigned int Read256BytesBlockWidthY[],
430 unsigned int Read256BytesBlockWidthC[],
431 enum odm_combine_mode ODMMode[],
432 unsigned int BlendingAndTiming[],
433 unsigned int BytePerPixY[],
434 unsigned int BytePerPixC[],
435 double BytePerPixDETY[],
436 double BytePerPixDETC[],
437 unsigned int HActive[],
439 double HRatioChroma[],
440 unsigned int DPPPerSurface[],
443 unsigned int swath_width_luma_ub[],
444 unsigned int swath_width_chroma_ub[],
446 double SwathWidthChroma[],
447 unsigned int SwathHeightY[],
448 unsigned int SwathHeightC[],
449 unsigned int DETBufferSizeInKByte[],
450 unsigned int DETBufferSizeY[],
451 unsigned int DETBufferSizeC[],
452 bool *UnboundedRequestEnabled,
453 unsigned int *CompressedBufferSizeInkByte,
454 unsigned int *CompBufReservedSpaceKBytes,
455 bool *CompBufReservedSpaceNeedAdjustment,
456 bool ViewportSizeSupportPerSurface[],
457 bool *ViewportSizeSupport)
// Reset the accumulators that the second loop below updates.
461 st_vars->TotalActiveDPP = 0;
462 st_vars->NoChromaSurfaces = true;
464 #ifdef __DML_VBA_DEBUG__
// NOTE(review): ForceSingleDPP is declared double but printed with %d; if
// dml_print follows printf conventions this debug line is mismatched.
465 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
466 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
467 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
// Step 1: swath widths and maximum swath heights for every surface.
469 dml32_CalculateSwathWidth(ForceSingleDPP,
470 NumberOfActiveSurfaces,
487 Read256BytesBlockHeightY,
488 Read256BytesBlockHeightC,
489 Read256BytesBlockWidthY,
490 Read256BytesBlockWidthC,
497 st_vars->SwathWidthdoubleDPP,
498 st_vars->SwathWidthdoubleDPPChroma,
501 st_vars->MaximumSwathHeightY,
502 st_vars->MaximumSwathHeightC,
504 swath_width_chroma_ub);
// Step 2: maximum swath size in bytes = upper-bound width * DET bytes per
// pixel * maximum swath height, per plane.
506 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
507 st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k];
508 st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k];
509 #ifdef __DML_VBA_DEBUG__
510 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
511 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
512 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
513 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]);
514 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
515 st_vars->RoundedUpMaxSwathSizeBytesY[k]);
516 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
517 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
518 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]);
519 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
520 st_vars->RoundedUpMaxSwathSizeBytesC[k]);
// For dm_420_10 the swath sizes are additionally rounded up to a multiple of
// 256 bytes.
523 if (SourcePixelFormat[k] == dm_420_10) {
524 st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256);
525 st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256);
// Step 3: count DPPs (one per surface when ForceSingleDPP is set) and note
// whether any surface uses a format with a second plane.
529 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
530 st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
531 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
532 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
533 st_vars->NoChromaSurfaces = false;
537 // By default, just set the reserved space to 2 pixel chunks size
538 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
540 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
541 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
542 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
// Only surface 0's luma swath size enters this test; the comparison is done
// in signed arithmetic.
543 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512);
545 if (*CompBufReservedSpaceNeedAdjustment == 1) {
546 *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512;
549 #ifdef __DML_VBA_DEBUG__
550 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes);
551 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
// Step 5: the unbounded-request decision uses surface 0's output and tiling.
554 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
556 dml32_CalculateDETBufferSize(DETSizeOverride,
557 UseMALLForPStateChange,
559 NumberOfActiveSurfaces,
560 *UnboundedRequestEnabled,
563 ConfigReturnBufferSizeInKByte,
564 MinCompressedBufferSizeInKByte,
565 CompressedBufferSegmentSizeInkByteFinal,
569 st_vars->RoundedUpMaxSwathSizeBytesY,
570 st_vars->RoundedUpMaxSwathSizeBytesC,
574 DETBufferSizeInKByte, // per hubp pipe
575 CompressedBufferSizeInkByte);
577 #ifdef __DML_VBA_DEBUG__
578 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP);
579 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
580 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
581 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
582 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
583 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
// Step 6: per-surface swath height selection and DET split. The aggregate
// flag starts true and is cleared by any unsupported surface.
586 *ViewportSizeSupport = true;
587 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
// Phantom pipes use a fixed 1024 KB figure for the swath-fit test instead of
// their assigned DET size.
589 st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
590 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
591 #ifdef __DML_VBA_DEBUG__
592 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
593 st_vars->DETBufferSizeInKByteForSwathCalculation);
// Case 1: both full-height swaths fit in half of the DET.
596 if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
597 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
598 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
599 SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
600 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
601 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
// Case 2: luma is at least 1.5x chroma - halve the luma swath height.
602 } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
603 st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
604 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
605 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
606 SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
607 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
608 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
// Case 3: luma is below 1.5x chroma - halve the chroma swath height.
609 } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
610 st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <=
611 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
612 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
613 SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
614 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
615 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
// Presumably the final "else" (its line is not visible here): halve both.
617 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
618 SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
619 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
620 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
// Unsupported when even two half-height swaths exceed half of the DET, or a
// swath width exceeds its maximum (chroma only checked when it has a height).
623 if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 >
624 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
625 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
626 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
627 *ViewportSizeSupport = false;
628 ViewportSizeSupportPerSurface[k] = false;
630 ViewportSizeSupportPerSurface[k] = true;
// DET split between planes: all to luma, half and half, or roughly 2/3 to
// luma with the luma share floored to a multiple of 1024 bytes.
633 if (SwathHeightC[k] == 0) {
634 #ifdef __DML_VBA_DEBUG__
635 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
637 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
638 DETBufferSizeC[k] = 0;
639 } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) {
640 #ifdef __DML_VBA_DEBUG__
641 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
643 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
644 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
646 #ifdef __DML_VBA_DEBUG__
647 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
649 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
650 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
653 #ifdef __DML_VBA_DEBUG__
654 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
655 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
656 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
657 k, st_vars->RoundedUpMaxSwathSizeBytesY[k]);
658 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
659 k, st_vars->RoundedUpMaxSwathSizeBytesC[k]);
660 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY);
661 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC);
662 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
663 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
664 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
665 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
666 ViewportSizeSupportPerSurface[k]);
670 } // CalculateSwathAndDETConfiguration
// dml32_CalculateSwathWidth - for every active surface compute the swath
// width (whole surface and per pipe), the maximum swath heights and the
// block-aligned upper bounds of the swath widths, for luma and chroma.
//
// Outputs (arrays indexed by surface):
//   SwathWidthdoubleDPPY/C    - viewport width, or viewport height when the
//                               surface is rotated vertically
//   SwathWidthY/C             - per-pipe swath width
//   MaximumSwathHeightY/C     - 256-byte block height, or block width when
//                               rotated vertically
//   swath_width_luma_ub /
//   swath_width_chroma_ub     - swath width rounded to block granularity and
//                               capped by the block-aligned surface size
// NOTE(review): some parameter declarations (e.g. ForceSingleDPP, HRatio) and
// several "else", "#endif" and closing-brace lines are not visible in this
// view.
672 void dml32_CalculateSwathWidth(
674 unsigned int NumberOfActiveSurfaces,
675 enum source_format_class SourcePixelFormat[],
676 enum dm_rotation_angle SourceRotation[],
677 bool ViewportStationary[],
678 unsigned int ViewportWidth[],
679 unsigned int ViewportHeight[],
680 unsigned int ViewportXStart[],
681 unsigned int ViewportYStart[],
682 unsigned int ViewportXStartC[],
683 unsigned int ViewportYStartC[],
684 unsigned int SurfaceWidthY[],
685 unsigned int SurfaceWidthC[],
686 unsigned int SurfaceHeightY[],
687 unsigned int SurfaceHeightC[],
688 enum odm_combine_mode ODMMode[],
689 unsigned int BytePerPixY[],
690 unsigned int BytePerPixC[],
691 unsigned int Read256BytesBlockHeightY[],
692 unsigned int Read256BytesBlockHeightC[],
693 unsigned int Read256BytesBlockWidthY[],
694 unsigned int Read256BytesBlockWidthC[],
695 unsigned int BlendingAndTiming[],
696 unsigned int HActive[],
698 unsigned int DPPPerSurface[],
701 double SwathWidthdoubleDPPY[],
702 double SwathWidthdoubleDPPC[],
703 double SwathWidthY[], // per-pipe
704 double SwathWidthC[], // per-pipe
705 unsigned int MaximumSwathHeightY[],
706 unsigned int MaximumSwathHeightC[],
707 unsigned int swath_width_luma_ub[], // per-pipe
708 unsigned int swath_width_chroma_ub[]) // per-pipe
711 enum odm_combine_mode MainSurfaceODMMode;
// Surface dimensions rounded up to whole 256-byte blocks.
713 unsigned int surface_width_ub_l;
714 unsigned int surface_height_ub_l;
715 unsigned int surface_width_ub_c;
716 unsigned int surface_height_ub_c;
718 #ifdef __DML_VBA_DEBUG__
719 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
720 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
723 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
// The swath runs along the viewport width unless the surface is rotated
// vertically, in which case it runs along the viewport height.
724 if (!IsVertical(SourceRotation[k]))
725 SwathWidthdoubleDPPY[k] = ViewportWidth[k];
727 SwathWidthdoubleDPPY[k] = ViewportHeight[k];
729 #ifdef __DML_VBA_DEBUG__
730 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
731 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
// Use the ODM mode of the surface that BlendingAndTiming[k] points at,
// falling back to this surface's own mode when no index matches.
734 MainSurfaceODMMode = ODMMode[k];
735 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
736 if (BlendingAndTiming[k] == j)
737 MainSurfaceODMMode = ODMMode[j];
// Per-pipe luma swath width: the whole width when a single DPP is forced;
// capped at HActive / 4 or / 2 (scaled by HRatio) for ODM combine; halved
// when the surface uses two DPPs; otherwise the whole width.
740 if (ForceSingleDPP) {
741 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
743 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
744 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
745 dml_round(HActive[k] / 4.0 * HRatio[k]));
746 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
747 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
748 dml_round(HActive[k] / 2.0 * HRatio[k]));
749 } else if (DPPPerSurface[k] == 2) {
750 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
752 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
756 #ifdef __DML_VBA_DEBUG__
757 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
758 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
759 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
// NOTE(review): the next two values are double but printed with %d; if
// dml_print follows printf conventions these debug lines are mismatched.
760 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
761 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
// Chroma swath is half the luma width for the 4:2:0 formats, otherwise the
// same as luma.
764 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
765 SourcePixelFormat[k] == dm_420_12) {
766 SwathWidthC[k] = SwathWidthY[k] / 2;
767 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
769 SwathWidthC[k] = SwathWidthY[k];
770 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
// NOTE(review): the luma assignment repeats what the ForceSingleDPP branch
// above already did; only the chroma assignment is new here.
773 if (ForceSingleDPP == true) {
774 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
775 SwathWidthC[k] = SwathWidthdoubleDPPC[k];
// Round the surface dimensions up to whole 256-byte blocks.
778 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
779 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
780 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
781 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
783 #ifdef __DML_VBA_DEBUG__
784 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
785 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
786 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
787 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
788 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
789 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
790 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
791 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
792 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
793 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
// Non-vertical scan: the swath spans block widths and the swath height is
// the block height.
796 if (!IsVertical(SourceRotation[k])) {
797 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
798 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
// Stationary viewport on a single DPP: the bound is the span between the
// block-aligned start and the block-aligned end of the swath.
799 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
800 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
801 dml_floor(ViewportXStart[k] +
803 Read256BytesBlockWidthY[k] - 1,
804 Read256BytesBlockWidthY[k]) -
805 dml_floor(ViewportXStart[k],
806 Read256BytesBlockWidthY[k]));
// Otherwise: round (width - 1) up to a block multiple and add one more block.
808 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
809 dml_ceil(SwathWidthY[k] - 1,
810 Read256BytesBlockWidthY[k]) +
811 Read256BytesBlockWidthY[k]);
// The chroma bound is only computed when the surface has a chroma plane.
813 if (BytePerPixC[k] > 0) {
814 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
815 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
816 dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
817 Read256BytesBlockWidthC[k] - 1,
818 Read256BytesBlockWidthC[k]) -
819 dml_floor(ViewportXStartC[k],
820 Read256BytesBlockWidthC[k]));
822 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
823 dml_ceil(SwathWidthC[k] - 1,
824 Read256BytesBlockWidthC[k]) +
825 Read256BytesBlockWidthC[k]);
828 swath_width_chroma_ub[k] = 0;
// Presumably the vertical-scan branch (the "else" line is not visible here):
// block width and height swap roles, and Y start / surface height are used.
831 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
832 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
834 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
836 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
837 Read256BytesBlockHeightY[k]) -
838 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
840 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
841 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
843 if (BytePerPixC[k] > 0) {
844 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
845 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
846 dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
847 Read256BytesBlockHeightC[k] - 1,
848 Read256BytesBlockHeightC[k]) -
849 dml_floor(ViewportYStartC[k],
850 Read256BytesBlockHeightC[k]));
852 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
853 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
854 Read256BytesBlockHeightC[k]);
857 swath_width_chroma_ub[k] = 0;
861 #ifdef __DML_VBA_DEBUG__
862 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
863 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
864 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
865 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
869 } // CalculateSwathWidth
// dml32_UnboundedRequest - decide whether unbounded requesting can be used.
//
// The initial answer is true only when the policy is not
// dm_unbounded_requesting_disable, exactly one DPP is active and NoChroma is
// set. Three further conditions are then tested: an eDP-only policy with a
// non-eDP output, linear surface tiling, and a needed reserved-space
// adjustment combined with the "disable if adjustment needed" flag.
// NOTE(review): the NoChroma parameter declaration, the bodies of the three
// ifs and the return statement are not visible in this view; presumably each
// if clears ret_val and ret_val is returned - confirm against the full file.
871 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
872 unsigned int TotalNumberOfActiveDPP,
874 enum output_encoder_class Output,
875 enum dm_swizzle_mode SurfaceTiling,
876 bool CompBufReservedSpaceNeedAdjustment,
877 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
879 bool ret_val = false;
// Base eligibility: policy not disabled, a single active DPP, no chroma.
881 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
882 TotalNumberOfActiveDPP == 1 && NoChroma);
// eDP-only policy while the output is not eDP.
883 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
// Linear tiling.
886 if (SurfaceTiling == dm_sw_linear)
// Reserved space needs adjusting and the caller asked to disable in that case.
889 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
892 #ifdef __DML_VBA_DEBUG__
893 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment);
894 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
895 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val);
/*
 * dml32_CalculateDETBufferSize() - size the DET buffer of every surface and
 * the compressed buffer.
 *
 * Unbounded-request case: surface 0 takes DETSizeOverride[0] when it is
 * non-zero, otherwise the larger of nomDETInKByte and two full swaths
 * (luma + chroma) rounded up to a multiple of 64 KB. The compressed buffer
 * receives what is left of ConfigReturnBufferSizeInKByte.
 *
 * Otherwise the DET is distributed from a pool of MaxTotalDETInKByte:
 *   1. Every surface gets a minimum size: the smallest value, stepped in
 *      64 KB increments up to max_minDET, that holds two full swaths. When
 *      no such value exists the fallback is max(128, one luma + chroma swath
 *      rounded up to 64 KB). Phantom pipes get 0, overrides are taken as is,
 *      and each assignment is deducted from the pool per DPP.
 *   2. The remaining pool is handed out in proportion to read bandwidth.
 *      Surfaces are served in ascending bandwidth order; each piece is
 *      rounded to a multiple of (DPP count * 64 KB), limited to what the pool
 *      can still supply, and capped so that a pipe does not exceed
 *      nomDETInKByte.
 *   3. The compressed buffer is set to MinCompressedBufferSizeInKByte.
 * Finally the compressed buffer size is scaled by
 * CompressedBufferSegmentSizeInkByteFinal / 64.
 *
 * Outputs: DETBufferSizeInKByte[] (per surface, per pipe) and
 * *CompressedBufferSizeInkByte.
 */
901 void dml32_CalculateDETBufferSize(
902 unsigned int DETSizeOverride[],
903 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
905 unsigned int NumberOfActiveSurfaces,
906 bool UnboundedRequestEnabled,
907 unsigned int nomDETInKByte,
908 unsigned int MaxTotalDETInKByte,
909 unsigned int ConfigReturnBufferSizeInKByte,
910 unsigned int MinCompressedBufferSizeInKByte,
911 unsigned int CompressedBufferSegmentSizeInkByteFinal,
912 enum source_format_class SourcePixelFormat[],
913 double ReadBandwidthLuma[],
914 double ReadBandwidthChroma[],
915 unsigned int RoundedUpMaxSwathSizeBytesY[],
916 unsigned int RoundedUpMaxSwathSizeBytesC[],
917 unsigned int DPPPerSurface[],
919 unsigned int DETBufferSizeInKByte[],
920 unsigned int *CompressedBufferSizeInkByte)
922 unsigned int DETBufferSizePoolInKByte;
923 unsigned int NextDETBufferPieceInKByte;
924 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
925 bool NextPotentialSurfaceToAssignDETPieceFound;
926 unsigned int NextSurfaceToAssignDETPiece;
927 double TotalBandwidth;
928 double BandwidthOfSurfacesNotAssignedDETPiece;
929 unsigned int max_minDET;
931 unsigned int minDET_pipe;
934 #ifdef __DML_VBA_DEBUG__
935 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
936 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
937 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
938 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
939 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
940 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
941 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
942 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
943 CompressedBufferSegmentSizeInkByteFinal);
946 // Note: Will use default det size if that fits 2 swaths
947 if (UnboundedRequestEnabled) {
948 if (DETSizeOverride[0] > 0) {
949 DETBufferSizeInKByte[0] = DETSizeOverride[0];
951 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
952 ((double) RoundedUpMaxSwathSizeBytesY[0] +
953 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
// NOTE(review): unsigned subtraction; wraps if the DET size exceeds the config return buffer.
955 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
957 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
958 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
959 DETBufferSizeInKByte[k] = nomDETInKByte;
// 4:2:0 formats search one 64 KB step less than the nominal size.
960 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
961 SourcePixelFormat[k] == dm_420_12) {
962 max_minDET = nomDETInKByte - 64;
964 max_minDET = nomDETInKByte;
969 // add DET resource until can hold 2 full swaths
970 while (minDET <= max_minDET && minDET_pipe == 0) {
971 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
972 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
973 minDET_pipe = minDET;
974 minDET = minDET + 64;
977 #ifdef __DML_VBA_DEBUG__
978 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
979 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
980 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
981 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
982 RoundedUpMaxSwathSizeBytesY[k]);
983 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
984 RoundedUpMaxSwathSizeBytesC[k]);
// No size up to max_minDET holds two swaths: fall back to a single swath pair, at least 128 KB.
987 if (minDET_pipe == 0) {
988 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
989 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
990 #ifdef __DML_VBA_DEBUG__
991 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
992 __func__, k, minDET_pipe);
// Phantom pipes get no DET; overrides are honoured as given; otherwise the
// minimum is taken only if the pool can still cover it for every DPP.
996 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
997 DETBufferSizeInKByte[k] = 0;
998 } else if (DETSizeOverride[k] > 0) {
999 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1000 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1001 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1002 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1003 DETBufferSizeInKByte[k] = minDET_pipe;
1004 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1005 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1008 #ifdef __DML_VBA_DEBUG__
1009 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1010 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1011 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1012 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
// Total read bandwidth of all non-phantom surfaces.
1017 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1018 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1019 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1021 #ifdef __DML_VBA_DEBUG__
1022 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1023 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1024 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1025 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1026 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
// Mark surfaces that need no extra piece: phantom pipes, overridden surfaces,
// and surfaces whose share of the total DET already covers their share of
// the total bandwidth.
1028 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1029 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1031 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1032 DETPieceAssignedToThisSurfaceAlready[k] = true;
1033 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1034 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1035 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1036 DETPieceAssignedToThisSurfaceAlready[k] = true;
1037 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1038 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1040 DETPieceAssignedToThisSurfaceAlready[k] = false;
1042 #ifdef __DML_VBA_DEBUG__
1043 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1044 DETPieceAssignedToThisSurfaceAlready[k]);
1045 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1046 BandwidthOfSurfacesNotAssignedDETPiece);
// One pass per surface: pick the unassigned surface with the lowest read
// bandwidth and give it its proportional piece of the remaining pool.
1050 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1051 NextPotentialSurfaceToAssignDETPieceFound = false;
1052 NextSurfaceToAssignDETPiece = 0;
1054 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1055 #ifdef __DML_VBA_DEBUG__
1056 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1057 ReadBandwidthLuma[k]);
1058 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1059 ReadBandwidthChroma[k]);
1060 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1061 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1063 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1064 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1065 NextSurfaceToAssignDETPiece);
1067 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1068 (!NextPotentialSurfaceToAssignDETPieceFound ||
1069 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1070 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1071 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1072 NextSurfaceToAssignDETPiece = k;
1073 NextPotentialSurfaceToAssignDETPieceFound = true;
1075 #ifdef __DML_VBA_DEBUG__
1076 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1077 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1078 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1079 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1083 if (NextPotentialSurfaceToAssignDETPieceFound) {
1084 // Note: To show the banker's rounding behavior in VBA and also the fact
1085 // that the DET buffer size varies due to precision issue
1087 //double tmp1 = ((double) DETBufferSizePoolInKByte *
1088 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1089 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1090 // BandwidthOfSurfacesNotAssignedDETPiece /
1091 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1092 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
1093 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1094 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1095 //BandwidthOfSurfacesNotAssignedDETPiece /
1096 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1099 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
// Proportional piece, rounded to a multiple of (DPP count * 64 KB) and
// limited to the largest such multiple the pool can still provide.
1101 NextDETBufferPieceInKByte = dml_min(
1102 dml_round((double) DETBufferSizePoolInKByte *
1103 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1104 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1105 BandwidthOfSurfacesNotAssignedDETPiece /
1106 ((ForceSingleDPP ? 1 :
1107 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1108 (ForceSingleDPP ? 1 :
1109 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1110 dml_floor((double) DETBufferSizePoolInKByte,
1111 (ForceSingleDPP ? 1 :
1112 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1114 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1115 // We should limit the per-pipe DET size to the nominal / max per pipe.
// The DPP count must be that of the selected surface. k is only the scan
// index of the search loop above and does not identify the selected surface,
// so it must not be used to index DPPPerSurface[] here.
1116 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece])) {
1117 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1118 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece])) {
1119 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) -
1120 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1122 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1123 // already has the max per-pipe value
1124 NextDETBufferPieceInKByte = 0;
1128 #ifdef __DML_VBA_DEBUG__
1129 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1130 DETBufferSizePoolInKByte);
1131 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1132 NextSurfaceToAssignDETPiece);
1133 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1134 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1135 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1136 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1137 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1138 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1139 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1140 NextDETBufferPieceInKByte);
1141 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1142 __func__, j, NextSurfaceToAssignDETPiece,
1143 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
// The piece covers all DPPs of the surface; the stored size is per pipe.
1146 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1147 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1148 + NextDETBufferPieceInKByte
1149 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1150 #ifdef __DML_VBA_DEBUG__
1151 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1154 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1155 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1156 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1157 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1158 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1161 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
// Scale the compressed buffer size by the segment size relative to 64.
1163 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1165 #ifdef __DML_VBA_DEBUG__
1166 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1167 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1168 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1169 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1170 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1173 } // CalculateDETBufferSize
/*
 * dml32_CalculateODMMode() - choose the ODM combine mode for one surface.
 *
 * The required DISPCLK is computed for three cases (no combine, 2:1, 4:1)
 * with dml32_CalculateRequiredDispclk(). The outputs start as "pipes
 * available", ODM disabled and the no-combine DISPCLK. 4:1 is considered
 * first, then 2:1; a mode is only selected when enough DPPs remain
 * (TotalNumberOfActiveDPP + 4, + 2 or + 1 must not exceed MaxNumDPP).
 * NOTE(review): the statements that set *TotalAvailablePipesSupport = false
 * are presumably the "not enough DPPs" branches of those checks; confirm.
 *
 * Outputs: *TotalAvailablePipesSupport, *NumberOfDPP, *ODMMode,
 * *RequiredDISPCLKPerSurface.
 */
1175 void dml32_CalculateODMMode(
1176 unsigned int MaximumPixelsPerLinePerDSCUnit,
1177 unsigned int HActive,
1178 enum output_encoder_class Output,
1179 enum odm_combine_policy ODMUse,
1180 double StateDispclk,
1183 unsigned int TotalNumberOfActiveDPP,
1184 unsigned int MaxNumDPP,
1186 double DISPCLKDPPCLKDSCCLKDownSpreading,
1187 double DISPCLKRampingMargin,
1188 double DISPCLKDPPCLKVCOSpeed,
1191 bool *TotalAvailablePipesSupport,
1192 unsigned int *NumberOfDPP,
1193 enum odm_combine_mode *ODMMode,
1194 double *RequiredDISPCLKPerSurface)
1197 double SurfaceRequiredDISPCLKWithoutODMCombine;
1198 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1199 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
// Required DISPCLK for each candidate ODM mode.
1201 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1202 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1204 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1205 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1207 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1208 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1210 *TotalAvailablePipesSupport = true;
1211 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1213 if (ODMUse == dm_odm_combine_policy_none)
1214 *ODMMode = dm_odm_combine_mode_disabled;
1216 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1219 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1220 // (ODMUse == "" || ODMUse == "CombineAsNeeded")
// 4:1 candidate: output is none of HDMI/DP/eDP, and either the policy forces
// 4:1, the 2:1 DISPCLK exceeds the state's DISPCLK, or DSC is enabled with
// HActive above twice the per-DSC-unit line limit.
1222 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1223 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1224 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) {
1225 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1226 *ODMMode = dm_odm_combine_mode_4to1;
1227 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1230 *TotalAvailablePipesSupport = false;
// 2:1 candidate: output is not HDMI, and either the policy forces 2:1, only
// the 2:1 DISPCLK fits within the state's DISPCLK, or DSC is enabled with
// HActive above the per-DSC-unit line limit.
1232 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1233 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1234 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1235 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) {
1236 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1237 *ODMMode = dm_odm_combine_mode_2to1;
1238 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1241 *TotalAvailablePipesSupport = false;
// No combine: a single additional DPP must still be available.
1244 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1247 *TotalAvailablePipesSupport = false;
/*
 * dml32_CalculateRequiredDispclk() - DISPCLK needed for a pixel clock under
 * a given ODM mode.
 *
 * The pixel clock is divided by 4 (4:1) or 2 (2:1) or used as is. Two
 * candidates are derived from it, both rounded to DFS granularity with
 * round_up = 1: one including down-spreading and the ramping margin, one
 * including down-spreading only. MaxDispclk is rounded with round_up = 0.
 *
 * Return: the without-ramping value if even that exceeds the rounded
 * maximum; the rounded maximum if only the with-ramping value exceeds it;
 * otherwise the with-ramping value.
 */
1251 double dml32_CalculateRequiredDispclk(
1252 enum odm_combine_mode ODMMode,
1254 double DISPCLKDPPCLKDSCCLKDownSpreading,
1255 double DISPCLKRampingMargin,
1256 double DISPCLKDPPCLKVCOSpeed,
1259 double RequiredDispclk = 0.;
1260 double PixelClockAfterODM;
1261 double DISPCLKWithRampingRoundedToDFSGranularity;
1262 double DISPCLKWithoutRampingRoundedToDFSGranularity;
1263 double MaxDispclkRoundedDownToDFSGranularity;
// Each ODM segment carries a fraction of the pixel rate.
1265 if (ODMMode == dm_odm_combine_mode_4to1)
1266 PixelClockAfterODM = PixelClock / 4;
1267 else if (ODMMode == dm_odm_combine_mode_2to1)
1268 PixelClockAfterODM = PixelClock / 2;
1270 PixelClockAfterODM = PixelClock;
// Down-spreading and ramping margin are given in percent.
1273 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1274 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1275 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1277 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1278 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1280 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
// The ramping margin is dropped or clamped when it would push the request above the maximum.
1282 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1283 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1284 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1285 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1287 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1289 return RequiredDispclk;
/*
 * dml32_RoundToDFSGranularity() - snap a clock to a value of the form
 * VCOSpeed * 4 / N with N an integer.
 *
 * One return floors the divider (result >= Clock), the other ceils it
 * (result <= Clock).
 * NOTE(review): presumably round_up selects the floored divider; confirm
 * against the conditions guarding the two returns.
 */
1292 double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
// Smaller integer divider: the result is not below Clock.
1298 return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
// Larger integer divider: the result is not above Clock.
1300 return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);
/*
 * dml32_CalculateOutputLink() - determine output bpp, DSC/FEC requirement,
 * output type and link rate for one timing.
 *
 * Outputs are first reset (no DSC, no FEC, unknown type and rate). Work is
 * only done when IsMainSurfaceUsingTheIndicatedTiming is set.
 *
 * HDMI: no DSC or FEC; the bpp comes from dml32_TruncToValidBPP() using
 * min(600, PHYCLKPerState) * 10 as link rate and 3 lanes.
 *
 * DP / DP2.0 / eDP: DSC and FEC start from DSCEnable and the encoder type.
 * Link rates are then tried from lowest to highest (UHBR10, UHBR13.5,
 * UHBR20 for DP2.0; HBR, HBR2, HBR3 otherwise). A rate is tried when
 * OutputLinkDPRate is "na" or names that rate, the PHY clock is high enough,
 * and (after the first rate) no bpp has been found yet. If a rate yields no
 * bpp, DSC is enabled by the caller, no bpp is forced, and the PHY clock
 * cannot reach the next rate, the rate is retried with link DSC enabled.
 *
 * The DP2.0 PHY clock thresholds are link rate / 32 and are evaluated in
 * floating point so that 10000 / 32 and 13500 / 32 are not truncated.
 *
 * NOTE(review): RequiresFEC is declared double * but only true/false is
 * stored through it.
 *
 * Outputs: *RequiresDSC, *RequiresFEC, *OutBpp, *OutputType, *OutputRate,
 * *RequiredSlots.
 */
1303 void dml32_CalculateOutputLink(
1304 double PHYCLKPerState,
1305 double PHYCLKD18PerState,
1306 double PHYCLKD32PerState,
1307 double Downspreading,
1308 bool IsMainSurfaceUsingTheIndicatedTiming,
1309 enum output_encoder_class Output,
1310 enum output_format_class OutputFormat,
1311 unsigned int HTotal,
1312 unsigned int HActive,
1313 double PixelClockBackEnd,
1314 double ForcedOutputLinkBPP,
1315 unsigned int DSCInputBitPerComponent,
1316 unsigned int NumberOfDSCSlices,
1317 double AudioSampleRate,
1318 unsigned int AudioSampleLayout,
1319 enum odm_combine_mode ODMModeNoDSC,
1320 enum odm_combine_mode ODMModeDSC,
1322 unsigned int OutputLinkDPLanes,
1323 enum dm_output_link_dp_rate OutputLinkDPRate,
1327 double *RequiresFEC,
1329 enum dm_output_type *OutputType,
1330 enum dm_output_rate *OutputRate,
1331 unsigned int *RequiredSlots)
1335 *RequiresDSC = false;
1336 *RequiresFEC = false;
1338 *OutputType = dm_output_type_unknown;
1339 *OutputRate = dm_output_rate_unknown;
1341 if (IsMainSurfaceUsingTheIndicatedTiming) {
1342 if (Output == dm_hdmi) {
1343 *RequiresDSC = false;
1344 *RequiresFEC = false;
1345 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1346 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1347 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1348 ODMModeNoDSC, ODMModeDSC, &dummy);
1349 //OutputTypeAndRate = "HDMI";
1350 *OutputType = dm_output_type_hdmi;
1352 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
// Initial DSC/FEC state derived from DSCEnable and the encoder type.
1353 if (DSCEnable == true) {
1354 *RequiresDSC = true;
1355 LinkDSCEnable = true;
1356 if (Output == dm_dp || Output == dm_dp2p0)
1357 *RequiresFEC = true;
1359 *RequiresFEC = false;
1361 *RequiresDSC = false;
1362 LinkDSCEnable = false;
1363 if (Output == dm_dp2p0)
1364 *RequiresFEC = true;
1366 *RequiresFEC = false;
1368 if (Output == dm_dp2p0) {
// DP2.0: thresholds are link rate / 32, computed in floating point
// (312.5, 421.875 and 625) to match the double PHY clock they are compared with.
1370 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1371 PHYCLKD32PerState >= 10000.0 / 32) {
1372 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1373 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1374 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1375 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1376 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// Retry with DSC only when the next rate (UHBR13.5) is out of reach.
1377 if (*OutBpp == 0 && PHYCLKD32PerState < 13500.0 / 32 && DSCEnable == true &&
1378 ForcedOutputLinkBPP == 0) {
1379 *RequiresDSC = true;
1380 LinkDSCEnable = true;
1381 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1382 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1383 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1384 OutputFormat, DSCInputBitPerComponent,
1385 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1386 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1388 //OutputTypeAndRate = Output & " UHBR10";
1389 *OutputType = dm_output_type_dp2p0;
1390 *OutputRate = dm_output_rate_dp_rate_uhbr10;
1392 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1393 *OutBpp == 0 && PHYCLKD32PerState >= 13500.0 / 32) {
1394 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1395 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1396 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1397 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1398 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// Retry with DSC only when the next rate (UHBR20) is out of reach.
1400 if (*OutBpp == 0 && PHYCLKD32PerState < 20000.0 / 32 && DSCEnable == true &&
1401 ForcedOutputLinkBPP == 0) {
1402 *RequiresDSC = true;
1403 LinkDSCEnable = true;
1404 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1405 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1406 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1407 OutputFormat, DSCInputBitPerComponent,
1408 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1409 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1411 //OutputTypeAndRate = Output & " UHBR13p5";
1412 *OutputType = dm_output_type_dp2p0;
1413 *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1415 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1416 *OutBpp == 0 && PHYCLKD32PerState >= 20000.0 / 32) {
1417 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1418 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1419 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1420 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1421 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// Highest rate: DSC is the only remaining fallback.
1422 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1423 *RequiresDSC = true;
1424 LinkDSCEnable = true;
1425 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1426 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1427 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1428 OutputFormat, DSCInputBitPerComponent,
1429 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1430 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432 //OutputTypeAndRate = Output & " UHBR20";
1433 *OutputType = dm_output_type_dp2p0;
1434 *OutputRate = dm_output_rate_dp_rate_uhbr20;
// DP / eDP: HBR, HBR2 and HBR3 gated on PHYCLKPerState of 270, 540 and 810.
1438 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1439 PHYCLKPerState >= 270) {
1440 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1441 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1442 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1443 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1444 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1445 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1446 ForcedOutputLinkBPP == 0) {
1447 *RequiresDSC = true;
1448 LinkDSCEnable = true;
// FEC is only requested for DP here, not for eDP.
1449 if (Output == dm_dp)
1450 *RequiresFEC = true;
1451 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1452 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1453 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1454 OutputFormat, DSCInputBitPerComponent,
1455 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1456 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1458 //OutputTypeAndRate = Output & " HBR";
1459 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1460 *OutputRate = dm_output_rate_dp_rate_hbr;
1462 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1463 *OutBpp == 0 && PHYCLKPerState >= 540) {
1464 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1465 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1466 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1467 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1468 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1470 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1471 ForcedOutputLinkBPP == 0) {
1472 *RequiresDSC = true;
1473 LinkDSCEnable = true;
1474 if (Output == dm_dp)
1475 *RequiresFEC = true;
1477 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1478 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1479 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1480 OutputFormat, DSCInputBitPerComponent,
1481 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1482 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1484 //OutputTypeAndRate = Output & " HBR2";
1485 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1486 *OutputRate = dm_output_rate_dp_rate_hbr2;
1488 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1489 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1490 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1491 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1492 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1493 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1496 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1497 *RequiresDSC = true;
1498 LinkDSCEnable = true;
1499 if (Output == dm_dp)
1500 *RequiresFEC = true;
1502 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1503 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1504 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1505 OutputFormat, DSCInputBitPerComponent,
1506 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1507 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1509 //OutputTypeAndRate = Output & " HBR3";
1510 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1511 *OutputRate = dm_output_rate_dp_rate_hbr3;
/*
 * dml32_CalculateDPPCLK() - derive the global DPPCLK and the per-surface
 * DPPCLK values.
 *
 * First pass: each surface's clock is its single-DPP clock divided by its
 * DPP count, increased by the down-spreading percentage; *GlobalDPPCLK
 * tracks the maximum. The maximum is then rounded with
 * dml32_RoundToDFSGranularity() (round_up = 1). Second pass: every surface
 * clock is rounded up to a multiple of GlobalDPPCLK / 255.
 *
 * Outputs: *GlobalDPPCLK and Dppclk[].
 */
1518 void dml32_CalculateDPPCLK(
1519 unsigned int NumberOfActiveSurfaces,
1520 double DISPCLKDPPCLKDSCCLKDownSpreading,
1521 double DISPCLKDPPCLKVCOSpeed,
1522 double DPPCLKUsingSingleDPP[],
1523 unsigned int DPPPerSurface[],
1526 double *GlobalDPPCLK,
1531 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1532 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
1533 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
1535 *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
// Express each surface clock in steps of 1/255 of the global clock, rounding up.
1536 for (k = 0; k < NumberOfActiveSurfaces; ++k)
1537 Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
/*
 * dml32_TruncToValidBPP() - pick or validate an output bpp for a link.
 *
 * The pixel format selects the DSC bpp limits; the upper limit is
 * 1.5x (4:2:0), 2x (n422) or 3x (4:4:4 and the remaining case) the DSC
 * input bits per component, minus 1/16 bpp.
 *
 * The maximum link bpp is LinkBitRate * Lanes / PixelClock scaled by the
 * encoding overhead: 128/132 * 383/384 * 65536/65540 for DP2.0, 8/10 with
 * an extra 2.4% reduction for DSC on DP, plain 8/10 otherwise. It is then
 * adjusted for ODM: capped at 16 for 4:1, capped at 32 for 2:1, doubled for
 * the 1-to-2 split mode (one chain uses ODMModeDSC, the other
 * ODMModeNoDSC).
 *
 * With DesiredBPP == 0 a bpp is chosen from the link capacity: for DSC the
 * capacity is compared with the DSC limits and otherwise truncated to a
 * multiple of 1/16; without DSC the non-DSC bpp candidates are checked from
 * highest to lowest. With a non-zero DesiredBPP the value is validated
 * against the non-DSC candidates or the DSC range.
 * NOTE(review): the values returned by the individual branches are
 * presumably the matching limit/candidate or a failure code; confirm.
 *
 * Output: *RequiredSlots = ceil(DesiredBPP / MaxLinkBPP * 64).
 */
1540 double dml32_TruncToValidBPP(
1543 unsigned int HTotal,
1544 unsigned int HActive,
1548 enum output_encoder_class Output,
1549 enum output_format_class Format,
1550 unsigned int DSCInputBitPerComponent,
1551 unsigned int DSCSlices,
1552 unsigned int AudioRate,
1553 unsigned int AudioLayout,
1554 enum odm_combine_mode ODMModeNoDSC,
1555 enum odm_combine_mode ODMModeDSC,
1557 unsigned int *RequiredSlots)
1560 unsigned int MinDSCBPP;
1562 unsigned int NonDSCBPP0;
1563 unsigned int NonDSCBPP1;
1564 unsigned int NonDSCBPP2;
1565 unsigned int NonDSCBPP3;
1567 if (Format == dm_420) {
// 1.0 / 16 keeps the 1/16 bpp margin; the integer expression 1 / 16 is 0.
1572 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
1573 } else if (Format == dm_444) {
1579 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1581 if (Output == dm_hdmi) {
1590 if (Format == dm_n422) {
1592 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1595 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
// Link capacity in bits per pixel after encoding overhead.
1598 if (Output == dm_dp2p0) {
1599 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1600 } else if (DSCEnable && Output == dm_dp) {
1601 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1603 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
// ODM adjustment using the DSC ODM mode.
1607 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1608 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1609 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1610 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1611 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1612 MaxLinkBPP = 2 * MaxLinkBPP;
// ODM adjustment using the non-DSC ODM mode.
1614 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1615 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1616 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1617 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1618 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1619 MaxLinkBPP = 2 * MaxLinkBPP;
// No bpp requested: derive one from the link capacity.
1622 if (DesiredBPP == 0) {
1624 if (MaxLinkBPP < MinDSCBPP)
1626 else if (MaxLinkBPP >= MaxDSCBPP)
// DSC bpp is expressed in increments of 1/16.
1629 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1631 if (MaxLinkBPP >= NonDSCBPP3)
1633 else if (MaxLinkBPP >= NonDSCBPP2)
1635 else if (MaxLinkBPP >= NonDSCBPP1)
1637 else if (MaxLinkBPP >= NonDSCBPP0)
// A requested bpp must be one of the non-DSC candidates, or lie within the DSC range.
1643 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1644 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1645 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1651 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1654 } // TruncToValidBPP
/*
 * dml32_RequiredDTBCLK() - DTBCLK required for an output.
 *
 * Without DSC the result is max(PixelClock / 4 * OutputBpp / 24, 25).
 *
 * With DSC the pixel word rate is PixelClock (4:4:4) or PixelClock / 2
 * (other formats). HCActive is derived from the compressed bytes per slice
 * line grouped in units of three, summed over the slices. The average rate
 * over a full line (HCActive + HCBlank over HTotal) and the rate over the
 * active part (HCActive over HActive) are computed, and the result is the
 * maximum of a quarter of each rate and 25, times 1.002.
 *
 * NOTE(review): HActive / DSCSlices is an unsigned integer division, so the
 * surrounding dml_ceil(..., 1) has no effect; confirm whether a rounded-up
 * slice width was intended.
 */
1656 double dml32_RequiredDTBCLK(
1659 enum output_format_class OutputFormat,
1661 unsigned int DSCSlices,
1662 unsigned int HTotal,
1663 unsigned int HActive,
1664 unsigned int AudioRate,
1665 unsigned int AudioLayout)
1667 double PixelWordRate;
1670 double AverageTribyteRate;
1671 double HActiveTribyteRate;
1673 if (DSCEnable != true)
1674 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1676 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
1677 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1678 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1680 dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1681 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1682 HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1683 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
/*
 * dml32_DSCDelayRequirement() - DSC pipeline delay for one output.
 *
 * Returns 0 unless DSC is enabled and OutputBpp is non-zero. Otherwise the
 * base delay is dml32_dscceComputeDelay() + dml32_dscComputeDelay(). For ODM
 * 4:1 and 2:1 the slice count passed to the encoder delay is divided by 4
 * or 2 and the sum is multiplied by 4 or 2. The horizontal blanking
 * (HTotal - HActive) is then added once for every started active line the
 * delay spans, and the result is scaled by PixelClock / PixelClockBackEnd.
 *
 * NOTE(review): HActive / NumberOfDSCSlices is an unsigned integer division,
 * so the surrounding dml_ceil(..., 1) has no effect on the slice width; left
 * unchanged pending confirmation of the intended rounding.
 */
1686 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1687 enum odm_combine_mode ODMMode,
1688 unsigned int DSCInputBitPerComponent,
1690 unsigned int HActive,
1691 unsigned int HTotal,
1692 unsigned int NumberOfDSCSlices,
1693 enum output_format_class OutputFormat,
1694 enum output_encoder_class Output,
1696 double PixelClockBackEnd)
1698 unsigned int DSCDelayRequirement_val;
1700 if (DSCEnabled == true && OutputBpp != 0) {
1701 if (ODMMode == dm_odm_combine_mode_4to1) {
1702 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1703 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1704 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1705 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1706 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1707 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1708 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1710 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1711 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1712 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
// Add the blanking of every started line. The quotient must be computed in
// floating point: with two unsigned operands the division truncates before
// dml_ceil() sees it, and a delay shorter than one active line adds nothing.
1715 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1716 dml_ceil((double) DSCDelayRequirement_val / HActive, 1);
// Rescale by the ratio of the two pixel clocks.
1718 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1721 DSCDelayRequirement_val = 0;
1724 #ifdef __DML_VBA_DEBUG__
1725 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
1726 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
1727 dml_print("DML::%s: HActive = %d\n", __func__, HActive);
1728 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
1729 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1730 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
1731 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1734 return DSCDelayRequirement_val;
/*
 * dml32_CalculateSurfaceSizeInMall - per-surface MALL footprint and overflow flag.
 *
 * For each k < NumberOfActiveSurfaces, writes SurfaceSizeInMALL[k] as a luma
 * term, plus a chroma term when ReadBlockWidthC[k] > 0, plus DCC terms when
 * DCCEnable[k] is true. Two formulations are used; the first one is selected
 * by ViewportStationary[k].
 *
 * *ExceededMALLSize is set to true when the sum of SurfaceSizeInMALL[k] over
 * the surfaces with UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable
 * is greater than MALLAllocatedForDCN * 1024 * 1024, and to false otherwise.
 *
 * NOTE(review): dml_ceil/dml_floor/dml_min are defined elsewhere; the comments
 * below assume dml_ceil(a, b) / dml_floor(a, b) round a up / down to a
 * multiple of b - confirm against their definitions.
 */
1737 void dml32_CalculateSurfaceSizeInMall(
1738 unsigned int NumberOfActiveSurfaces,
1739 unsigned int MALLAllocatedForDCN,
1740 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1742 bool ViewportStationary[],
1743 unsigned int ViewportXStartY[],
1744 unsigned int ViewportYStartY[],
1745 unsigned int ViewportXStartC[],
1746 unsigned int ViewportYStartC[],
1747 unsigned int ViewportWidthY[],
1748 unsigned int ViewportHeightY[],
1749 unsigned int BytesPerPixelY[],
1750 unsigned int ViewportWidthC[],
1751 unsigned int ViewportHeightC[],
1752 unsigned int BytesPerPixelC[],
1753 unsigned int SurfaceWidthY[],
1754 unsigned int SurfaceWidthC[],
1755 unsigned int SurfaceHeightY[],
1756 unsigned int SurfaceHeightC[],
1757 unsigned int Read256BytesBlockWidthY[],
1758 unsigned int Read256BytesBlockWidthC[],
1759 unsigned int Read256BytesBlockHeightY[],
1760 unsigned int Read256BytesBlockHeightC[],
1761 unsigned int ReadBlockWidthY[],
1762 unsigned int ReadBlockWidthC[],
1763 unsigned int ReadBlockHeightY[],
1764 unsigned int ReadBlockHeightC[],
// The two parameters below are written by this function.
1767 unsigned int SurfaceSizeInMALL[],
1768 bool *ExceededMALLSize)
1770 unsigned int TotalSurfaceSizeInMALL = 0;
1773 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
// Stationary viewport: in each dimension take the span between the start
// aligned down and the end aligned up to the read block, capped by the
// surface dimension aligned up to the read block; multiply width, height
// and BytesPerPixelY.
1774 if (ViewportStationary[k]) {
1775 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1776 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1777 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1778 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1779 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1780 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1781 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
// The chroma term is only added when a chroma read block width is set.
1783 if (ReadBlockWidthC[k] > 0) {
1784 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1785 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1786 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1787 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1788 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1789 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1790 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1791 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1792 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
// DCC term: same span computation, but aligned to 8 * Read256BytesBlock
// width/height and with the byte count divided by 256.
1795 if (DCCEnable[k] == true) {
1796 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1797 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1798 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1799 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1800 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1801 * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1802 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1803 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1804 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1805 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
// Chroma DCC term, only when a chroma 256-byte block width is set.
1806 if (Read256BytesBlockWidthC[k] > 0) {
1807 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1808 dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1809 Read256BytesBlockWidthC[k]),
1810 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1811 * Read256BytesBlockWidthC[k] - 1, 8 *
1812 Read256BytesBlockWidthC[k]) -
1813 dml_floor(ViewportXStartC[k], 8 *
1814 Read256BytesBlockWidthC[k])) *
1815 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1816 Read256BytesBlockHeightC[k]),
1817 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1818 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1819 Read256BytesBlockHeightC[k]) -
1820 dml_floor(ViewportYStartC[k], 8 *
1821 Read256BytesBlockHeightC[k])) *
1822 BytesPerPixelC[k] / 256;
// Second formulation (presumably the non-stationary viewport case - TODO
// confirm): the viewport start is not used; each dimension is
// min(surface size, viewport size + block - 1) aligned up to the block.
1826 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1827 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1828 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1829 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1831 if (ReadBlockWidthC[k] > 0) {
1832 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1833 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1834 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1835 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1836 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
// DCC terms for the second formulation, again on the 8 * Read256BytesBlock
// grid with the result divided by 256.
1839 if (DCCEnable[k] == true) {
1840 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1841 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1842 Read256BytesBlockWidthY[k] - 1), 8 *
1843 Read256BytesBlockWidthY[k]) *
1844 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1845 Read256BytesBlockHeightY[k] - 1), 8 *
1846 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1848 if (Read256BytesBlockWidthC[k] > 0) {
1849 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1850 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1851 Read256BytesBlockWidthC[k] - 1), 8 *
1852 Read256BytesBlockWidthC[k]) *
1853 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1854 Read256BytesBlockHeightC[k] - 1), 8 *
1855 Read256BytesBlockHeightC[k]) *
1856 BytesPerPixelC[k] / 256;
// Only surfaces explicitly set to dm_use_mall_static_screen_enable count
// toward the total that is checked against the allocation.
1862 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1863 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1864 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
// MALLAllocatedForDCN is scaled by 1024 * 1024 before comparing
// (presumably MBytes -> bytes - TODO confirm the unit with the caller).
1866 *ExceededMALLSize = (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true);
1867 } // CalculateSurfaceSizeInMall
/*
 * dml32_CalculateVMRowAndSwath - per-surface VM / PTE row sizing.
 *
 * For each k < NumberOfActiveSurfaces this function:
 *  - picks vm_group_bytes[k] / dpte_group_bytes[k] from HostVMEnable and
 *    GPUVMEnable;
 *  - divides the PTE request buffer between luma and chroma;
 *  - calls dml32_CalculateVMAndRowBytes() and
 *    dml32_CalculatePrefetchSourceLines() with the chroma-side inputs for
 *    dm_420_8 / dm_420_10 / dm_420_12 / dm_rgbe_alpha sources, and with the
 *    luma-side inputs;
 *  - sums the luma and chroma results into PDEAndMetaPTEBytesFrame[k],
 *    MetaRowByte[k] and PixelPTEBytesPerRow[k] and sets the
 *    PTEBufferSizeNotExceeded[k] / DCCMetaBufferSizeNotExceeded[k] flags.
 * It then calls dml32_CalculateMALLUseForStaticScreen(), derives
 * PTE_BUFFER_MODE[k], BIGK_FRAGMENT_SIZE[k], use_one_row_for_frame[k] and
 * use_one_row_for_frame_flip[k], and calls dml32_CalculateRowBandwidth().
 *
 * st_vars holds the intermediate per-surface values used between those steps.
 */
1869 void dml32_CalculateVMRowAndSwath(
1870 struct dml32_CalculateVMRowAndSwath *st_vars,
1871 unsigned int NumberOfActiveSurfaces,
1873 unsigned int SurfaceSizeInMALL[],
1874 unsigned int PTEBufferSizeInRequestsLuma,
1875 unsigned int PTEBufferSizeInRequestsChroma,
1876 unsigned int DCCMetaBufferSizeBytes,
1877 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1878 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1879 unsigned int MALLAllocatedForDCN,
1880 double SwathWidthY[],
1881 double SwathWidthC[],
1884 unsigned int HostVMMaxNonCachedPageTableLevels,
1885 unsigned int GPUVMMaxPageTableLevels,
1886 unsigned int GPUVMMinPageSizeKBytes[],
1887 unsigned int HostVMMinPageSize,
1890 bool PTEBufferSizeNotExceeded[],
1891 bool DCCMetaBufferSizeNotExceeded[],
1892 unsigned int dpte_row_width_luma_ub[],
1893 unsigned int dpte_row_width_chroma_ub[],
1894 unsigned int dpte_row_height_luma[],
1895 unsigned int dpte_row_height_chroma[],
1896 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
1897 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
1898 unsigned int meta_req_width[],
1899 unsigned int meta_req_width_chroma[],
1900 unsigned int meta_req_height[],
1901 unsigned int meta_req_height_chroma[],
1902 unsigned int meta_row_width[],
1903 unsigned int meta_row_width_chroma[],
1904 unsigned int meta_row_height[],
1905 unsigned int meta_row_height_chroma[],
1906 unsigned int vm_group_bytes[],
1907 unsigned int dpte_group_bytes[],
1908 unsigned int PixelPTEReqWidthY[],
1909 unsigned int PixelPTEReqHeightY[],
1910 unsigned int PTERequestSizeY[],
1911 unsigned int PixelPTEReqWidthC[],
1912 unsigned int PixelPTEReqHeightC[],
1913 unsigned int PTERequestSizeC[],
1914 unsigned int dpde0_bytes_per_frame_ub_l[],
1915 unsigned int meta_pte_bytes_per_frame_ub_l[],
1916 unsigned int dpde0_bytes_per_frame_ub_c[],
1917 unsigned int meta_pte_bytes_per_frame_ub_c[],
1918 double PrefetchSourceLinesY[],
1919 double PrefetchSourceLinesC[],
1920 double VInitPreFillY[],
1921 double VInitPreFillC[],
1922 unsigned int MaxNumSwathY[],
1923 unsigned int MaxNumSwathC[],
1924 double meta_row_bw[],
1925 double dpte_row_bw[],
1926 double PixelPTEBytesPerRow[],
1927 double PDEAndMetaPTEBytesFrame[],
1928 double MetaRowByte[],
1929 bool use_one_row_for_frame[],
1930 bool use_one_row_for_frame_flip[],
1931 bool UsesMALLForStaticScreen[],
1932 bool PTE_BUFFER_MODE[],
1933 unsigned int BIGK_FRAGMENT_SIZE[])
1937 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
// Group sizes: HostVM is tested first and takes precedence over GPUVM.
1938 if (HostVMEnable == true) {
1939 vm_group_bytes[k] = 512;
1940 dpte_group_bytes[k] = 512;
1941 } else if (GPUVMEnable == true) {
1942 vm_group_bytes[k] = 2048;
// With GPUVM only, the smaller dpte group is used for pages of at least
// 64 KBytes when IsVertical(SourceRotation) holds.
1943 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1944 dpte_group_bytes[k] = 512;
1946 dpte_group_bytes[k] = 2048;
1948 vm_group_bytes[k] = 0;
1949 dpte_group_bytes[k] = 0;
// For these source formats the chroma-side quantities are computed too.
1952 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
1953 myPipe[k].SourcePixelFormat == dm_420_12 ||
1954 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
// dm_420_10 / dm_420_12 with !IsVertical(SourceRotation): luma and chroma
// each get half of the combined luma + chroma PTE request budget.
1955 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
1956 !IsVertical(myPipe[k].SourceRotation)) {
1957 st_vars->PTEBufferSizeInRequestsForLuma[k] =
1958 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
1959 st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k];
1961 st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
1962 st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
// Note: PDEAndMetaPTEBytesFrameC is a single field of st_vars (not indexed
// by k); it is overwritten on every iteration and consumed by the sum
// into PDEAndMetaPTEBytesFrame[k] further down in the same iteration.
1965 st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
1966 myPipe[k].ViewportStationary,
1967 myPipe[k].DCCEnable,
1968 myPipe[k].DPPPerSurface,
1969 myPipe[k].BlockHeight256BytesC,
1970 myPipe[k].BlockWidth256BytesC,
1971 myPipe[k].SourcePixelFormat,
1972 myPipe[k].SurfaceTiling,
1973 myPipe[k].BytePerPixelC,
1974 myPipe[k].SourceRotation,
1976 myPipe[k].ViewportHeightChroma,
1977 myPipe[k].ViewportXStartC,
1978 myPipe[k].ViewportYStartC,
1981 HostVMMaxNonCachedPageTableLevels,
1982 GPUVMMaxPageTableLevels,
1983 GPUVMMinPageSizeKBytes[k],
1985 st_vars->PTEBufferSizeInRequestsForChroma[k],
1987 myPipe[k].DCCMetaPitchC,
1988 myPipe[k].BlockWidthC,
1989 myPipe[k].BlockHeightC,
// Pointer arguments from here on receive the helper's chroma results.
1992 &st_vars->MetaRowByteC[k],
1993 &st_vars->PixelPTEBytesPerRowC[k],
1994 &dpte_row_width_chroma_ub[k],
1995 &dpte_row_height_chroma[k],
1996 &dpte_row_height_linear_chroma[k],
1997 &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k],
1998 &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k],
1999 &st_vars->dpte_row_height_chroma_one_row_per_frame[k],
2000 &meta_req_width_chroma[k],
2001 &meta_req_height_chroma[k],
2002 &meta_row_width_chroma[k],
2003 &meta_row_height_chroma[k],
2004 &PixelPTEReqWidthC[k],
2005 &PixelPTEReqHeightC[k],
2006 &PTERequestSizeC[k],
2007 &dpde0_bytes_per_frame_ub_c[k],
2008 &meta_pte_bytes_per_frame_ub_c[k]);
2010 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2011 myPipe[k].VRatioChroma,
2012 myPipe[k].VTapsChroma,
2013 myPipe[k].InterlaceEnable,
2014 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2015 myPipe[k].SwathHeightC,
2016 myPipe[k].SourceRotation,
2017 myPipe[k].ViewportStationary,
2019 myPipe[k].ViewportHeightChroma,
2020 myPipe[k].ViewportXStartC,
2021 myPipe[k].ViewportYStartC,
// Here luma receives the whole PTE request budget and every chroma-side
// result is zeroed (presumably the path for formats without a chroma
// plane - TODO confirm).
2027 st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2028 st_vars->PTEBufferSizeInRequestsForChroma[k] = 0;
2029 st_vars->PixelPTEBytesPerRowC[k] = 0;
2030 st_vars->PDEAndMetaPTEBytesFrameC = 0;
2031 st_vars->MetaRowByteC[k] = 0;
2032 MaxNumSwathC[k] = 0;
2033 PrefetchSourceLinesC[k] = 0;
2034 st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0;
2035 st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2036 st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
// Luma: the same helper, fed with the Y-side inputs and output slots.
2039 st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2040 myPipe[k].ViewportStationary,
2041 myPipe[k].DCCEnable,
2042 myPipe[k].DPPPerSurface,
2043 myPipe[k].BlockHeight256BytesY,
2044 myPipe[k].BlockWidth256BytesY,
2045 myPipe[k].SourcePixelFormat,
2046 myPipe[k].SurfaceTiling,
2047 myPipe[k].BytePerPixelY,
2048 myPipe[k].SourceRotation,
2050 myPipe[k].ViewportHeight,
2051 myPipe[k].ViewportXStart,
2052 myPipe[k].ViewportYStart,
2055 HostVMMaxNonCachedPageTableLevels,
2056 GPUVMMaxPageTableLevels,
2057 GPUVMMinPageSizeKBytes[k],
2059 st_vars->PTEBufferSizeInRequestsForLuma[k],
2061 myPipe[k].DCCMetaPitchY,
2062 myPipe[k].BlockWidthY,
2063 myPipe[k].BlockHeightY,
2066 &st_vars->MetaRowByteY[k],
2067 &st_vars->PixelPTEBytesPerRowY[k],
2068 &dpte_row_width_luma_ub[k],
2069 &dpte_row_height_luma[k],
2070 &dpte_row_height_linear_luma[k],
2071 &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k],
2072 &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k],
2073 &st_vars->dpte_row_height_luma_one_row_per_frame[k],
2075 &meta_req_height[k],
2077 &meta_row_height[k],
2078 &PixelPTEReqWidthY[k],
2079 &PixelPTEReqHeightY[k],
2080 &PTERequestSizeY[k],
2081 &dpde0_bytes_per_frame_ub_l[k],
2082 &meta_pte_bytes_per_frame_ub_l[k]);
2084 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2087 myPipe[k].InterlaceEnable,
2088 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2089 myPipe[k].SwathHeightY,
2090 myPipe[k].SourceRotation,
2091 myPipe[k].ViewportStationary,
2093 myPipe[k].ViewportHeight,
2094 myPipe[k].ViewportXStart,
2095 myPipe[k].ViewportYStart,
// Per-surface totals are the sum of the luma and chroma results.
2101 PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC;
2102 MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k];
// The row's PTE bytes must fit in 64 * (number of requests) for both
// planes (presumably 64 bytes per buffered request - TODO confirm).
2104 if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] &&
2105 st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) {
2106 PTEBufferSizeNotExceeded[k] = true;
2108 PTEBufferSizeNotExceeded[k] = false;
// The one-row-per-frame variant is checked against twice that limit
// (64 * 2 * requests).
2111 st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2112 st_vars->PTEBufferSizeInRequestsForLuma[k] &&
2113 st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]);
// Fills UsesMALLForStaticScreen[] from the requested modes and the
// per-surface fit flags computed above.
2116 dml32_CalculateMALLUseForStaticScreen(
2117 NumberOfActiveSurfaces,
2118 MALLAllocatedForDCN,
2119 UseMALLForStaticScreen, // mode
2121 st_vars->one_row_per_frame_fits_in_buffer,
2123 UsesMALLForStaticScreen); // boolen
2125 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2126 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2127 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2128 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2129 (GPUVMMinPageSizeKBytes[k] > 64);
// dml_log2 of the page size in bytes, minus 12 (i.e. relative to a 4096
// byte page if dml_log2 is base 2 - TODO confirm).
2130 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2133 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2134 #ifdef __DML_VBA_DEBUG__
2135 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
2136 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
// Same terms as PTE_BUFFER_MODE above, except that the page-size term
// here additionally requires IsVertical(SourceRotation).
2138 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2139 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2140 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2141 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
// The flip variant is cleared for dm_use_mall_pstate_change_full_frame.
2143 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2144 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
// Replace the regular row results with the one-row-per-frame variants
// produced by dml32_CalculateVMAndRowBytes() in the first loop.
2146 if (use_one_row_for_frame[k]) {
2147 dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k];
2148 dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k];
2149 st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k];
2150 dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k];
2151 dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k];
2152 st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k];
2153 PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k];
2156 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2157 DCCMetaBufferSizeNotExceeded[k] = true;
2159 DCCMetaBufferSizeNotExceeded[k] = false;
// The reported per-row total is halved in one-row-per-frame mode; the
// st_vars luma/chroma values passed on below are not halved.
2161 PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k];
2162 if (use_one_row_for_frame[k])
2163 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2165 dml32_CalculateRowBandwidth(
2167 myPipe[k].SourcePixelFormat,
2169 myPipe[k].VRatioChroma,
2170 myPipe[k].DCCEnable,
// HTotal / PixelClock: presumably the line time - TODO confirm units.
2171 myPipe[k].HTotal / myPipe[k].PixelClock,
2172 st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k],
2174 meta_row_height_chroma[k],
2175 st_vars->PixelPTEBytesPerRowY[k],
2176 st_vars->PixelPTEBytesPerRowC[k],
2177 dpte_row_height_luma[k],
2178 dpte_row_height_chroma[k],
2183 #ifdef __DML_VBA_DEBUG__
2184 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
2185 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
2186 __func__, k, use_one_row_for_frame_flip[k]);
2187 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
2188 __func__, k, UseMALLForPStateChange[k]);
2189 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
2190 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
2191 __func__, k, dpte_row_width_luma_ub[k]);
2192 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]);
2193 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
2194 __func__, k, dpte_row_height_chroma[k]);
2195 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
2196 __func__, k, dpte_row_width_chroma_ub[k]);
2197 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]);
// NOTE(review): PixelPTEBytesPerRow[] is declared double in this
// function's parameter list but is printed with %d on the next line;
// the conversion specifier does not match the argument type.
2198 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
2199 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
2200 __func__, k, PTEBufferSizeNotExceeded[k]);
2201 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2202 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2205 } // CalculateVMRowAndSwath
/*
 * dml32_CalculateVMAndRowBytes - VM and row byte counts for one plane.
 *
 * Returns PDEAndMetaPTEBytesFrame, the sum of *MetaPTEBytesFrame,
 * MPDEBytesFrame, *DPDE0BytesFrame and ExtraDPDEBytesFrame, multiplied by
 * (1 + 8 * HostVMDynamicLevels) when HostVMEnable is true.
 *
 * Through its pointer parameters it also reports the meta request and meta
 * row dimensions, *MetaRowByte, the PTE request geometry
 * (*PixelPTEReqWidth / *PixelPTEReqHeight / *PTERequestSize), the dpte row
 * height / width and *PixelPTEBytesPerRow, plus the "one row per frame"
 * variants of the last three.
 *
 * NOTE(review): dml_ceil/dml_floor/dml_log2/dml_min/dml_max and IsVertical
 * are defined elsewhere; comments below assume dml_ceil(a, b) / dml_floor(a, b)
 * round a up / down to a multiple of b - confirm against their definitions.
 */
2207 unsigned int dml32_CalculateVMAndRowBytes(
2208 bool ViewportStationary,
2210 unsigned int NumberOfDPPs,
2211 unsigned int BlockHeight256Bytes,
2212 unsigned int BlockWidth256Bytes,
2213 enum source_format_class SourcePixelFormat,
2214 unsigned int SurfaceTiling,
2215 unsigned int BytePerPixel,
2216 enum dm_rotation_angle SourceRotation,
2218 unsigned int ViewportHeight,
2219 unsigned int ViewportXStart,
2220 unsigned int ViewportYStart,
2223 unsigned int HostVMMaxNonCachedPageTableLevels,
2224 unsigned int GPUVMMaxPageTableLevels,
2225 unsigned int GPUVMMinPageSizeKBytes,
2226 unsigned int HostVMMinPageSize,
2227 unsigned int PTEBufferSizeInRequests,
2229 unsigned int DCCMetaPitch,
2230 unsigned int MacroTileWidth,
2231 unsigned int MacroTileHeight,
// The pointer parameters below are written by this function.
2234 unsigned int *MetaRowByte,
2235 unsigned int *PixelPTEBytesPerRow,
2236 unsigned int *dpte_row_width_ub,
2237 unsigned int *dpte_row_height,
2238 unsigned int *dpte_row_height_linear,
2239 unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
2240 unsigned int *dpte_row_width_ub_one_row_per_frame,
2241 unsigned int *dpte_row_height_one_row_per_frame,
2242 unsigned int *MetaRequestWidth,
2243 unsigned int *MetaRequestHeight,
2244 unsigned int *meta_row_width,
2245 unsigned int *meta_row_height,
2246 unsigned int *PixelPTEReqWidth,
2247 unsigned int *PixelPTEReqHeight,
2248 unsigned int *PTERequestSize,
2249 unsigned int *DPDE0BytesFrame,
2250 unsigned int *MetaPTEBytesFrame)
2252 unsigned int MPDEBytesFrame;
2253 unsigned int DCCMetaSurfaceBytes;
2254 unsigned int ExtraDPDEBytesFrame;
2255 unsigned int PDEAndMetaPTEBytesFrame;
2256 unsigned int HostVMDynamicLevels = 0;
2257 unsigned int MacroTileSizeBytes;
2258 unsigned int vp_height_meta_ub;
2259 unsigned int vp_height_dpte_ub;
2260 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
// HostVMDynamicLevels stays 0 unless both GPUVM and HostVM are enabled;
// larger HostVMMinPageSize values subtract 1 or 2 levels, clamped at 0.
2262 if (GPUVMEnable == true && HostVMEnable == true) {
2263 if (HostVMMinPageSize < 2048)
2264 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2265 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2266 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2268 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
// A meta request spans 8 times the 256-byte block in each dimension.
2271 *MetaRequestHeight = 8 * BlockHeight256Bytes;
2272 *MetaRequestWidth = 8 * BlockWidth256Bytes;
// meta_row_width: where the viewport start is used, it is the span from
// the start aligned down to the end aligned up to the meta request size;
// otherwise it is (SwathWidth - 1) aligned up plus one extra request.
2273 if (SurfaceTiling == dm_sw_linear) {
2274 *meta_row_height = 32;
2275 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2276 - dml_floor(ViewportXStart, *MetaRequestWidth);
2277 } else if (!IsVertical(SourceRotation)) {
2278 *meta_row_height = *MetaRequestHeight;
2279 if (ViewportStationary && NumberOfDPPs == 1) {
2280 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2281 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2283 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2285 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
// Here the roles of request width and height are swapped and the Y
// viewport coordinates are used (presumably the IsVertical() rotation
// case - TODO confirm).
2287 *meta_row_height = *MetaRequestWidth;
2288 if (ViewportStationary && NumberOfDPPs == 1) {
2289 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2290 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2292 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2294 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
// Upper bound of the viewport height for the meta surface, aligned to
// 64 * BlockHeight256Bytes.
2297 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2298 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2299 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2300 } else if (!IsVertical(SourceRotation)) {
2301 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2303 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
// Meta surface bytes: pitch * bounded height * bytes per pixel / 256.
2306 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
// With GPUVM: 64 bytes per (8 * 4 KiB) chunk of the meta surface beyond
// the first 4 KiB, rounded up, plus one more 64-byte unit; and 128 bytes
// for every page table level above the first.
2308 if (GPUVMEnable == true) {
2309 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2310 (8 * 4.0 * 1024), 1) + 1) * 64;
2311 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2313 *MetaPTEBytesFrame = 0;
// No meta PTE bytes when DCC is not enabled.
2317 if (DCCEnable != true) {
2318 *MetaPTEBytesFrame = 0;
2323 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
// DPDE0 bytes are only computed with GPUVM and more than one page table
// level; the bounded viewport height here is aligned to MacroTileHeight.
2325 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2326 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2327 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2328 MacroTileHeight - 1, MacroTileHeight) -
2329 dml_floor(ViewportYStart, MacroTileHeight);
2330 } else if (!IsVertical(SourceRotation)) {
2331 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2333 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
// 64 bytes per (8 * 2097152)-byte chunk of the surface beyond the first
// macro tile, rounded up, plus one more 64-byte unit.
2335 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2336 (8 * 2097152), 1) + 1);
2337 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2339 *DPDE0BytesFrame = 0;
2340 ExtraDPDEBytesFrame = 0;
2341 vp_height_dpte_ub = 0;
2344 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2346 #ifdef __DML_VBA_DEBUG__
2347 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2348 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2349 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2350 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2351 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2352 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2353 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2354 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2355 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2356 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2357 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2358 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2359 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2360 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2361 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2362 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2363 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
// HostVM scales the frame total by (1 + 8 * HostVMDynamicLevels).
2366 if (HostVMEnable == true)
2367 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
// PTE request geometry: linear surfaces use a one-line-high request;
// tiled surfaces use 16x the 256-byte block when the minimum page size is
// 4 (KBytes), and a macro-tile-high request otherwise.
2369 if (SurfaceTiling == dm_sw_linear) {
2370 *PixelPTEReqHeight = 1;
2371 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2372 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2373 *PTERequestSize = 64;
2374 } else if (GPUVMMinPageSizeKBytes == 4) {
2375 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2376 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2377 *PTERequestSize = 128;
2379 *PixelPTEReqHeight = MacroTileHeight;
2380 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2381 *PTERequestSize = 64;
2383 #ifdef __DML_VBA_DEBUG__
2384 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2385 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2386 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2387 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2388 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2389 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2390 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
// "One row per frame" variant: the row height is the whole bounded
// viewport height (vp_height_dpte_ub, which is 0 when the DPDE0 branch
// above was not taken), and the row width is derived from the pitch.
2393 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2394 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2395 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2396 (double) *PixelPTEReqWidth;
2397 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
// Linear: row height is 1 shifted left by floor(dml_log2(requests *
// request width / Pitch)), capped at 128.
2400 if (SurfaceTiling == dm_sw_linear) {
2401 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2402 *PixelPTEReqWidth / Pitch), 1));
2403 #ifdef __DML_VBA_DEBUG__
2404 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2405 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2406 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2407 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2408 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2409 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2410 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2411 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2412 *PixelPTEReqWidth / Pitch), 1));
2413 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2415 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2416 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2417 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2419 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
// Same computation as *dpte_row_height above, using the local
// PixelPTEReqWidth_linear and applying the 128 cap afterwards.
2420 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2421 PixelPTEReqWidth_linear / Pitch), 1);
2422 if (*dpte_row_height_linear > 128)
2423 *dpte_row_height_linear = 128;
// Tiled with !IsVertical(SourceRotation): one PTE request high; the
// width comes from the pitch for page sizes above 64 KBytes, from the
// aligned viewport span when stationary with a single DPP, or from
// SwathWidth otherwise.
2425 } else if (!IsVertical(SourceRotation)) {
2426 *dpte_row_height = *PixelPTEReqHeight;
2428 if (GPUVMMinPageSizeKBytes > 64) {
2429 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2430 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2431 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2432 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2433 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2434 dml_floor(ViewportXStart, *PixelPTEReqWidth);
2436 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2440 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
// Remaining case (presumably tiled with IsVertical(SourceRotation) - TODO
// confirm): the row height comes from the request width / macro tile
// width and the row width is measured in units of the request height.
2442 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2444 if (ViewportStationary && (NumberOfDPPs == 1)) {
2445 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2446 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2448 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2449 * *PixelPTEReqHeight;
2452 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
// Without GPUVM there are no PTE bytes per row; with HostVM the value is
// scaled by the same (1 + 8 * HostVMDynamicLevels) factor as the frame
// total.
2455 if (GPUVMEnable != true)
2456 *PixelPTEBytesPerRow = 0;
2457 if (HostVMEnable == true)
2458 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2460 #ifdef __DML_VBA_DEBUG__
2461 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2462 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2463 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2464 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2465 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2466 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2467 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2468 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2469 __func__, *dpte_row_width_ub_one_row_per_frame);
2470 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2471 __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2472 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2473 *MetaPTEBytesFrame);
2476 return PDEAndMetaPTEBytesFrame;
2477 } // CalculateVMAndRowBytes
/*
 * dml32_CalculatePrefetchSourceLines - prefetch line count for one plane.
 *
 * Writes *VInitPreFill and *MaxNumSwath and computes
 * numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath, the value the
 * final debug print labels "Prefetch source lines".
 *
 * *VInitPreFill is derived from VRatio and VTaps. When ViewportStationary is
 * set, the swath counts also depend on where the viewport starts within a
 * swath (vp_start_rot), which is chosen according to SourceRotation.
 */
2479 double dml32_CalculatePrefetchSourceLines(
2483 bool ProgressiveToInterlaceUnitInOPP,
2484 unsigned int SwathHeight,
2485 enum dm_rotation_angle SourceRotation,
2486 bool ViewportStationary,
2488 unsigned int ViewportHeight,
2489 unsigned int ViewportXStart,
2490 unsigned int ViewportYStart,
// The two pointer parameters below are written by this function.
2493 double *VInitPreFill,
2494 unsigned int *MaxNumSwath)
2497 unsigned int vp_start_rot;
2498 unsigned int sw0_tmp;
2499 unsigned int MaxPartialSwath;
2502 #ifdef __DML_VBA_DEBUG__
2503 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2504 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2505 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2506 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2507 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2508 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
// Two formulas for *VInitPreFill; the second additionally includes an
// Interlace * 0.5 * VRatio term.
2510 if (ProgressiveToInterlaceUnitInOPP)
2511 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2513 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
// Stationary viewport: pick the start offset along the swath direction
// according to the rotation. The 180 and 90/270m cases count back from
// the far edge of the viewport within its swath.
2515 if (ViewportStationary) {
2516 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2517 vp_start_rot = SwathHeight -
2518 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2519 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2520 vp_start_rot = ViewportXStart;
2521 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2522 vp_start_rot = SwathHeight -
2523 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2525 vp_start_rot = ViewportYStart;
// sw0_tmp: lines from the start offset to the end of its swath. If that
// does not cover *VInitPreFill, more swaths are counted for the rest.
2527 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2528 if (sw0_tmp < *VInitPreFill)
2529 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
// The partial swath is never taken as less than 1 line.
2532 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
// Formulas that do not use the viewport start (presumably the
// non-stationary viewport case - TODO confirm).
2534 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2535 if (*VInitPreFill > 1)
2536 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2538 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2540 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2542 #ifdef __DML_VBA_DEBUG__
2543 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
// NOTE(review): *VInitPreFill is a double (see the VInitPreFill
// parameter) but is printed with %d on the next line; the conversion
// specifier does not match the argument type.
2544 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2545 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2546 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2547 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2551 } // CalculatePrefetchSourceLines
2553 void dml32_CalculateMALLUseForStaticScreen(
2554 unsigned int NumberOfActiveSurfaces,
2555 unsigned int MALLAllocatedForDCNFinal,
2556 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2557 unsigned int SurfaceSizeInMALL[],
2558 bool one_row_per_frame_fits_in_buffer[],
2561 bool UsesMALLForStaticScreen[])
2564 unsigned int SurfaceToAddToMALL;
2565 bool CanAddAnotherSurfaceToMALL;
2566 unsigned int TotalSurfaceSizeInMALL;
2568 TotalSurfaceSizeInMALL = 0;
2569 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2570 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2571 if (UsesMALLForStaticScreen[k])
2572 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2573 #ifdef __DML_VBA_DEBUG__
2574 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2575 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
2579 SurfaceToAddToMALL = 0;
2580 CanAddAnotherSurfaceToMALL = true;
2581 while (CanAddAnotherSurfaceToMALL) {
2582 CanAddAnotherSurfaceToMALL = false;
2583 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2584 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2585 !UsesMALLForStaticScreen[k] &&
2586 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2587 one_row_per_frame_fits_in_buffer[k] &&
2588 (!CanAddAnotherSurfaceToMALL ||
2589 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2590 CanAddAnotherSurfaceToMALL = true;
2591 SurfaceToAddToMALL = k;
2592 #ifdef __DML_VBA_DEBUG__
2593 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2594 __func__, k, UseMALLForStaticScreen[k]);
2598 if (CanAddAnotherSurfaceToMALL) {
2599 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2600 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2602 #ifdef __DML_VBA_DEBUG__
2603 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
2604 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
2611 void dml32_CalculateRowBandwidth(
2613 enum source_format_class SourcePixelFormat,
2615 double VRatioChroma,
2618 unsigned int MetaRowByteLuma,
2619 unsigned int MetaRowByteChroma,
2620 unsigned int meta_row_height_luma,
2621 unsigned int meta_row_height_chroma,
2622 unsigned int PixelPTEBytesPerRowLuma,
2623 unsigned int PixelPTEBytesPerRowChroma,
2624 unsigned int dpte_row_height_luma,
2625 unsigned int dpte_row_height_chroma,
2627 double *meta_row_bw,
2628 double *dpte_row_bw)
2630 if (DCCEnable != true) {
2632 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2633 SourcePixelFormat == dm_rgbe_alpha) {
2634 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2635 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2637 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2640 if (GPUVMEnable != true) {
2642 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2643 SourcePixelFormat == dm_rgbe_alpha) {
2644 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2645 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2647 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * dml32_CalculateUrgentLatency - worst-case urgent latency.
 *
 * Returns the largest of the three latency inputs. When
 * DoUrgentLatencyAdjustment is set, the result is increased by
 *     UrgentLatencyAdjustmentFabricClockComponent *
 *     (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double Latency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment)
		Latency += UrgentLatencyAdjustmentFabricClockComponent *
				(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);

	return Latency;
}
2670 void dml32_CalculateUrgentBurstFactor(
2671 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2672 unsigned int swath_width_luma_ub,
2673 unsigned int swath_width_chroma_ub,
2674 unsigned int SwathHeightY,
2675 unsigned int SwathHeightC,
2677 double UrgentLatency,
2678 double CursorBufferSize,
2679 unsigned int CursorWidth,
2680 unsigned int CursorBPP,
2683 double BytePerPixelInDETY,
2684 double BytePerPixelInDETC,
2685 unsigned int DETBufferSizeY,
2686 unsigned int DETBufferSizeC,
2688 double *UrgentBurstFactorCursor,
2689 double *UrgentBurstFactorLuma,
2690 double *UrgentBurstFactorChroma,
2691 bool *NotEnoughUrgentLatencyHiding)
2693 double LinesInDETLuma;
2694 double LinesInDETChroma;
2695 unsigned int LinesInCursorBuffer;
2696 double CursorBufferSizeInTime;
2697 double DETBufferSizeInTimeLuma;
2698 double DETBufferSizeInTimeChroma;
2700 *NotEnoughUrgentLatencyHiding = 0;
2702 if (CursorWidth > 0) {
2703 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2704 (CursorWidth * CursorBPP / 8.0)), 1.0);
2706 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2707 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2708 *NotEnoughUrgentLatencyHiding = 1;
2709 *UrgentBurstFactorCursor = 0;
2711 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2712 (CursorBufferSizeInTime - UrgentLatency);
2715 *UrgentBurstFactorCursor = 1;
2719 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2720 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2723 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2724 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2725 *NotEnoughUrgentLatencyHiding = 1;
2726 *UrgentBurstFactorLuma = 0;
2728 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2731 *UrgentBurstFactorLuma = 1;
2734 if (BytePerPixelInDETC > 0) {
2735 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2736 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2737 / swath_width_chroma_ub;
2740 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2741 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2742 *NotEnoughUrgentLatencyHiding = 1;
2743 *UrgentBurstFactorChroma = 0;
2745 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2746 / (DETBufferSizeInTimeChroma - UrgentLatency);
2749 *UrgentBurstFactorChroma = 1;
2752 } // CalculateUrgentBurstFactor
2754 void dml32_CalculateDCFCLKDeepSleep(
2755 unsigned int NumberOfActiveSurfaces,
2756 unsigned int BytePerPixelY[],
2757 unsigned int BytePerPixelC[],
2759 double VRatioChroma[],
2760 double SwathWidthY[],
2761 double SwathWidthC[],
2762 unsigned int DPPPerSurface[],
2764 double HRatioChroma[],
2765 double PixelClock[],
2766 double PSCL_THROUGHPUT[],
2767 double PSCL_THROUGHPUT_CHROMA[],
2769 double ReadBandwidthLuma[],
2770 double ReadBandwidthChroma[],
2771 unsigned int ReturnBusWidth,
2774 double *DCFClkDeepSleep)
2777 double DisplayPipeLineDeliveryTimeLuma;
2778 double DisplayPipeLineDeliveryTimeChroma;
2779 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2780 double ReadBandwidth = 0.0;
2782 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2784 if (VRatio[k] <= 1) {
2785 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2788 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2790 if (BytePerPixelC[k] == 0) {
2791 DisplayPipeLineDeliveryTimeChroma = 0;
2793 if (VRatioChroma[k] <= 1) {
2794 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2795 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2797 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2802 if (BytePerPixelC[k] > 0) {
2803 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2804 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2805 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2806 32.0 / DisplayPipeLineDeliveryTimeChroma);
2808 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2809 64.0 / DisplayPipeLineDeliveryTimeLuma;
2811 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2813 #ifdef __DML_VBA_DEBUG__
2814 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2815 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2819 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2820 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2822 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2824 #ifdef __DML_VBA_DEBUG__
2825 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2826 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2827 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2828 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2831 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2832 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2833 #ifdef __DML_VBA_DEBUG__
2834 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2836 } // CalculateDCFCLKDeepSleep
2838 double dml32_CalculateWriteBackDelay(
2839 enum source_format_class WritebackPixelFormat,
2840 double WritebackHRatio,
2841 double WritebackVRatio,
2842 unsigned int WritebackVTaps,
2843 unsigned int WritebackDestinationWidth,
2844 unsigned int WritebackDestinationHeight,
2845 unsigned int WritebackSourceHeight,
2846 unsigned int HTotal)
2848 double CalculateWriteBackDelay;
2850 double Output_lines_last_notclamped;
2851 double WritebackVInit;
2853 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2854 Line_length = dml_max((double) WritebackDestinationWidth,
2855 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2856 Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2857 dml_ceil(((double)WritebackSourceHeight -
2858 (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2859 if (Output_lines_last_notclamped < 0) {
2860 CalculateWriteBackDelay = 0;
2862 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2863 (HTotal - WritebackDestinationWidth) + 80;
2865 return CalculateWriteBackDelay;
2868 void dml32_UseMinimumDCFCLK(
2869 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2871 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2872 unsigned int MaxInterDCNTileRepeaters,
2873 unsigned int MaxPrefetchMode,
2874 double DRAMClockChangeLatencyFinal,
2875 double FCLKChangeLatency,
2876 double SREnterPlusExitTime,
2877 unsigned int ReturnBusWidth,
2878 unsigned int RoundTripPingLatencyCycles,
2879 unsigned int ReorderingBytes,
2880 unsigned int PixelChunkSizeInKByte,
2881 unsigned int MetaChunkSize,
2883 unsigned int GPUVMMaxPageTableLevels,
2885 unsigned int NumberOfActiveSurfaces,
2886 double HostVMMinPageSize,
2887 unsigned int HostVMMaxNonCachedPageTableLevels,
2888 bool DynamicMetadataVMEnabled,
2889 bool ImmediateFlipRequirement,
2890 bool ProgressiveToInterlaceUnitInOPP,
2891 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2892 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2893 unsigned int VTotal[],
2894 unsigned int VActive[],
2895 unsigned int DynamicMetadataTransmittedBytes[],
2896 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2898 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2899 double RequiredDISPCLK[][2],
2900 double UrgLatency[],
2901 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2902 double ProjectedDCFClkDeepSleep[][2],
2903 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2904 unsigned int TotalNumberOfActiveDPP[][2],
2905 unsigned int TotalNumberOfDCCActiveDPP[][2],
2906 unsigned int dpte_group_bytes[],
2907 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2908 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2909 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2910 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2911 unsigned int BytePerPixelY[],
2912 unsigned int BytePerPixelC[],
2913 unsigned int HTotal[],
2914 double PixelClock[],
2915 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2916 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2917 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2918 bool DynamicMetadataEnable[],
2919 double ReadBandwidthLuma[],
2920 double ReadBandwidthChroma[],
2921 double DCFCLKPerState[],
2923 double DCFCLKState[][2])
2925 unsigned int i, j, k;
2926 unsigned int dummy1;
2927 double dummy2, dummy3;
2928 double NormalEfficiency;
2929 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2931 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2932 for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2933 for (j = 0; j <= 1; ++j) {
2934 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2935 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2936 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2937 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2938 double MinimumTWait = 0.0;
2939 double DPTEBandwidth;
2940 double DCFCLKRequiredForAverageBandwidth;
2941 unsigned int ExtraLatencyBytes;
2942 double ExtraLatencyCycles;
2943 double DCFCLKRequiredForPeakBandwidth;
2944 unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2945 double MinimumTvmPlus2Tr0;
2947 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2948 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2949 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2950 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2951 / (15.75 * HTotal[k] / PixelClock[k]);
2954 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
2955 NoOfDPPState[k] = NoOfDPP[i][j][k];
2957 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
2958 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
2960 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
2961 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
2962 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
2963 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
2964 HostVMMaxNonCachedPageTableLevels);
2965 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
2966 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
2967 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2968 double DCFCLKCyclesRequiredInPrefetch;
2969 double PrefetchTime;
2971 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
2972 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
2973 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
2974 * BytePerPixelC[k]) / NormalEfficiency
2976 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
2977 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
2978 / NormalEfficiency / ReturnBusWidth
2979 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
2980 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
2982 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
2983 + PixelDCFCLKCyclesRequiredInPrefetch[k];
2984 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
2985 * HTotal[k] / PixelClock[k];
2986 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
2987 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
2988 UrgLatency[i] * GPUVMMaxPageTableLevels *
2989 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
2991 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
2992 UseMALLForPStateChange[k],
2993 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2995 DRAMClockChangeLatencyFinal,
2998 SREnterPlusExitTime);
3000 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3001 MinimumTWait - UrgLatency[i] *
3002 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3003 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
3004 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3005 DynamicMetadataVMExtraLatency[k];
3007 if (PrefetchTime > 0) {
3008 double ExpectedVRatioPrefetch;
3010 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3011 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3012 DCFCLKCyclesRequiredInPrefetch);
3013 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3014 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3015 PrefetchPixelLinesTime[k] *
3016 dml_max(1.0, ExpectedVRatioPrefetch) *
3017 dml_max(1.0, ExpectedVRatioPrefetch / 4);
3018 if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3019 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3020 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3021 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3022 NormalEfficiency / ReturnBusWidth;
3025 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3027 if (DynamicMetadataEnable[k] == true) {
3032 double AllowedTimeForUrgentExtraLatency;
3034 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3035 MaxInterDCNTileRepeaters,
3036 RequiredDPPCLKPerSurface[i][j][k],
3037 RequiredDISPCLK[i][j],
3038 ProjectedDCFClkDeepSleep[i][j],
3041 VTotal[k] - VActive[k],
3042 DynamicMetadataTransmittedBytes[k],
3043 DynamicMetadataLinesBeforeActiveRequired[k],
3045 ProgressiveToInterlaceUnitInOPP,
3055 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3056 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3057 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3058 if (AllowedTimeForUrgentExtraLatency > 0)
3059 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3060 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3061 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3063 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3066 DCFCLKRequiredForPeakBandwidth = 0;
3067 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3068 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3069 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3071 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3072 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3073 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3074 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3075 double MaximumTvmPlus2Tr0PlusTsw;
3077 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3078 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3079 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3080 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3082 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3083 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3084 MinimumTvmPlus2Tr0 -
3085 PrefetchPixelLinesTime[k] / 4),
3086 (2 * ExtraLatencyCycles +
3087 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3088 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3091 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3092 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * dml32_CalculateExtraLatencyBytes - extra bytes contributing to latency.
 *
 * Starts from
 *     ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
 *                        TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0
 * and, when GPUVMEnable is set, adds for every active surface
 *     NumberOfDPP[k] * dpte_group_bytes[k] *
 *     (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is 0 unless both GPUVM and HostVM are enabled; then it
 * is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (not below 0)
 * depending on whether HostVMMinPageSize is below 2048, below 1048576, or
 * larger.
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int k;
	unsigned int HostVMDynamicLevels = 0;
	double ret;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable != true)
		return ret;

	for (k = 0; k < NumberOfActiveSurfaces; ++k)
		ret += NumberOfDPP[k] * dpte_group_bytes[k] *
				(1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;

	return ret;
}
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - VUpdate/VReady pixel
 * offsets and the dynamic metadata timing terms.
 *
 * Outputs:
 *  *VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk +
 *                      repeater delay) * PixelClock)
 *  *VReadyOffsetPix  - ceil(max(150 / Dppclk, repeater delay +
 *                      20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *  *VUpdateOffsetPix - ceil(HTotal / 4)
 *  *TSetup           - sum of the three values above divided by PixelClock
 *  *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *  *Tdmec            - HTotal / PixelClock
 *  *Tdmsks           - DynamicMetadataLinesBeforeActiveRequired lines, or
 *                      half of VBlank when that argument is 0; halved again
 *                      when InterlaceEnable == 1 and
 *                      ProgressiveToInterlaceUnitInOPP is false
 *
 * where repeater delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* Both values are double, so they must be printed with %f, not %d. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3193 double dml32_CalculateTWait(
3194 unsigned int PrefetchMode,
3195 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3196 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3198 double DRAMClockChangeLatency,
3199 double FCLKChangeLatency,
3200 double UrgentLatency,
3201 double SREnterPlusExitTime)
3205 if (PrefetchMode == 0 &&
3206 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3207 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3208 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3209 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3210 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3211 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3212 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3213 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3214 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3216 TWait = UrgentLatency;
3219 #ifdef __DML_VBA_DEBUG__
3220 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3221 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3226 // Function: get_return_bw_mbps
3227 // Megabyte per second
3228 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3229 const int VoltageLevel,
3230 const bool HostVMEnable,
3231 const double DCFCLK,
3232 const double FabricClock,
3233 const double DRAMSpeed)
3235 double ReturnBW = 0.;
3236 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3237 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3238 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3239 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3240 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3241 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3242 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3243 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3244 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3245 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3246 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3248 if (HostVMEnable != true)
3249 ReturnBW = PixelDataOnlyReturnBW;
3251 ReturnBW = PixelMixedWithVMDataReturnBW;
3253 #ifdef __DML_VBA_DEBUG__
3254 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3255 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3256 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3257 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3258 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3259 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
3260 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
3261 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
3262 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
3263 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3264 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
3269 // Function: get_return_bw_mbps_vm_only
3270 // Megabyte per second
3271 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3272 const int VoltageLevel,
3273 const double DCFCLK,
3274 const double FabricClock,
3275 const double DRAMSpeed)
3277 double VMDataOnlyReturnBW = dml_min3(
3278 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3279 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3280 * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3281 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3282 * (VoltageLevel < 2 ?
3283 soc->pct_ideal_dram_bw_after_urgent_strobe :
3284 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3285 #ifdef __DML_VBA_DEBUG__
3286 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3287 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3288 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3289 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3290 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3292 return VMDataOnlyReturnBW;
3295 double dml32_CalculateExtraLatency(
3296 unsigned int RoundTripPingLatencyCycles,
3297 unsigned int ReorderingBytes,
3299 unsigned int TotalNumberOfActiveDPP,
3300 unsigned int PixelChunkSizeInKByte,
3301 unsigned int TotalNumberOfDCCActiveDPP,
3302 unsigned int MetaChunkSize,
3306 unsigned int NumberOfActiveSurfaces,
3307 unsigned int NumberOfDPP[],
3308 unsigned int dpte_group_bytes[],
3309 double HostVMInefficiencyFactor,
3310 double HostVMMinPageSize,
3311 unsigned int HostVMMaxNonCachedPageTableLevels)
3313 double ExtraLatencyBytes;
3314 double ExtraLatency;
3316 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3318 TotalNumberOfActiveDPP,
3319 PixelChunkSizeInKByte,
3320 TotalNumberOfDCCActiveDPP,
3324 NumberOfActiveSurfaces,
3327 HostVMInefficiencyFactor,
3329 HostVMMaxNonCachedPageTableLevels);
3331 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3333 #ifdef __DML_VBA_DEBUG__
3334 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3335 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3336 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3337 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3338 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3341 return ExtraLatency;
3342 } // CalculateExtraLatency
3344 bool dml32_CalculatePrefetchSchedule(
3345 double HostVMInefficiencyFactor,
3347 unsigned int DSCDelay,
3348 double DPPCLKDelaySubtotalPlusCNVCFormater,
3349 double DPPCLKDelaySCL,
3350 double DPPCLKDelaySCLLBOnly,
3351 double DPPCLKDelayCNVCCursor,
3352 double DISPCLKDelaySubtotal,
3353 unsigned int DPP_RECOUT_WIDTH,
3354 enum output_format_class OutputFormat,
3355 unsigned int MaxInterDCNTileRepeaters,
3356 unsigned int VStartup,
3357 unsigned int MaxVStartup,
3358 unsigned int GPUVMPageTableLevels,
3361 unsigned int HostVMMaxNonCachedPageTableLevels,
3362 double HostVMMinPageSize,
3363 bool DynamicMetadataEnable,
3364 bool DynamicMetadataVMEnabled,
3365 int DynamicMetadataLinesBeforeActiveRequired,
3366 unsigned int DynamicMetadataTransmittedBytes,
3367 double UrgentLatency,
3368 double UrgentExtraLatency,
3370 unsigned int PDEAndMetaPTEBytesFrame,
3371 unsigned int MetaRowByte,
3372 unsigned int PixelPTEBytesPerRow,
3373 double PrefetchSourceLinesY,
3374 unsigned int SwathWidthY,
3375 unsigned int VInitPreFillY,
3376 unsigned int MaxNumSwathY,
3377 double PrefetchSourceLinesC,
3378 unsigned int SwathWidthC,
3379 unsigned int VInitPreFillC,
3380 unsigned int MaxNumSwathC,
3381 unsigned int swath_width_luma_ub,
3382 unsigned int swath_width_chroma_ub,
3383 unsigned int SwathHeightY,
3384 unsigned int SwathHeightC,
3387 double *DSTXAfterScaler,
3388 double *DSTYAfterScaler,
3389 double *DestinationLinesForPrefetch,
3390 double *PrefetchBandwidth,
3391 double *DestinationLinesToRequestVMInVBlank,
3392 double *DestinationLinesToRequestRowInVBlank,
3393 double *VRatioPrefetchY,
3394 double *VRatioPrefetchC,
3395 double *RequiredPrefetchPixDataBWLuma,
3396 double *RequiredPrefetchPixDataBWChroma,
3397 bool *NotEnoughTimeForDynamicMetadata,
3399 double *prefetch_vmrow_bw,
3403 unsigned int *VUpdateOffsetPix,
3404 double *VUpdateWidthPix,
3405 double *VReadyOffsetPix)
3407 bool MyError = false;
3408 unsigned int DPPCycles, DISPCLKCycles;
3409 double DSTTotalPixelsAfterScaler;
3411 double dst_y_prefetch_equ;
3412 double prefetch_bw_oto;
3415 double Tvm_oto_lines;
3416 double Tr0_oto_lines;
3417 double dst_y_prefetch_oto;
3418 double TimeForFetchingMetaPTE = 0;
3419 double TimeForFetchingRowInVBlank = 0;
3420 double LinesToRequestPrefetchPixelData = 0;
3421 unsigned int HostVMDynamicLevelsTrips;
3425 double Tvm_trips_rounded;
3426 double Tr0_trips_rounded;
3428 double Tpre_rounded;
3429 double prefetch_bw_equ;
3435 double prefetch_sw_bytes;
3438 unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3440 double Tsw_est1 = 0;
3441 double Tsw_est3 = 0;
3443 if (GPUVMEnable == true && HostVMEnable == true)
3444 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3446 HostVMDynamicLevelsTrips = 0;
3447 #ifdef __DML_VBA_DEBUG__
3448 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
3449 dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
3450 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3451 dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3452 __func__, HostVMEnable, HostVMInefficiencyFactor);
3454 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3455 MaxInterDCNTileRepeaters,
3458 myPipe->DCFClkDeepSleep,
3462 DynamicMetadataTransmittedBytes,
3463 DynamicMetadataLinesBeforeActiveRequired,
3464 myPipe->InterlaceEnable,
3465 myPipe->ProgressiveToInterlaceUnitInOPP,
3476 LineTime = myPipe->HTotal / myPipe->PixelClock;
3477 trip_to_mem = UrgentLatency;
3478 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3480 if (DynamicMetadataVMEnabled == true)
3481 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
3483 *Tdmdl = TWait + UrgentExtraLatency;
3485 #ifdef __DML_VBA_ALLOW_DELTA__
3486 if (DynamicMetadataEnable == false)
3490 if (DynamicMetadataEnable == true) {
3491 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3492 *NotEnoughTimeForDynamicMetadata = true;
3493 #ifdef __DML_VBA_DEBUG__
3494 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3495 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3497 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3498 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3500 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3504 *NotEnoughTimeForDynamicMetadata = false;
3507 *NotEnoughTimeForDynamicMetadata = false;
3510 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
3511 GPUVMEnable == true ? TWait + Tvm_trips : 0);
3513 if (myPipe->ScalerEnabled)
3514 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
3516 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
3518 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
3520 DISPCLKCycles = DISPCLKDelaySubtotal;
3522 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3525 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3526 myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3528 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3529 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3530 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3531 myPipe->HActive / 2 : 0)
3532 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3534 #ifdef __DML_VBA_DEBUG__
3535 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3536 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3537 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3538 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3539 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
3540 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
3541 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
3542 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3543 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
3546 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3547 *DSTYAfterScaler = 1;
3549 *DSTYAfterScaler = 0;
3551 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3552 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3553 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3554 #ifdef __DML_VBA_DEBUG__
3555 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
3556 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3561 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3563 if (GPUVMEnable == true) {
3564 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3565 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3566 if (GPUVMPageTableLevels >= 3) {
3567 *Tno_bw = UrgentExtraLatency + trip_to_mem *
3568 (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3569 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
3570 Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3571 4.0 * LineTime; // VBA_ERROR
3572 *Tno_bw = UrgentExtraLatency;
3576 } else if (myPipe->DCCEnable == true) {
3577 Tvm_trips_rounded = LineTime / 4.0;
3578 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3581 Tvm_trips_rounded = LineTime / 4.0;
3582 Tr0_trips_rounded = LineTime / 2.0;
3585 Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3586 Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3588 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3589 || myPipe->SourcePixelFormat == dm_420_12) {
3590 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3592 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3595 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3596 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3597 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3598 prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3600 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3601 min_Lsw = dml_max(min_Lsw, 1.0);
3602 Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3604 if (GPUVMEnable == true) {
3607 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3610 Tvm_oto = LineTime / 4.0;
3612 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3615 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3616 (LineTime - Tvm_oto)/2.0,
3618 #ifdef __DML_VBA_DEBUG__
3619 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3620 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3621 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3622 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3623 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3626 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3628 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3629 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3630 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3632 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3633 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3635 #ifdef __DML_VBA_DEBUG__
3636 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3637 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3638 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3639 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3640 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3641 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3642 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3643 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3644 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3645 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3646 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3647 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3648 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3649 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3650 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3651 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3652 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3653 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3654 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3655 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3656 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3657 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3658 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3659 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3660 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3661 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3662 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3665 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3666 Tpre_rounded = dst_y_prefetch_equ * LineTime;
3667 #ifdef __DML_VBA_DEBUG__
3668 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3669 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3670 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3671 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3672 __func__, VStartup * LineTime);
3673 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3674 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3675 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3676 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3677 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3678 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3679 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3680 __func__, *DSTYAfterScaler);
3682 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3683 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3685 if (prefetch_sw_bytes < dep_bytes)
3686 prefetch_sw_bytes = 2 * dep_bytes;
3688 *PrefetchBandwidth = 0;
3689 *DestinationLinesToRequestVMInVBlank = 0;
3690 *DestinationLinesToRequestRowInVBlank = 0;
3691 *VRatioPrefetchY = 0;
3692 *VRatioPrefetchC = 0;
3693 *RequiredPrefetchPixDataBWLuma = 0;
3694 if (dst_y_prefetch_equ > 1) {
3695 double PrefetchBandwidth1;
3696 double PrefetchBandwidth2;
3697 double PrefetchBandwidth3;
3698 double PrefetchBandwidth4;
3700 if (Tpre_rounded - *Tno_bw > 0) {
3701 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3702 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3703 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3704 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3706 PrefetchBandwidth1 = 0;
3708 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3709 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3710 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3711 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3712 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3715 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3716 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3717 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3719 PrefetchBandwidth2 = 0;
3721 if (Tpre_rounded - Tvm_trips_rounded > 0) {
3722 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3723 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3724 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3726 PrefetchBandwidth3 = 0;
3729 if (VStartup == MaxVStartup &&
3730 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3731 LineTime - Tvm_trips_rounded > 0) {
3732 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3733 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3736 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3737 PrefetchBandwidth4 = prefetch_sw_bytes /
3738 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3740 PrefetchBandwidth4 = 0;
3743 #ifdef __DML_VBA_DEBUG__
3744 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3745 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3746 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3747 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3748 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3749 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3750 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3751 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3752 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3759 if (PrefetchBandwidth1 > 0) {
3760 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3761 >= Tvm_trips_rounded
3762 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3763 / PrefetchBandwidth1 >= Tr0_trips_rounded) {
3772 if (PrefetchBandwidth2 > 0) {
3773 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3774 >= Tvm_trips_rounded
3775 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3776 / PrefetchBandwidth2 < Tr0_trips_rounded) {
3785 if (PrefetchBandwidth3 > 0) {
3786 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3787 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3788 HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3789 Tr0_trips_rounded) {
3799 prefetch_bw_equ = PrefetchBandwidth1;
3801 prefetch_bw_equ = PrefetchBandwidth2;
3803 prefetch_bw_equ = PrefetchBandwidth3;
3805 prefetch_bw_equ = PrefetchBandwidth4;
3807 #ifdef __DML_VBA_DEBUG__
3808 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3809 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3810 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3811 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3814 if (prefetch_bw_equ > 0) {
3815 if (GPUVMEnable == true) {
3816 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3817 HostVMInefficiencyFactor / prefetch_bw_equ,
3818 Tvm_trips, LineTime / 4);
3820 Tvm_equ = LineTime / 4;
3823 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3824 Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3825 HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3826 (LineTime - Tvm_equ) / 2, LineTime / 4);
3828 Tr0_equ = (LineTime - Tvm_equ) / 2;
3833 #ifdef __DML_VBA_DEBUG__
3834 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3839 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3840 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
3841 TimeForFetchingMetaPTE = Tvm_oto;
3842 TimeForFetchingRowInVBlank = Tr0_oto;
3843 *PrefetchBandwidth = prefetch_bw_oto;
3845 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3846 TimeForFetchingMetaPTE = Tvm_equ;
3847 TimeForFetchingRowInVBlank = Tr0_equ;
3848 *PrefetchBandwidth = prefetch_bw_equ;
3851 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3853 *DestinationLinesToRequestRowInVBlank =
3854 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3856 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3857 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3859 #ifdef __DML_VBA_DEBUG__
3860 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3861 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3862 __func__, *DestinationLinesToRequestVMInVBlank);
3863 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3864 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3865 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3866 __func__, *DestinationLinesToRequestRowInVBlank);
3867 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3868 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3871 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3872 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3873 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3874 #ifdef __DML_VBA_DEBUG__
3875 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3876 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3877 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3879 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3880 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3882 dml_max((double) PrefetchSourceLinesY /
3883 LinesToRequestPrefetchPixelData,
3884 (double) MaxNumSwathY * SwathHeightY /
3885 (LinesToRequestPrefetchPixelData -
3886 (VInitPreFillY - 3.0) / 2.0));
3887 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3890 *VRatioPrefetchY = 0;
3892 #ifdef __DML_VBA_DEBUG__
3893 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3894 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3895 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3899 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3900 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3902 #ifdef __DML_VBA_DEBUG__
3903 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3904 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3905 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3907 if ((SwathHeightC > 4)) {
3908 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3910 dml_max(*VRatioPrefetchC,
3911 (double) MaxNumSwathC * SwathHeightC /
3912 (LinesToRequestPrefetchPixelData -
3913 (VInitPreFillC - 3.0) / 2.0));
3914 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3917 *VRatioPrefetchC = 0;
3919 #ifdef __DML_VBA_DEBUG__
3920 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3921 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3922 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3926 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3927 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3930 #ifdef __DML_VBA_DEBUG__
3931 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3932 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3933 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3934 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3935 __func__, *RequiredPrefetchPixDataBWLuma);
3937 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3938 LinesToRequestPrefetchPixelData
3939 * myPipe->BytePerPixelC
3940 * swath_width_chroma_ub / LineTime;
3943 #ifdef __DML_VBA_DEBUG__
3944 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3945 __func__, LinesToRequestPrefetchPixelData);
3947 *VRatioPrefetchY = 0;
3948 *VRatioPrefetchC = 0;
3949 *RequiredPrefetchPixDataBWLuma = 0;
3950 *RequiredPrefetchPixDataBWChroma = 0;
3952 #ifdef __DML_VBA_DEBUG__
3953 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3954 (double)LinesToRequestPrefetchPixelData * LineTime +
3955 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
3956 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
3957 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
3958 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
3959 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
3960 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
3961 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
3962 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
3963 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
3964 PixelPTEBytesPerRow);
3968 #ifdef __DML_VBA_DEBUG__
3969 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
3970 __func__, dst_y_prefetch_equ);
3975 double prefetch_vm_bw;
3976 double prefetch_row_bw;
3978 if (PDEAndMetaPTEBytesFrame == 0) {
3980 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
3981 #ifdef __DML_VBA_DEBUG__
3982 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3983 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3984 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3985 __func__, *DestinationLinesToRequestVMInVBlank);
3986 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3988 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
3989 (*DestinationLinesToRequestVMInVBlank * LineTime);
3990 #ifdef __DML_VBA_DEBUG__
3991 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
3996 #ifdef __DML_VBA_DEBUG__
3997 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
3998 __func__, *DestinationLinesToRequestVMInVBlank);
4002 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4003 prefetch_row_bw = 0;
4004 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
4005 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4006 (*DestinationLinesToRequestRowInVBlank * LineTime);
4008 #ifdef __DML_VBA_DEBUG__
4009 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4010 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4011 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4012 __func__, *DestinationLinesToRequestRowInVBlank);
4013 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4016 prefetch_row_bw = 0;
4018 #ifdef __DML_VBA_DEBUG__
4019 dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4020 __func__, *DestinationLinesToRequestRowInVBlank);
4024 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4028 *PrefetchBandwidth = 0;
4029 TimeForFetchingMetaPTE = 0;
4030 TimeForFetchingRowInVBlank = 0;
4031 *DestinationLinesToRequestVMInVBlank = 0;
4032 *DestinationLinesToRequestRowInVBlank = 0;
4033 *DestinationLinesForPrefetch = 0;
4034 LinesToRequestPrefetchPixelData = 0;
4035 *VRatioPrefetchY = 0;
4036 *VRatioPrefetchC = 0;
4037 *RequiredPrefetchPixDataBWLuma = 0;
4038 *RequiredPrefetchPixDataBWChroma = 0;
4042 } // CalculatePrefetchSchedule
/*
 * dml32_CalculateFlipSchedule - immediate-flip timing and support check.
 *
 * Derives an immediate-flip bandwidth (ImmediateFlipBW) from the supplied
 * byte counts, uses it to compute the time needed to fetch the PDE/meta-PTE
 * data and the row data (MetaRowBytes + DPTEBytesPerRow), converts both
 * times into destination lines, computes *final_flip_bw from those line
 * counts, and finally reports through *ImmediateFlipSupportedForPipe
 * whether the line counts and fetch times stay within the limits checked
 * at the end of the function.
 *
 * Inputs as used by the visible code:
 *   HostVMInefficiencyFactor   multiplier applied to PDEAndMetaPTEBytesPerFrame
 *                              and DPTEBytesPerRow in the fetch-time and
 *                              bandwidth formulas.
 *   UrgentExtraLatency,
 *   UrgentLatency              latency terms that act as lower bounds on the
 *                              fetch times.
 *   GPUVMMaxPageTableLevels,
 *   HostVMMaxNonCachedPageTableLevels
 *                              level counts that scale UrgentLatency in those
 *                              lower bounds.
 *   HostVMMinPageSize          not referenced anywhere in the visible body.
 *   PDEAndMetaPTEBytesPerFrame,
 *   MetaRowBytes,
 *   DPTEBytesPerRow            byte counts used for ImmediateFlipBW, the fetch
 *                              times and *final_flip_bw.
 *   BandwidthAvailableForImmediateFlip,
 *   TotImmediateFlipBytes      ImmediateFlipBW is the byte sum multiplied by
 *                              BandwidthAvailableForImmediateFlip and divided
 *                              by TotImmediateFlipBytes (presumably the total
 *                              over all pipes - confirm with the caller).
 *   SourcePixelFormat          selects whether the *_chroma row heights take
 *                              part in min_row_time.
 *   VRatioChroma,
 *   dpte_row_height, meta_row_height,
 *   dpte_row_height_chroma, meta_row_height_chroma
 *                              inputs of the min_row_time computation.
 *   use_one_row_for_frame_flip when set, DPTEBytesPerRow is counted twice in
 *                              the byte sum used for ImmediateFlipBW.
 *
 * Outputs:
 *   *DestinationLinesToRequestVMInImmediateFlip
 *   *DestinationLinesToRequestRowInImmediateFlip
 *                              fetch times divided by LineTime, passed through
 *                              dml_ceil(4.0 * x, 1.0) / 4.0 (presumably a
 *                              round-up to a quarter line; depends on
 *                              dml_ceil - confirm).
 *   *final_flip_bw             bytes divided by (lines * LineTime).
 *   *ImmediateFlipSupportedForPipe
 *                              result of the limit check at the end.
 *
 * NOTE(review): in this listing the declarations of GPUVMEnable,
 * HostVMEnable, LineTime, VRatio, Tno_bw and DCCEnable are not visible
 * although the body uses them (presumably they are parameters), and no
 * `else`, `#endif` or brace-only lines are visible. The comments below
 * assume the conventional if/else layout; confirm against the full file.
 */
4044 void dml32_CalculateFlipSchedule(
4045 double HostVMInefficiencyFactor,
4046 double UrgentExtraLatency,
4047 double UrgentLatency,
4048 unsigned int GPUVMMaxPageTableLevels,
4050 unsigned int HostVMMaxNonCachedPageTableLevels,
4052 double HostVMMinPageSize,
4053 double PDEAndMetaPTEBytesPerFrame,
4054 double MetaRowBytes,
4055 double DPTEBytesPerRow,
4056 double BandwidthAvailableForImmediateFlip,
4057 unsigned int TotImmediateFlipBytes,
4058 enum source_format_class SourcePixelFormat,
4061 double VRatioChroma,
4064 unsigned int dpte_row_height,
4065 unsigned int meta_row_height,
4066 unsigned int dpte_row_height_chroma,
4067 unsigned int meta_row_height_chroma,
4068 bool use_one_row_for_frame_flip,
/* The remaining pointer parameters are written by this function. */
4071 double *DestinationLinesToRequestVMInImmediateFlip,
4072 double *DestinationLinesToRequestRowInImmediateFlip,
4073 double *final_flip_bw,
4074 bool *ImmediateFlipSupportedForPipe)
4076 double min_row_time = 0.0;
4077 unsigned int HostVMDynamicLevelsTrips;
4078 double TimeForFetchingMetaPTEImmediateFlip;
4079 double TimeForFetchingRowInVBlankImmediateFlip;
/*
 * ImmediateFlipBW has no initializer; every visible read of it follows an
 * assignment guarded by the TotImmediateFlipBytes > 0 check below.
 */
4080 double ImmediateFlipBW;
/*
 * HostVMDynamicLevelsTrips is HostVMMaxNonCachedPageTableLevels when both
 * GPUVM and HostVM are enabled; the assignment of 0 is presumably the
 * `else` branch.
 */
4082 if (GPUVMEnable == true && HostVMEnable == true)
4083 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4085 HostVMDynamicLevelsTrips = 0;
4087 #ifdef __DML_VBA_DEBUG__
4088 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4089 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
/*
 * Flip bandwidth: (PDE/meta-PTE bytes + meta row bytes + DPTE row bytes)
 * * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes. With
 * use_one_row_for_frame_flip the DPTE row bytes are weighted by 2; the
 * second formula (weight 1) is presumably the `else` case.
 */
4092 if (TotImmediateFlipBytes > 0) {
4093 if (use_one_row_for_frame_flip) {
4094 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4095 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4097 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4098 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
/*
 * PDE/meta-PTE fetch time with GPUVM enabled: the largest of
 *   - Tno_bw plus the time to move the (inefficiency-scaled) bytes at
 *     ImmediateFlipBW,
 *   - UrgentExtraLatency plus UrgentLatency scaled by the page-table level
 *     product minus one,
 *   - a third dml_max3 argument that is not visible in this listing.
 * The assignment of 0 is presumably the GPUVM-disabled case.
 */
4100 if (GPUVMEnable == true) {
4101 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4102 HostVMInefficiencyFactor / ImmediateFlipBW,
4103 UrgentExtraLatency + UrgentLatency *
4104 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4107 TimeForFetchingMetaPTEImmediateFlip = 0;
/*
 * Row fetch time when GPUVM or DCC is enabled: the largest of the transfer
 * time at ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevelsTrips + 1),
 * and a quarter of LineTime. The assignment of 0 is presumably the case
 * where neither is enabled.
 */
4109 if ((GPUVMEnable == true || DCCEnable == true)) {
4110 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4111 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4112 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4114 TimeForFetchingRowInVBlankImmediateFlip = 0;
/* Express both fetch times in lines (time / LineTime), in steps of 1/4. */
4117 *DestinationLinesToRequestVMInImmediateFlip =
4118 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4119 *DestinationLinesToRequestRowInImmediateFlip =
4120 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
/*
 * Final flip bandwidth = bytes / (lines * LineTime). With GPUVM it is the
 * larger of the PDE/meta-PTE figure and the row figure. In the `else if`
 * the GPUVMEnable term cannot be true (the preceding `if` already handled
 * it), so that branch is effectively the DCC-only case and uses the row
 * figure alone.
 */
4122 if (GPUVMEnable == true) {
4123 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4124 (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4125 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4126 (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4127 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4128 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4129 (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
/*
 * Presumably the TotImmediateFlipBytes == 0 branch: fetch times and line
 * counts are cleared.
 */
4134 TimeForFetchingMetaPTEImmediateFlip = 0;
4135 TimeForFetchingRowInVBlankImmediateFlip = 0;
4136 *DestinationLinesToRequestVMInImmediateFlip = 0;
4137 *DestinationLinesToRequestRowInImmediateFlip = 0;
/*
 * min_row_time = smallest row_height * LineTime / ratio among the relevant
 * row heights. For dm_420_8, dm_420_10 and dm_rgbe_alpha both the plain
 * (VRatio) and *_chroma (VRatioChroma) heights are considered; the second
 * group of branches, which uses only the plain heights, is presumably the
 * `else` for all other formats. Within each group:
 *   GPUVM without DCC -> dpte row heights only,
 *   DCC without GPUVM -> meta row heights only,
 *   remaining cases   -> both dpte and meta row heights.
 */
4141 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4142 if (GPUVMEnable == true && DCCEnable != true) {
4143 min_row_time = dml_min(dpte_row_height *
4144 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4145 } else if (GPUVMEnable != true && DCCEnable == true) {
4146 min_row_time = dml_min(meta_row_height *
4147 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4149 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4150 LineTime / VRatio, dpte_row_height_chroma * LineTime /
4151 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4154 if (GPUVMEnable == true && DCCEnable != true) {
4155 min_row_time = dpte_row_height * LineTime / VRatio;
4156 } else if (GPUVMEnable != true && DCCEnable == true) {
4157 min_row_time = meta_row_height * LineTime / VRatio;
4160 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
/*
 * Support check: the flip is rejected when the VM line count reaches 32,
 * the row line count reaches 16, or the combined time (meta PTE + 2 * row)
 * fails a comparison whose right-hand side is not visible in this listing
 * (presumably min_row_time - confirm). The `true` assignment is presumably
 * the `else` branch.
 */
4164 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4165 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4167 *ImmediateFlipSupportedForPipe = false;
4169 *ImmediateFlipSupportedForPipe = true;
/* Debug-only dump of the inputs and results computed above. */
4172 #ifdef __DML_VBA_DEBUG__
4173 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4174 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4175 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4176 __func__, *DestinationLinesToRequestVMInImmediateFlip);
4177 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4178 __func__, *DestinationLinesToRequestRowInImmediateFlip);
4179 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4180 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4181 __func__, TimeForFetchingRowInVBlankImmediateFlip);
4182 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4183 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4185 } // CalculateFlipSchedule
4187 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4188 struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
4189 bool USRRetrainingRequiredFinal,
4190 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4191 unsigned int PrefetchMode,
4192 unsigned int NumberOfActiveSurfaces,
4193 unsigned int MaxLineBufferLines,
4194 unsigned int LineBufferSize,
4195 unsigned int WritebackInterfaceBufferSize,
4198 bool SynchronizeTimingsFinal,
4199 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4201 unsigned int dpte_group_bytes[],
4202 unsigned int meta_row_height[],
4203 unsigned int meta_row_height_chroma[],
4204 SOCParametersList mmSOCParameters,
4205 unsigned int WritebackChunkSize,
4207 double DCFClkDeepSleep,
4208 unsigned int DETBufferSizeY[],
4209 unsigned int DETBufferSizeC[],
4210 unsigned int SwathHeightY[],
4211 unsigned int SwathHeightC[],
4212 unsigned int LBBitPerPixel[],
4213 double SwathWidthY[],
4214 double SwathWidthC[],
4216 double HRatioChroma[],
4217 unsigned int VTaps[],
4218 unsigned int VTapsChroma[],
4220 double VRatioChroma[],
4221 unsigned int HTotal[],
4222 unsigned int VTotal[],
4223 unsigned int VActive[],
4224 double PixelClock[],
4225 unsigned int BlendingAndTiming[],
4226 unsigned int DPPPerSurface[],
4227 double BytePerPixelDETY[],
4228 double BytePerPixelDETC[],
4229 double DSTXAfterScaler[],
4230 double DSTYAfterScaler[],
4231 bool WritebackEnable[],
4232 enum source_format_class WritebackPixelFormat[],
4233 double WritebackDestinationWidth[],
4234 double WritebackDestinationHeight[],
4235 double WritebackSourceHeight[],
4236 bool UnboundedRequestEnabled,
4237 unsigned int CompressedBufferSizeInkByte,
4240 Watermarks *Watermark,
4241 enum clock_change_support *DRAMClockChangeSupport,
4242 double MaxActiveDRAMClockChangeLatencySupported[],
4243 unsigned int SubViewportLinesNeededInMALL[],
4244 enum dm_fclock_change_support *FCLKChangeSupport,
4245 double *MinActiveFCLKChangeLatencySupported,
4246 bool *USRRetrainingSupport,
4247 double ActiveDRAMClockChangeLatencyMargin[])
4249 unsigned int i, j, k;
4251 st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0;
4252 st_vars->DRAMClockChangeSupportNumber = 0;
4253 st_vars->DRAMClockChangeMethod = 0;
4254 st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4255 st_vars->MinActiveFCLKChangeMargin = 0.;
4256 st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4257 st_vars->TotalPixelBW = 0.0;
4258 st_vars->TotalActiveWriteback = 0;
4260 Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4261 Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4262 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4263 Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
4264 Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
4265 Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4266 + 10 / DCFClkDeepSleep;
4267 Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4268 + 10 / DCFClkDeepSleep;
4269 Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4270 + 10 / DCFClkDeepSleep;
4271 Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4272 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4274 #ifdef __DML_VBA_DEBUG__
4275 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4276 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4277 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4278 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
4279 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
4280 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
4281 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
4282 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
4283 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
4284 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
4285 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4286 __func__, Watermark->Z8StutterEnterPlusExitWatermark);
4290 st_vars->TotalActiveWriteback = 0;
4291 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4292 if (WritebackEnable[k] == true)
4293 st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1;
4296 if (st_vars->TotalActiveWriteback <= 1) {
4297 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4299 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4300 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4302 if (USRRetrainingRequiredFinal)
4303 Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
4304 + mmSOCParameters.USRRetrainingLatency;
4306 if (st_vars->TotalActiveWriteback <= 1) {
4307 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4308 + mmSOCParameters.WritebackLatency;
4309 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4310 + mmSOCParameters.WritebackLatency;
4312 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4313 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4314 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4315 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
4318 if (USRRetrainingRequiredFinal)
4319 Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
4320 + mmSOCParameters.USRRetrainingLatency;
4322 if (USRRetrainingRequiredFinal)
4323 Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
4324 + mmSOCParameters.USRRetrainingLatency;
4326 #ifdef __DML_VBA_DEBUG__
4327 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4328 __func__, Watermark->WritebackDRAMClockChangeWatermark);
4329 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
4330 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
4331 dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
4332 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4335 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4336 st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
4337 SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
4340 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4342 st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
4343 st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
4346 #ifdef __DML_VBA_DEBUG__
4347 dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
4348 dml_print("DML::%s: k=%d, LineBufferSize = %d\n", __func__, k, LineBufferSize);
4349 dml_print("DML::%s: k=%d, LBBitPerPixel = %d\n", __func__, k, LBBitPerPixel[k]);
4350 dml_print("DML::%s: k=%d, HRatio = %f\n", __func__, k, HRatio[k]);
4351 dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]);
4354 st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
4355 st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
4356 st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k];
4358 if (UnboundedRequestEnabled) {
4359 st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY
4360 + CompressedBufferSizeInkByte * 1024
4361 * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
4362 / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW;
4365 st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4366 st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]);
4367 st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
4369 st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY
4370 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
4372 if (NumberOfActiveSurfaces > 1) {
4373 st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY
4374 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
4375 / PixelClock[k] / VRatio[k];
4378 if (BytePerPixelDETC[k] > 0) {
4379 st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4380 st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]);
4381 st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
4383 st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC
4384 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
4386 if (NumberOfActiveSurfaces > 1) {
4387 st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC
4388 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
4389 / PixelClock[k] / VRatioChroma[k];
4391 st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY,
4392 st_vars->ActiveClockChangeLatencyHidingC);
4394 st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY;
4397 ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4398 - Watermark->DRAMClockChangeWatermark;
4399 st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4400 - Watermark->FCLKChangeWatermark;
4401 st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
4403 if (WritebackEnable[k]) {
4404 st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
4405 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
4406 / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
4407 if (WritebackPixelFormat[k] == dm_444_64)
4408 st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2;
4410 st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding
4411 - Watermark->WritebackDRAMClockChangeWatermark;
4413 st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding
4414 - Watermark->WritebackFCLKChangeWatermark;
4416 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4417 st_vars->WritebackFCLKChangeLatencyMargin);
4418 st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k],
4419 st_vars->WritebackDRAMClockChangeLatencyMargin);
4421 MaxActiveDRAMClockChangeLatencySupported[k] =
4422 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4424 (ActiveDRAMClockChangeLatencyMargin[k]
4425 + mmSOCParameters.DRAMClockChangeLatency);
4428 for (i = 0; i < NumberOfActiveSurfaces; ++i) {
4429 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
4431 (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
4432 (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
4433 (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
4434 (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
4435 HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
4436 VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4437 (DRRDisplay[i] || DRRDisplay[j]))) {
4438 st_vars->SynchronizedSurfaces[i][j] = true;
4440 st_vars->SynchronizedSurfaces[i][j] = false;
4445 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4446 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4447 (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4448 st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) {
4449 st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4450 st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k];
4451 st_vars->SurfaceWithMinActiveFCLKChangeMargin = k;
4455 *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4457 st_vars->SameTimingForFCLKChange = true;
4458 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4459 if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) {
4460 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4461 (st_vars->SameTimingForFCLKChange ||
4462 st_vars->ActiveFCLKChangeLatencyMargin[k] <
4463 st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4464 st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k];
4466 st_vars->SameTimingForFCLKChange = false;
4470 if (st_vars->MinActiveFCLKChangeMargin > 0) {
4471 *FCLKChangeSupport = dm_fclock_change_vactive;
4472 } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4473 (PrefetchMode <= 1)) {
4474 *FCLKChangeSupport = dm_fclock_change_vblank;
4476 *FCLKChangeSupport = dm_fclock_change_unsupported;
4479 *USRRetrainingSupport = true;
4480 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4481 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4482 (st_vars->USRRetrainingLatencyMargin[k] < 0)) {
4483 *USRRetrainingSupport = false;
4487 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4488 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4489 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4490 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4491 ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4492 if (PrefetchMode > 0) {
4493 st_vars->DRAMClockChangeSupportNumber = 2;
4494 } else if (st_vars->DRAMClockChangeSupportNumber == 0) {
4495 st_vars->DRAMClockChangeSupportNumber = 1;
4496 st_vars->LastSurfaceWithoutMargin = k;
4497 } else if (st_vars->DRAMClockChangeSupportNumber == 1 &&
4498 !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) {
4499 st_vars->DRAMClockChangeSupportNumber = 2;
4504 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4505 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4506 st_vars->DRAMClockChangeMethod = 1;
4507 else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4508 st_vars->DRAMClockChangeMethod = 2;
4511 if (st_vars->DRAMClockChangeMethod == 0) {
4512 if (st_vars->DRAMClockChangeSupportNumber == 0)
4513 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4514 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4515 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4517 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4518 } else if (st_vars->DRAMClockChangeMethod == 1) {
4519 if (st_vars->DRAMClockChangeSupportNumber == 0)
4520 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4521 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4522 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4524 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4526 if (st_vars->DRAMClockChangeSupportNumber == 0)
4527 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4528 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4529 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4531 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4534 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4535 unsigned int dst_y_pstate;
4536 unsigned int src_y_pstate_l;
4537 unsigned int src_y_pstate_c;
4538 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4540 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
4541 src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
4542 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k];
4543 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
4545 #ifdef __DML_VBA_DEBUG__
4546 dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
4547 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
4548 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
4549 dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
4550 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]);
4551 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
4552 dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
4553 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
4554 dml_print("DML::%s: k=%d, meta_row_height = %d\n", __func__, k, meta_row_height[k]);
4555 dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
4557 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4559 if (BytePerPixelDETC[k] > 0) {
4560 src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
4561 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k];
4562 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
4563 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4565 #ifdef __DML_VBA_DEBUG__
4566 dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
4567 dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
4568 dml_print("DML::%s: k=%d, meta_row_height_chroma = %d\n", __func__, k, meta_row_height_chroma[k]);
4569 dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
4573 #ifdef __DML_VBA_DEBUG__
4574 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4575 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4576 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4577 __func__, *MinActiveFCLKChangeLatencySupported);
4578 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4580 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
/*
 * dml32_CalculateWriteBackDISPCLK - DISPCLK value derived from a writeback
 * configuration.
 *
 * Three candidate values (DISPCLK_H, DISPCLK_V, DISPCLK_HB) are computed from
 * the writeback taps, ratios and widths; the largest one (dml_max3) is handed
 * to dml32_RoundToDFSGranularity() together with DISPCLKDPPCLKVCOSpeed and
 * that result is returned.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but are not referenced
 * by any of the statements visible in this function.
 *
 * NOTE(review): PixelClock is used by all three expressions, but neither its
 * parameter declaration nor the function's braces are among the lines present
 * in this copy of the file - confirm against a pristine copy.
 */
4582 double dml32_CalculateWriteBackDISPCLK(
4583 enum source_format_class WritebackPixelFormat,
4585 double WritebackHRatio,
4586 double WritebackVRatio,
4587 unsigned int WritebackHTaps,
4588 unsigned int WritebackVTaps,
4589 unsigned int WritebackSourceWidth,
4590 unsigned int WritebackDestinationWidth,
4591 unsigned int HTotal,
4592 unsigned int WritebackLineBufferSize,
4593 double DISPCLKDPPCLKVCOSpeed)
4595 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
/*
 * Horizontal candidate: PixelClock scaled by dml_ceil(WritebackHTaps / 8.0, 1)
 * and divided by WritebackHRatio. dml_ceil(x, 1) presumably rounds x up to a
 * whole number - TODO confirm against the helper's definition.
 */
4597 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
/*
 * Vertical candidate: WritebackVTaps times the destination width in groups of
 * six (rounded by dml_ceil), plus a constant 8.0, relative to HTotal.
 */
4598 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
/*
 * Third candidate: depends on WritebackLineBufferSize (divided by 57.0) and
 * is normalised by 6.0 and WritebackSourceWidth. The 57.0 and 6.0 constants
 * are not explained anywhere in the visible code.
 */
4599 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4600 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
/*
 * The meaning of the literal 1 passed as the second argument is defined by
 * dml32_RoundToDFSGranularity(), which is outside this block.
 */
4601 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
/*
 * dml32_CalculateMinAndMaxPrefetchMode - translate the requested prefetch
 * support level into a [min, max] prefetch-mode range.
 *
 * Values stored through MinPrefetchMode / MaxPrefetchMode for each value of
 * AllowForPStateChangeOrStutterInVBlankFinal:
 *
 *   dm_prefetch_support_none                              -> 3 / 3
 *   dm_prefetch_support_stutter                           -> 2 / 2
 *   dm_prefetch_support_fclk_and_stutter                  -> 1 / 1
 *   dm_prefetch_support_uclk_fclk_and_stutter             -> 0 / 0
 *   dm_prefetch_support_uclk_fclk_and_stutter_if_possible -> 0 / 3
 *
 * The last pair of assignments stores 0 / 3 again.
 * NOTE(review): the line introducing that last pair (presumably a plain
 * "else" acting as the fallback for any other value) and the function's
 * braces are not present in this copy of the file - confirm against a
 * pristine copy.
 *
 * Both output pointers are dereferenced unconditionally; no NULL check is
 * performed here.
 */
4604 void dml32_CalculateMinAndMaxPrefetchMode(
4605 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
4606 unsigned int *MinPrefetchMode,
4607 unsigned int *MaxPrefetchMode)
/* Single-mode cases: min and max are pinned to the same value. */
4609 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4610 *MinPrefetchMode = 3;
4611 *MaxPrefetchMode = 3;
4612 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4613 *MinPrefetchMode = 2;
4614 *MaxPrefetchMode = 2;
4615 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4616 *MinPrefetchMode = 1;
4617 *MaxPrefetchMode = 1;
4618 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4619 *MinPrefetchMode = 0;
4620 *MaxPrefetchMode = 0;
/* "if possible": the full range 0..3 is reported. */
4621 } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4622 dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4623 *MinPrefetchMode = 0;
4624 *MaxPrefetchMode = 3;
/* Same full range as the branch above (see NOTE in the header comment). */
4626 *MinPrefetchMode = 0;
4627 *MaxPrefetchMode = 3;
4629 } // CalculateMinAndMaxPrefetchMode
/*
 * dml32_CalculatePixelDeliveryTimes - fill the per-surface line, request and
 * cursor delivery-time arrays.
 *
 * Three passes are made over k = 0 .. NumberOfActiveSurfaces - 1:
 *
 *  1. Line delivery times. DisplayPipeLineDeliveryTimeLuma/Chroma[k] and the
 *     corresponding ...Prefetch[k] entries are derived from the swath width
 *     with one of two formulas:
 *         swath_width_ub * DPPPerSurface / HRatio / PixelClock
 *         swath_width_ub / PSCL_THROUGHPUT / Dppclk
 *     The first formula follows a "ratio <= 1" test, where the ratio is
 *     VRatio / VRatioChroma for the nominal values and VRatioPrefetchY /
 *     VRatioPrefetchC for the prefetch values. Chroma entries are set to 0
 *     when BytePerPixelC[k] == 0.
 *
 *  2. Request delivery times. Each line delivery time is divided by
 *     req_per_swath_ub, which is the swath width divided by the 256-byte
 *     block width or block height depending on IsVertical(SourceRotation[k]).
 *     Chroma entries are set to 0 when BytePerPixelC[k] == 0.
 *
 *  3. Cursor request delivery times, using only cursor index 0 of each
 *     surface (CursorWidth[k][0], CursorBPP[k][0]).
 *
 * With __DML_VBA_DEBUG__ defined, inputs and results are logged through
 * dml_print().
 *
 * NOTE(review): this copy of the file lacks a number of lines inside this
 * function (lone braces, "else" / "#endif" lines, and the declarations of
 * VRatio, HRatio, Dppclk and k, all of which are used below). Where two
 * assignments to the same element follow one "if", the second is presumably
 * the else arm - confirm against a pristine copy.
 */
4631 void dml32_CalculatePixelDeliveryTimes(
4632 unsigned int NumberOfActiveSurfaces,
4634 double VRatioChroma[],
4635 double VRatioPrefetchY[],
4636 double VRatioPrefetchC[],
4637 unsigned int swath_width_luma_ub[],
4638 unsigned int swath_width_chroma_ub[],
4639 unsigned int DPPPerSurface[],
4641 double HRatioChroma[],
4642 double PixelClock[],
4643 double PSCL_THROUGHPUT[],
4644 double PSCL_THROUGHPUT_CHROMA[],
4646 unsigned int BytePerPixelC[],
4647 enum dm_rotation_angle SourceRotation[],
4648 unsigned int NumberOfCursors[],
4649 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
4650 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
4651 unsigned int BlockWidth256BytesY[],
4652 unsigned int BlockHeight256BytesY[],
4653 unsigned int BlockWidth256BytesC[],
4654 unsigned int BlockHeight256BytesC[],
/* Every array from here to the end of the list is written by this function. */
4657 double DisplayPipeLineDeliveryTimeLuma[],
4658 double DisplayPipeLineDeliveryTimeChroma[],
4659 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
4660 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
4661 double DisplayPipeRequestDeliveryTimeLuma[],
4662 double DisplayPipeRequestDeliveryTimeChroma[],
4663 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4664 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4665 double CursorRequestDeliveryTime[],
4666 double CursorRequestDeliveryTimePrefetch[])
/* Scratch value reused for luma and then chroma in the second pass. */
4668 double req_per_swath_ub;
/* Pass 1: line delivery times (nominal and prefetch, luma and chroma). */
4671 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4673 #ifdef __DML_VBA_DEBUG__
4674 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4675 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4676 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4677 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4678 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4679 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4680 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4681 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4682 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4683 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4684 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
/* Nominal luma: formula choice keyed on VRatio[k] <= 1. */
4687 if (VRatio[k] <= 1) {
4688 DisplayPipeLineDeliveryTimeLuma[k] =
4689 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4691 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
/* Nominal chroma: zero when the surface has no chroma bytes per pixel. */
4694 if (BytePerPixelC[k] == 0) {
4695 DisplayPipeLineDeliveryTimeChroma[k] = 0;
4697 if (VRatioChroma[k] <= 1) {
4698 DisplayPipeLineDeliveryTimeChroma[k] =
4699 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4701 DisplayPipeLineDeliveryTimeChroma[k] =
4702 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
/* Prefetch luma: same two formulas, keyed on VRatioPrefetchY[k] instead. */
4706 if (VRatioPrefetchY[k] <= 1) {
4707 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4708 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4710 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4711 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
/* Prefetch chroma: keyed on VRatioPrefetchC[k]; zero without chroma. */
4714 if (BytePerPixelC[k] == 0) {
4715 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4717 if (VRatioPrefetchC[k] <= 1) {
4718 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4719 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4721 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4722 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4725 #ifdef __DML_VBA_DEBUG__
4726 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4727 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4728 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4729 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4730 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4731 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4732 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4733 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
/* Pass 2: request delivery times = line delivery time / requests per swath. */
4737 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
/*
 * NOTE(review): both operands of these divisions are unsigned int, so the
 * quotient is truncated before it is stored in the double req_per_swath_ub.
 * Confirm the swath width is always a multiple of the block dimension (and
 * at least as large), otherwise the divisions below lose precision or divide
 * by zero.
 */
4738 if (!IsVertical(SourceRotation[k]))
4739 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4741 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4742 #ifdef __DML_VBA_DEBUG__
4743 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4746 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4747 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4748 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
/* Chroma: zero without chroma data, otherwise recompute req_per_swath_ub. */
4749 if (BytePerPixelC[k] == 0) {
4750 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4751 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4753 if (!IsVertical(SourceRotation[k]))
4754 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4756 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4757 #ifdef __DML_VBA_DEBUG__
4758 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4760 DisplayPipeRequestDeliveryTimeChroma[k] =
4761 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4762 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4763 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4765 #ifdef __DML_VBA_DEBUG__
4766 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4767 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4768 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4769 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4770 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4771 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4772 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4773 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
/* Pass 3: cursor request delivery times, based on cursor 0 of each surface. */
4777 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4778 unsigned int cursor_req_per_width;
/*
 * Computed from cursor width times bits-per-pixel before NumberOfCursors[k]
 * is tested. NOTE(review): the remainder of this dml_ceil() expression is not
 * present in this copy of the file.
 */
4780 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4782 if (NumberOfCursors[k] > 0) {
/* Nominal value: formula choice keyed on VRatio[k] <= 1. */
4783 if (VRatio[k] <= 1) {
4784 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4785 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4787 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4788 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
/* Prefetch value: same formulas keyed on VRatioPrefetchY[k] <= 1. */
4790 if (VRatioPrefetchY[k] <= 1) {
4791 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4792 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4794 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4795 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
/* Presumably the no-cursor arm (see NOTE in the header): both times are 0. */
4798 CursorRequestDeliveryTime[k] = 0;
4799 CursorRequestDeliveryTimePrefetch[k] = 0;
4801 #ifdef __DML_VBA_DEBUG__
4802 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4803 __func__, k, NumberOfCursors[k]);
4804 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4805 __func__, k, CursorRequestDeliveryTime[k]);
4806 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4807 __func__, k, CursorRequestDeliveryTimePrefetch[k]);
4810 } // CalculatePixelDeliveryTimes
4812 void dml32_CalculateMetaAndPTETimes(
4813 bool use_one_row_for_frame[],
4814 unsigned int NumberOfActiveSurfaces,
4816 unsigned int MetaChunkSize,
4817 unsigned int MinMetaChunkSizeBytes,
4818 unsigned int HTotal[],
4820 double VRatioChroma[],
4821 double DestinationLinesToRequestRowInVBlank[],
4822 double DestinationLinesToRequestRowInImmediateFlip[],
4824 double PixelClock[],
4825 unsigned int BytePerPixelY[],
4826 unsigned int BytePerPixelC[],
4827 enum dm_rotation_angle SourceRotation[],
4828 unsigned int dpte_row_height[],
4829 unsigned int dpte_row_height_chroma[],
4830 unsigned int meta_row_width[],
4831 unsigned int meta_row_width_chroma[],
4832 unsigned int meta_row_height[],
4833 unsigned int meta_row_height_chroma[],
4834 unsigned int meta_req_width[],
4835 unsigned int meta_req_width_chroma[],
4836 unsigned int meta_req_height[],
4837 unsigned int meta_req_height_chroma[],
4838 unsigned int dpte_group_bytes[],
4839 unsigned int PTERequestSizeY[],
4840 unsigned int PTERequestSizeC[],
4841 unsigned int PixelPTEReqWidthY[],
4842 unsigned int PixelPTEReqHeightY[],
4843 unsigned int PixelPTEReqWidthC[],
4844 unsigned int PixelPTEReqHeightC[],
4845 unsigned int dpte_row_width_luma_ub[],
4846 unsigned int dpte_row_width_chroma_ub[],
4849 double DST_Y_PER_PTE_ROW_NOM_L[],
4850 double DST_Y_PER_PTE_ROW_NOM_C[],
4851 double DST_Y_PER_META_ROW_NOM_L[],
4852 double DST_Y_PER_META_ROW_NOM_C[],
4853 double TimePerMetaChunkNominal[],
4854 double TimePerChromaMetaChunkNominal[],
4855 double TimePerMetaChunkVBlank[],
4856 double TimePerChromaMetaChunkVBlank[],
4857 double TimePerMetaChunkFlip[],
4858 double TimePerChromaMetaChunkFlip[],
4859 double time_per_pte_group_nom_luma[],
4860 double time_per_pte_group_vblank_luma[],
4861 double time_per_pte_group_flip_luma[],
4862 double time_per_pte_group_nom_chroma[],
4863 double time_per_pte_group_vblank_chroma[],
4864 double time_per_pte_group_flip_chroma[])
4866 unsigned int meta_chunk_width;
4867 unsigned int min_meta_chunk_width;
4868 unsigned int meta_chunk_per_row_int;
4869 unsigned int meta_row_remainder;
4870 unsigned int meta_chunk_threshold;
4871 unsigned int meta_chunks_per_row_ub;
4872 unsigned int meta_chunk_width_chroma;
4873 unsigned int min_meta_chunk_width_chroma;
4874 unsigned int meta_chunk_per_row_int_chroma;
4875 unsigned int meta_row_remainder_chroma;
4876 unsigned int meta_chunk_threshold_chroma;
4877 unsigned int meta_chunks_per_row_ub_chroma;
4878 unsigned int dpte_group_width_luma;
4879 unsigned int dpte_groups_per_row_luma_ub;
4880 unsigned int dpte_group_width_chroma;
4881 unsigned int dpte_groups_per_row_chroma_ub;
4884 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4885 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4886 if (BytePerPixelC[k] == 0)
4887 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4889 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4890 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4891 if (BytePerPixelC[k] == 0)
4892 DST_Y_PER_META_ROW_NOM_C[k] = 0;
4894 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4897 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4898 if (DCCEnable[k] == true) {
4899 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4900 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4901 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4902 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4903 if (!IsVertical(SourceRotation[k]))
4904 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4906 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4908 if (meta_row_remainder <= meta_chunk_threshold)
4909 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4911 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4913 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4914 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4915 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4916 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4917 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4918 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4919 if (BytePerPixelC[k] == 0) {
4920 TimePerChromaMetaChunkNominal[k] = 0;
4921 TimePerChromaMetaChunkVBlank[k] = 0;
4922 TimePerChromaMetaChunkFlip[k] = 0;
4924 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4925 meta_row_height_chroma[k];
4926 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4927 meta_row_height_chroma[k];
4928 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4929 meta_chunk_width_chroma;
4930 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4931 if (!IsVertical(SourceRotation[k])) {
4932 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4933 meta_req_width_chroma[k];
4935 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4936 meta_req_height_chroma[k];
4938 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4939 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4941 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4943 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4944 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4945 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4946 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4947 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4948 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4951 TimePerMetaChunkNominal[k] = 0;
4952 TimePerMetaChunkVBlank[k] = 0;
4953 TimePerMetaChunkFlip[k] = 0;
4954 TimePerChromaMetaChunkNominal[k] = 0;
4955 TimePerChromaMetaChunkVBlank[k] = 0;
4956 TimePerChromaMetaChunkFlip[k] = 0;
4960 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4961 if (GPUVMEnable == true) {
4962 if (!IsVertical(SourceRotation[k])) {
4963 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4964 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
4966 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4967 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
4970 if (use_one_row_for_frame[k]) {
4971 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4972 (double) dpte_group_width_luma / 2.0, 1.0);
4974 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4975 (double) dpte_group_width_luma, 1.0);
4977 #ifdef __DML_VBA_DEBUG__
4978 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
4979 __func__, k, use_one_row_for_frame[k]);
4980 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
4981 __func__, k, dpte_group_bytes[k]);
4982 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
4983 __func__, k, PTERequestSizeY[k]);
4984 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
4985 __func__, k, PixelPTEReqWidthY[k]);
4986 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
4987 __func__, k, PixelPTEReqHeightY[k]);
4988 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
4989 __func__, k, dpte_row_width_luma_ub[k]);
4990 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
4991 __func__, k, dpte_group_width_luma);
4992 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
4993 __func__, k, dpte_groups_per_row_luma_ub);
4996 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
4997 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
4998 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
4999 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5000 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5001 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5002 if (BytePerPixelC[k] == 0) {
5003 time_per_pte_group_nom_chroma[k] = 0;
5004 time_per_pte_group_vblank_chroma[k] = 0;
5005 time_per_pte_group_flip_chroma[k] = 0;
5007 if (!IsVertical(SourceRotation[k])) {
5008 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5009 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5011 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5012 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5015 if (use_one_row_for_frame[k]) {
5016 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5017 (double) dpte_group_width_chroma / 2.0, 1.0);
5019 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5020 (double) dpte_group_width_chroma, 1.0);
5022 #ifdef __DML_VBA_DEBUG__
5023 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
5024 __func__, k, dpte_row_width_chroma_ub[k]);
5025 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
5026 __func__, k, dpte_group_width_chroma);
5027 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
5028 __func__, k, dpte_groups_per_row_chroma_ub);
5030 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5031 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5032 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5033 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5034 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5035 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5038 time_per_pte_group_nom_luma[k] = 0;
5039 time_per_pte_group_vblank_luma[k] = 0;
5040 time_per_pte_group_flip_luma[k] = 0;
5041 time_per_pte_group_nom_chroma[k] = 0;
5042 time_per_pte_group_vblank_chroma[k] = 0;
5043 time_per_pte_group_flip_chroma[k] = 0;
5045 #ifdef __DML_VBA_DEBUG__
5046 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
5047 __func__, k, DestinationLinesToRequestRowInVBlank[k]);
5048 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
5049 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5050 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
5051 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5052 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
5053 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5054 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
5055 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5056 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
5057 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5058 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
5059 __func__, k, TimePerMetaChunkNominal[k]);
5060 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
5061 __func__, k, TimePerMetaChunkVBlank[k]);
5062 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
5063 __func__, k, TimePerMetaChunkFlip[k]);
5064 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
5065 __func__, k, TimePerChromaMetaChunkNominal[k]);
5066 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
5067 __func__, k, TimePerChromaMetaChunkVBlank[k]);
5068 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
5069 __func__, k, TimePerChromaMetaChunkFlip[k]);
5070 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
5071 __func__, k, time_per_pte_group_nom_luma[k]);
5072 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
5073 __func__, k, time_per_pte_group_vblank_luma[k]);
5074 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
5075 __func__, k, time_per_pte_group_flip_luma[k]);
5076 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
5077 __func__, k, time_per_pte_group_nom_chroma[k]);
5078 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5079 __func__, k, time_per_pte_group_vblank_chroma[k]);
5080 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
5081 __func__, k, time_per_pte_group_flip_chroma[k]);
5084 } // CalculateMetaAndPTETimes
/*
 * dml32_CalculateVMGroupAndRequestTimes - per-surface VM group/request timing.
 *
 * For each surface k in [0, NumberOfActiveSurfaces) this fills the four output
 * arrays TimePerVMGroupVBlank, TimePerVMGroupFlip, TimePerVMRequestVBlank and
 * TimePerVMRequestFlip.
 *
 * Each output is
 *   DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k] * HTotal[k] /
 *   PixelClock[k] / count
 * where count is num_group_per_lower_vm_stage for the "Group" outputs and
 * num_req_per_lower_vm_stage for the "Request" outputs.
 *
 * Inputs read by the visible body: NumberOfActiveSurfaces, GPUVMEnable,
 * GPUVMMaxPageTableLevels, HTotal, BytePerPixelC, both DestinationLines
 * arrays, DCCEnable, PixelClock, vm_group_bytes and the four
 * dpde0 / meta_pte bytes-per-frame arrays.
 *
 * NOTE(review): dpte_row_width_luma_ub and dpte_row_width_chroma_ub are not
 * referenced anywhere in the visible body.
 * NOTE(review): the declarations of k, GPUVMEnable and DCCEnable, and the
 * brace / else lines that delimit the branches below, are not visible in this
 * view; branch structure described in the comments is inferred from the
 * visible conditions and should be confirmed against the full file.
 */
5086 void dml32_CalculateVMGroupAndRequestTimes(
5087 unsigned int NumberOfActiveSurfaces,
5089 unsigned int GPUVMMaxPageTableLevels,
5090 unsigned int HTotal[],
5091 unsigned int BytePerPixelC[],
5092 double DestinationLinesToRequestVMInVBlank[],
5093 double DestinationLinesToRequestVMInImmediateFlip[],
5095 double PixelClock[],
5096 unsigned int dpte_row_width_luma_ub[],
5097 unsigned int dpte_row_width_chroma_ub[],
5098 unsigned int vm_group_bytes[],
5099 unsigned int dpde0_bytes_per_frame_ub_l[],
5100 unsigned int dpde0_bytes_per_frame_ub_c[],
5101 unsigned int meta_pte_bytes_per_frame_ub_l[],
5102 unsigned int meta_pte_bytes_per_frame_ub_c[],
5105 double TimePerVMGroupVBlank[],
5106 double TimePerVMGroupFlip[],
5107 double TimePerVMRequestVBlank[],
5108 double TimePerVMRequestFlip[])
5111 unsigned int num_group_per_lower_vm_stage;
5112 unsigned int num_req_per_lower_vm_stage;
5114 #ifdef __DML_VBA_DEBUG__
5115 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
5116 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
5118 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5120 #ifdef __DML_VBA_DEBUG__
5121 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
5122 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
5123 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
5124 __func__, k, dpde0_bytes_per_frame_ub_l[k]);
5125 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
5126 __func__, k, dpde0_bytes_per_frame_ub_c[k]);
5127 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
5128 __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
5129 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
5130 __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
/*
 * Timing is computed only when GPUVM is enabled and, for this surface,
 * either DCC is enabled or more than one page-table level is in use.
 */
5133 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
/*
 * Group count. Each term is dml_ceil(bytes / vm_group_bytes[k], 1)
 * (presumably a round-up to a whole number of groups - confirm dml_ceil):
 *  - DCCEnable[k] == false: dpde0 bytes only;
 *  - GPUVMMaxPageTableLevels == 1: meta_pte bytes only;
 *  - remaining assignments: dpde0 and meta_pte terms plus a constant
 *    (2 in the BytePerPixelC[k] > 0 form, 1 in the other form).
 * Chroma (_c) terms appear only in the BytePerPixelC[k] > 0 forms.
 */
5134 if (DCCEnable[k] == false) {
5135 if (BytePerPixelC[k] > 0) {
5136 num_group_per_lower_vm_stage = dml_ceil(
5137 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5138 (double) (vm_group_bytes[k]), 1.0) +
5139 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5140 (double) (vm_group_bytes[k]), 1.0);
5142 num_group_per_lower_vm_stage = dml_ceil(
5143 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5144 (double) (vm_group_bytes[k]), 1.0);
5147 if (GPUVMMaxPageTableLevels == 1) {
5148 if (BytePerPixelC[k] > 0) {
5149 num_group_per_lower_vm_stage = dml_ceil(
5150 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5151 (double) (vm_group_bytes[k]), 1.0) +
5152 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5153 (double) (vm_group_bytes[k]), 1.0);
5155 num_group_per_lower_vm_stage = dml_ceil(
5156 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5157 (double) (vm_group_bytes[k]), 1.0);
5160 if (BytePerPixelC[k] > 0) {
5161 num_group_per_lower_vm_stage = 2 + dml_ceil(
5162 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5163 (double) (vm_group_bytes[k]), 1) +
5164 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5165 (double) (vm_group_bytes[k]), 1) +
5166 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
5167 (double) (vm_group_bytes[k]), 1) +
5168 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5169 (double) (vm_group_bytes[k]), 1);
5171 num_group_per_lower_vm_stage = 1 + dml_ceil(
5172 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5173 (double) (vm_group_bytes[k]), 1) + dml_ceil(
5174 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5175 (double) (vm_group_bytes[k]), 1);
/*
 * Request count. Same case split as the group count, but each term is the
 * byte count divided by 64 using unsigned integer division (so a byte count
 * below 64 contributes 0). Presumably 64 is the request size in bytes -
 * TODO confirm.
 */
5180 if (DCCEnable[k] == false) {
5181 if (BytePerPixelC[k] > 0) {
5182 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
5183 dpde0_bytes_per_frame_ub_c[k] / 64;
5185 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
5188 if (GPUVMMaxPageTableLevels == 1) {
5189 if (BytePerPixelC[k] > 0) {
5190 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
5191 meta_pte_bytes_per_frame_ub_c[k] / 64;
5193 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
5196 if (BytePerPixelC[k] > 0) {
5197 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5198 64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
5199 meta_pte_bytes_per_frame_ub_l[k] / 64 +
5200 meta_pte_bytes_per_frame_ub_c[k] / 64;
5202 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5203 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
/*
 * Spread the available time (destination lines * HTotal / PixelClock) evenly
 * over the groups and over the requests.
 * NOTE(review): no guard against a zero group/request count is visible here.
 */
5208 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5209 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5210 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5211 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5212 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5213 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5214 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5215 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
/* With more than two page-table levels all four times are halved. */
5217 if (GPUVMMaxPageTableLevels > 2) {
5218 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
5219 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
5220 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
5221 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
/*
 * NOTE(review): presumably the else branch of the GPUVMEnable / DCCEnable
 * condition above (the else line is not visible in this view): all four
 * outputs are reported as zero.
 */
5225 TimePerVMGroupVBlank[k] = 0;
5226 TimePerVMGroupFlip[k] = 0;
5227 TimePerVMRequestVBlank[k] = 0;
5228 TimePerVMRequestFlip[k] = 0;
5231 #ifdef __DML_VBA_DEBUG__
5232 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
5233 dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
5234 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
5235 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
5238 } // CalculateVMGroupAndRequestTimes
/*
 * dml32_CalculateDCCConfiguration - choose DCC block sizes for one surface.
 *
 * Writes six outputs through pointers: MaxUncompressedBlock, MaxCompressedBlock
 * and IndependentBlock, each for Luma and for Chroma.
 *
 * Visible steps:
 *  1. Derive viewport limits from the DET size (nomDETInKByte * 1024) and
 *     clamp the luma surface width / height to them.
 *  2. Compute the bytes of a full swath for horizontal and vertical scan,
 *     for luma and (when BytePerPixelC > 0) chroma.
 *  3. Decide per plane and per scan direction whether 128-byte requests are
 *     needed (req128_* flags) by comparing swath bytes with the DET size.
 *  4. Pick a request type per plane from those flags and the segment-order
 *     flags, depending on DCCProgrammingAssumesScanDirectionUnknown and
 *     IsVertical(SourceRotation).
 *  5. Map the request type to the output block sizes, then zero the outputs
 *     when DCC is disabled (and the chroma outputs when BytePerPixelC == 0).
 *
 * NOTE(review): SurfaceWidthChroma, SurfaceHeightChroma, TilingFormat,
 * BytePerPixelDETY and BytePerPixelDETC are not referenced in the visible
 * body.
 * NOTE(review): this view omits several short lines: the DCCEnabled
 * declaration, the start/end of the RequestType enum (REQ_256Bytes is used
 * below but not listed), the assignments to horz_div_l/c and vert_div_l/c,
 * and all brace / else lines. Branch structure described in the comments is
 * inferred from the visible conditions; confirm against the full file.
 */
5240 void dml32_CalculateDCCConfiguration(
5242 bool DCCProgrammingAssumesScanDirectionUnknown,
5243 enum source_format_class SourcePixelFormat,
5244 unsigned int SurfaceWidthLuma,
5245 unsigned int SurfaceWidthChroma,
5246 unsigned int SurfaceHeightLuma,
5247 unsigned int SurfaceHeightChroma,
5248 unsigned int nomDETInKByte,
5249 unsigned int RequestHeight256ByteLuma,
5250 unsigned int RequestHeight256ByteChroma,
5251 enum dm_swizzle_mode TilingFormat,
5252 unsigned int BytePerPixelY,
5253 unsigned int BytePerPixelC,
5254 double BytePerPixelDETY,
5255 double BytePerPixelDETC,
5256 enum dm_rotation_angle SourceRotation,
5258 unsigned int *MaxUncompressedBlockLuma,
5259 unsigned int *MaxUncompressedBlockChroma,
5260 unsigned int *MaxCompressedBlockLuma,
5261 unsigned int *MaxCompressedBlockChroma,
5262 unsigned int *IndependentBlockLuma,
5263 unsigned int *IndependentBlockChroma)
5267 REQ_128BytesNonContiguous,
5268 REQ_128BytesContiguous,
5272 RequestType RequestLuma;
5273 RequestType RequestChroma;
5275 unsigned int segment_order_horz_contiguous_luma;
5276 unsigned int segment_order_horz_contiguous_chroma;
5277 unsigned int segment_order_vert_contiguous_luma;
5278 unsigned int segment_order_vert_contiguous_chroma;
5279 unsigned int req128_horz_wc_l;
5280 unsigned int req128_horz_wc_c;
5281 unsigned int req128_vert_wc_l;
5282 unsigned int req128_vert_wc_c;
5283 unsigned int MAS_vp_horz_limit;
5284 unsigned int MAS_vp_vert_limit;
5285 unsigned int max_vp_horz_width;
5286 unsigned int max_vp_vert_height;
5287 unsigned int eff_surf_width_l;
5288 unsigned int eff_surf_width_c;
5289 unsigned int eff_surf_height_l;
5290 unsigned int eff_surf_height_c;
5291 unsigned int full_swath_bytes_horz_wc_l;
5292 unsigned int full_swath_bytes_horz_wc_c;
5293 unsigned int full_swath_bytes_vert_wc_l;
5294 unsigned int full_swath_bytes_vert_wc_c;
5295 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5297 unsigned int yuv420;
5298 unsigned int horz_div_l;
5299 unsigned int horz_div_c;
5300 unsigned int vert_div_l;
5301 unsigned int vert_div_c;
5303 unsigned int swath_buf_size;
5304 double detile_buf_vp_horz_limit;
5305 double detile_buf_vp_vert_limit;
/* yuv420 is 1 for the dm_420_8, dm_420_10 and dm_420_12 formats, else 0. */
5307 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5308 SourcePixelFormat == dm_420_12) ? 1 : 0);
/*
 * NOTE(review): the statements that set horz_div_l/c and vert_div_l/c
 * (including the bodies of the two one-byte-per-pixel checks below) are
 * not visible in this view.
 */
5314 if (BytePerPixelY == 1)
5316 if (BytePerPixelC == 1)
5319 if (BytePerPixelC == 0) {
/*
 * Viewport limits from the swath buffer. With BytePerPixelC == 0 the
 * swath buffer is half the DET minus 2 * 256 bytes and only luma terms
 * enter the divisor; the other form subtracts 2 * 2 * 256 bytes and adds
 * chroma terms (scaled by 1 / (1 + yuv420)) to the divisor.
 */
5320 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5321 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5322 BytePerPixelY / (1 + horz_div_l));
5323 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5326 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5327 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5328 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5329 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5330 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5331 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5332 (1 + vert_div_c) / (1 + yuv420));
/* dm_420_10 scales both viewport limits by 1.5. */
5335 if (SourcePixelFormat == dm_420_10) {
5336 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5337 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
/*
 * Presumably dml_floor(x, 16) rounds down to a multiple of 16 - confirm
 * against the helper's definition.
 */
5340 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5341 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
/*
 * Fixed viewport caps: 3840 for dm_rgbe_alpha; otherwise 6144 horizontally
 * and, vertically, 3072 when BytePerPixelY == 8 or 6144 otherwise. The
 * effective maximum is the smaller of the cap and the detile-buffer limit,
 * and the luma surface size is clamped to it. Chroma dimensions are the
 * clamped luma dimensions divided by (1 + yuv420).
 */
5343 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5344 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5345 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5346 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5347 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5348 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
5349 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5350 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
/*
 * Bytes in one full swath: horizontal = width * request height * bytes per
 * pixel; vertical = height * 256 / request height. Chroma values are
 * computed when BytePerPixelC > 0; the zero assignments are presumably the
 * else branch.
 */
5352 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5353 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5354 if (BytePerPixelC > 0) {
5355 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5356 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5358 full_swath_bytes_horz_wc_c = 0;
5359 full_swath_bytes_vert_wc_c = 0;
/*
 * dm_420_10: scale every swath size by 2/3 and pass it through
 * dml_ceil(x, 256.0) (presumably a round-up to a multiple of 256 - confirm).
 */
5362 if (SourcePixelFormat == dm_420_10) {
5363 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5364 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5365 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5366 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
/*
 * 128-byte request decision, done identically for horizontal and vertical:
 *  - two luma + two chroma swaths fit in the DET: neither plane flagged;
 *  - luma < 1.5 * chroma and 2 * luma + chroma fits: chroma flagged;
 *  - luma >= 1.5 * chroma and luma + 2 * chroma fits: luma flagged;
 *  - remaining assignments flag both planes.
 */
5369 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5370 req128_horz_wc_l = 0;
5371 req128_horz_wc_c = 0;
5372 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5373 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5374 req128_horz_wc_l = 0;
5375 req128_horz_wc_c = 1;
5376 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5377 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5378 req128_horz_wc_l = 1;
5379 req128_horz_wc_c = 0;
5381 req128_horz_wc_l = 1;
5382 req128_horz_wc_c = 1;
5385 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5386 req128_vert_wc_l = 0;
5387 req128_vert_wc_c = 0;
5388 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5389 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5390 req128_vert_wc_l = 0;
5391 req128_vert_wc_c = 1;
5392 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5393 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5394 req128_vert_wc_l = 1;
5395 req128_vert_wc_c = 0;
5397 req128_vert_wc_l = 1;
5398 req128_vert_wc_c = 1;
/*
 * Segment-order flags per plane: a 2-byte-per-pixel plane gets horz = 0 and
 * vert = 1; the other assignments (presumably the else branch) set horz = 1
 * and vert = 0.
 */
5401 if (BytePerPixelY == 2) {
5402 segment_order_horz_contiguous_luma = 0;
5403 segment_order_vert_contiguous_luma = 1;
5405 segment_order_horz_contiguous_luma = 1;
5406 segment_order_vert_contiguous_luma = 0;
5409 if (BytePerPixelC == 2) {
5410 segment_order_horz_contiguous_chroma = 0;
5411 segment_order_vert_contiguous_chroma = 1;
5413 segment_order_horz_contiguous_chroma = 1;
5414 segment_order_vert_contiguous_chroma = 0;
5416 #ifdef __DML_VBA_DEBUG__
5417 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5418 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5419 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5420 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5421 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5422 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5423 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5424 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5425 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5426 __func__, segment_order_horz_contiguous_chroma);
/*
 * Request type per plane.
 *  - Scan direction unknown: REQ_256Bytes only if neither direction is
 *    flagged; REQ_128BytesNonContiguous if a flagged direction has segment
 *    order 0; the remaining assignment is REQ_128BytesContiguous.
 *  - Otherwise only one direction is consulted: the horizontal flags when
 *    IsVertical(SourceRotation) is false, and the vertical flags in the
 *    last group of assignments (presumably the final else).
 */
5429 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5430 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5431 RequestLuma = REQ_256Bytes;
5432 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5433 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5434 RequestLuma = REQ_128BytesNonContiguous;
5436 RequestLuma = REQ_128BytesContiguous;
5438 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5439 RequestChroma = REQ_256Bytes;
5440 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5441 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5442 RequestChroma = REQ_128BytesNonContiguous;
5444 RequestChroma = REQ_128BytesContiguous;
5446 } else if (!IsVertical(SourceRotation)) {
5447 if (req128_horz_wc_l == 0)
5448 RequestLuma = REQ_256Bytes;
5449 else if (segment_order_horz_contiguous_luma == 0)
5450 RequestLuma = REQ_128BytesNonContiguous;
5452 RequestLuma = REQ_128BytesContiguous;
5454 if (req128_horz_wc_c == 0)
5455 RequestChroma = REQ_256Bytes;
5456 else if (segment_order_horz_contiguous_chroma == 0)
5457 RequestChroma = REQ_128BytesNonContiguous;
5459 RequestChroma = REQ_128BytesContiguous;
5462 if (req128_vert_wc_l == 0)
5463 RequestLuma = REQ_256Bytes;
5464 else if (segment_order_vert_contiguous_luma == 0)
5465 RequestLuma = REQ_128BytesNonContiguous;
5467 RequestLuma = REQ_128BytesContiguous;
5469 if (req128_vert_wc_c == 0)
5470 RequestChroma = REQ_256Bytes;
5471 else if (segment_order_vert_contiguous_chroma == 0)
5472 RequestChroma = REQ_128BytesNonContiguous;
5474 RequestChroma = REQ_128BytesContiguous;
/*
 * Map request type to (MaxUncompressed, MaxCompressed, Independent):
 *   REQ_256Bytes           -> 256, 256, 0
 *   REQ_128BytesContiguous -> 256, 128, 128
 *   remaining assignments  -> 256, 64, 64 (presumably the non-contiguous
 *                             case; the else line is not visible)
 */
5477 if (RequestLuma == REQ_256Bytes) {
5478 *MaxUncompressedBlockLuma = 256;
5479 *MaxCompressedBlockLuma = 256;
5480 *IndependentBlockLuma = 0;
5481 } else if (RequestLuma == REQ_128BytesContiguous) {
5482 *MaxUncompressedBlockLuma = 256;
5483 *MaxCompressedBlockLuma = 128;
5484 *IndependentBlockLuma = 128;
5486 *MaxUncompressedBlockLuma = 256;
5487 *MaxCompressedBlockLuma = 64;
5488 *IndependentBlockLuma = 64;
5491 if (RequestChroma == REQ_256Bytes) {
5492 *MaxUncompressedBlockChroma = 256;
5493 *MaxCompressedBlockChroma = 256;
5494 *IndependentBlockChroma = 0;
5495 } else if (RequestChroma == REQ_128BytesContiguous) {
5496 *MaxUncompressedBlockChroma = 256;
5497 *MaxCompressedBlockChroma = 128;
5498 *IndependentBlockChroma = 128;
5500 *MaxUncompressedBlockChroma = 256;
5501 *MaxCompressedBlockChroma = 64;
5502 *IndependentBlockChroma = 64;
/*
 * Final overrides: chroma outputs are zeroed when DCC is disabled or the
 * surface has no chroma plane (BytePerPixelC == 0); luma outputs are zeroed
 * when DCC is disabled.
 */
5505 if (DCCEnabled != true || BytePerPixelC == 0) {
5506 *MaxUncompressedBlockChroma = 0;
5507 *MaxCompressedBlockChroma = 0;
5508 *IndependentBlockChroma = 0;
5511 if (DCCEnabled != true) {
5512 *MaxUncompressedBlockLuma = 0;
5513 *MaxCompressedBlockLuma = 0;
5514 *IndependentBlockLuma = 0;
5517 #ifdef __DML_VBA_DEBUG__
5518 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5519 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5520 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5521 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5522 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5523 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5526 } // CalculateDCCConfiguration
5528 void dml32_CalculateStutterEfficiency(
5529 unsigned int CompressedBufferSizeInkByte,
5530 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5531 bool UnboundedRequestEnabled,
5532 unsigned int MetaFIFOSizeInKEntries,
5533 unsigned int ZeroSizeBufferEntries,
5534 unsigned int PixelChunkSizeInKByte,
5535 unsigned int NumberOfActiveSurfaces,
5536 unsigned int ROBBufferSizeInKByte,
5537 double TotalDataReadBandwidth,
5540 unsigned int CompbufReservedSpace64B,
5541 unsigned int CompbufReservedSpaceZs,
5543 double SRExitZ8Time,
5544 bool SynchronizeTimingsFinal,
5545 unsigned int BlendingAndTiming[],
5546 double StutterEnterPlusExitWatermark,
5547 double Z8StutterEnterPlusExitWatermark,
5548 bool ProgressiveToInterlaceUnitInOPP,
5550 double MinTTUVBlank[],
5551 unsigned int DPPPerSurface[],
5552 unsigned int DETBufferSizeY[],
5553 unsigned int BytePerPixelY[],
5554 double BytePerPixelDETY[],
5555 double SwathWidthY[],
5556 unsigned int SwathHeightY[],
5557 unsigned int SwathHeightC[],
5558 double NetDCCRateLuma[],
5559 double NetDCCRateChroma[],
5560 double DCCFractionOfZeroSizeRequestsLuma[],
5561 double DCCFractionOfZeroSizeRequestsChroma[],
5562 unsigned int HTotal[],
5563 unsigned int VTotal[],
5564 double PixelClock[],
5566 enum dm_rotation_angle SourceRotation[],
5567 unsigned int BlockHeight256BytesY[],
5568 unsigned int BlockWidth256BytesY[],
5569 unsigned int BlockHeight256BytesC[],
5570 unsigned int BlockWidth256BytesC[],
5571 unsigned int DCCYMaxUncompressedBlock[],
5572 unsigned int DCCCMaxUncompressedBlock[],
5573 unsigned int VActive[],
5575 bool WritebackEnable[],
5576 double ReadBandwidthSurfaceLuma[],
5577 double ReadBandwidthSurfaceChroma[],
5578 double meta_row_bw[],
5579 double dpte_row_bw[],
5582 double *StutterEfficiencyNotIncludingVBlank,
5583 double *StutterEfficiency,
5584 unsigned int *NumberOfStutterBurstsPerFrame,
5585 double *Z8StutterEfficiencyNotIncludingVBlank,
5586 double *Z8StutterEfficiency,
5587 unsigned int *Z8NumberOfStutterBurstsPerFrame,
5588 double *StutterPeriod,
5589 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5592 bool FoundCriticalSurface = false;
5593 unsigned int SwathSizeCriticalSurface = 0;
5594 unsigned int LastChunkOfSwathSize;
5595 unsigned int MissingPartOfLastSwathOfDETSize;
5596 double LastZ8StutterPeriod = 0.0;
5597 double LastStutterPeriod = 0.0;
5598 unsigned int TotalNumberOfActiveOTG = 0;
5599 double doublePixelClock;
5600 unsigned int doubleHTotal;
5601 unsigned int doubleVTotal;
5602 bool SameTiming = true;
5603 double DETBufferingTimeY;
5604 double SwathWidthYCriticalSurface = 0.0;
5605 double SwathHeightYCriticalSurface = 0.0;
5606 double VActiveTimeCriticalSurface = 0.0;
5607 double FrameTimeCriticalSurface = 0.0;
5608 unsigned int BytePerPixelYCriticalSurface = 0;
5609 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5610 unsigned int DETBufferSizeYCriticalSurface = 0;
5611 double MinTTUVBlankCriticalSurface = 0.0;
5612 unsigned int BlockWidth256BytesYCriticalSurface = 0;
5613 bool doublePlaneCriticalSurface = 0;
5614 bool doublePipeCriticalSurface = 0;
5615 double TotalCompressedReadBandwidth;
5616 double TotalRowReadBandwidth;
5617 double AverageDCCCompressionRate;
5618 double EffectiveCompressedBufferSize;
5619 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5620 double StutterBurstTime;
5621 unsigned int TotalActiveWriteback;
5623 double LinesInDETYRoundedDownToSwath;
5624 double MaximumEffectiveCompressionLuma;
5625 double MaximumEffectiveCompressionChroma;
5626 double TotalZeroSizeRequestReadBandwidth;
5627 double TotalZeroSizeCompressedReadBandwidth;
5628 double AverageDCCZeroSizeFraction;
5629 double AverageZeroSizeCompressionRate;
5632 TotalZeroSizeRequestReadBandwidth = 0;
5633 TotalZeroSizeCompressedReadBandwidth = 0;
5634 TotalRowReadBandwidth = 0;
5635 TotalCompressedReadBandwidth = 0;
5637 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5638 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5639 if (DCCEnable[k] == true) {
5640 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5641 || (!IsVertical(SourceRotation[k])
5642 && BlockHeight256BytesY[k] > SwathHeightY[k])
5643 || DCCYMaxUncompressedBlock[k] < 256) {
5644 MaximumEffectiveCompressionLuma = 2;
5646 MaximumEffectiveCompressionLuma = 4;
5648 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5649 + ReadBandwidthSurfaceLuma[k]
5650 / dml_min(NetDCCRateLuma[k],
5651 MaximumEffectiveCompressionLuma);
5652 #ifdef __DML_VBA_DEBUG__
5653 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5654 __func__, k, ReadBandwidthSurfaceLuma[k]);
5655 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5656 __func__, k, NetDCCRateLuma[k]);
5657 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5658 __func__, k, MaximumEffectiveCompressionLuma);
5660 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5661 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5662 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5663 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5664 / MaximumEffectiveCompressionLuma;
5666 if (ReadBandwidthSurfaceChroma[k] > 0) {
5667 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5668 || (!IsVertical(SourceRotation[k])
5669 && BlockHeight256BytesC[k] > SwathHeightC[k])
5670 || DCCCMaxUncompressedBlock[k] < 256) {
5671 MaximumEffectiveCompressionChroma = 2;
5673 MaximumEffectiveCompressionChroma = 4;
5675 TotalCompressedReadBandwidth =
5676 TotalCompressedReadBandwidth
5677 + ReadBandwidthSurfaceChroma[k]
5678 / dml_min(NetDCCRateChroma[k],
5679 MaximumEffectiveCompressionChroma);
5680 #ifdef __DML_VBA_DEBUG__
5681 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5682 __func__, k, ReadBandwidthSurfaceChroma[k]);
5683 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5684 __func__, k, NetDCCRateChroma[k]);
5685 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5686 __func__, k, MaximumEffectiveCompressionChroma);
5688 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5689 + ReadBandwidthSurfaceChroma[k]
5690 * DCCFractionOfZeroSizeRequestsChroma[k];
5691 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5692 + ReadBandwidthSurfaceChroma[k]
5693 * DCCFractionOfZeroSizeRequestsChroma[k]
5694 / MaximumEffectiveCompressionChroma;
5697 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5698 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5700 TotalRowReadBandwidth = TotalRowReadBandwidth
5701 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5705 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5706 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5708 #ifdef __DML_VBA_DEBUG__
5709 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5710 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5711 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5712 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5713 __func__, TotalZeroSizeCompressedReadBandwidth);
5714 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5715 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5716 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5717 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5718 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5719 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5720 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5722 if (AverageDCCZeroSizeFraction == 1) {
5723 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5724 / TotalZeroSizeCompressedReadBandwidth;
5725 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5726 * AverageZeroSizeCompressionRate
5727 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5728 * AverageZeroSizeCompressionRate;
5729 } else if (AverageDCCZeroSizeFraction > 0) {
5730 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5731 / TotalZeroSizeCompressedReadBandwidth;
5732 EffectiveCompressedBufferSize = dml_min(
5733 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5734 (double) MetaFIFOSizeInKEntries * 1024 * 64
5735 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5736 + 1 / AverageDCCCompressionRate))
5737 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5738 * AverageDCCCompressionRate,
5739 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5740 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5742 #ifdef __DML_VBA_DEBUG__
5743 dml_print("DML::%s: min 1 = %f\n", __func__,
5744 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5745 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5746 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5747 AverageDCCCompressionRate));
5748 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5749 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5750 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5751 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5754 EffectiveCompressedBufferSize = dml_min(
5755 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5756 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5757 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5758 * AverageDCCCompressionRate;
5760 #ifdef __DML_VBA_DEBUG__
5761 dml_print("DML::%s: min 1 = %f\n", __func__,
5762 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5763 dml_print("DML::%s: min 2 = %f\n", __func__,
5764 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5768 #ifdef __DML_VBA_DEBUG__
5769 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5770 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5771 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5776 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5777 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5778 LinesInDETY = ((double) DETBufferSizeY[k]
5779 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5780 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5781 / BytePerPixelDETY[k] / SwathWidthY[k];
5782 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5783 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5785 #ifdef __DML_VBA_DEBUG__
5786 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5787 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5788 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5789 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5790 __func__, k, ReadBandwidthSurfaceLuma[k]);
5791 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5792 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5793 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5794 __func__, k, LinesInDETYRoundedDownToSwath);
5795 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5796 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5797 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5798 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5799 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5802 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5803 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5805 FoundCriticalSurface = true;
5806 *StutterPeriod = DETBufferingTimeY;
5807 FrameTimeCriticalSurface = (
5809 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5810 * (double) HTotal[k] / PixelClock[k];
5811 VActiveTimeCriticalSurface = (
5813 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5814 * (double) HTotal[k] / PixelClock[k];
5815 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5816 SwathWidthYCriticalSurface = SwathWidthY[k];
5817 SwathHeightYCriticalSurface = SwathHeightY[k];
5818 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5819 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5820 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5821 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5822 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5823 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5824 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5826 #ifdef __DML_VBA_DEBUG__
5827 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
5828 __func__, k, FoundCriticalSurface);
5829 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
5830 __func__, k, *StutterPeriod);
5831 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
5832 __func__, k, MinTTUVBlankCriticalSurface);
5833 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
5834 __func__, k, FrameTimeCriticalSurface);
5835 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
5836 __func__, k, VActiveTimeCriticalSurface);
5837 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
5838 __func__, k, BytePerPixelYCriticalSurface);
5839 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
5840 __func__, k, SwathWidthYCriticalSurface);
5841 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
5842 __func__, k, SwathHeightYCriticalSurface);
5843 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
5844 __func__, k, BlockWidth256BytesYCriticalSurface);
5845 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
5846 __func__, k, doublePlaneCriticalSurface);
5847 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
5848 __func__, k, doublePipeCriticalSurface);
5849 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5850 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5856 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5857 EffectiveCompressedBufferSize);
5858 #ifdef __DML_VBA_DEBUG__
5859 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5860 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5861 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5862 __func__, *StutterPeriod * TotalDataReadBandwidth);
5863 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5864 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5865 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5866 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5867 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5868 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5869 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5872 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5874 + (*StutterPeriod * TotalDataReadBandwidth
5875 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5876 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5877 #ifdef __DML_VBA_DEBUG__
5878 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5879 AverageDCCCompressionRate / ReturnBW);
5880 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5881 __func__, (*StutterPeriod * TotalDataReadBandwidth));
5882 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5883 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5884 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5885 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5887 StutterBurstTime = dml_max(StutterBurstTime,
5888 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5889 * SwathWidthYCriticalSurface / ReturnBW);
5891 #ifdef __DML_VBA_DEBUG__
5892 dml_print("DML::%s: Time to finish residue swath=%f\n",
5894 LinesToFinishSwathTransferStutterCriticalSurface *
5895 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5898 TotalActiveWriteback = 0;
5899 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5900 if (WritebackEnable[k])
5901 TotalActiveWriteback = TotalActiveWriteback + 1;
5904 if (TotalActiveWriteback == 0) {
5905 #ifdef __DML_VBA_DEBUG__
5906 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5907 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5908 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5909 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5911 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5912 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5913 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5914 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5915 *NumberOfStutterBurstsPerFrame = (
5916 *StutterEfficiencyNotIncludingVBlank > 0 ?
5917 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5918 *Z8NumberOfStutterBurstsPerFrame = (
5919 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5920 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5922 *StutterEfficiencyNotIncludingVBlank = 0.;
5923 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5924 *NumberOfStutterBurstsPerFrame = 0;
5925 *Z8NumberOfStutterBurstsPerFrame = 0;
5927 #ifdef __DML_VBA_DEBUG__
5928 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5929 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5930 __func__, *StutterEfficiencyNotIncludingVBlank);
5931 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5932 __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5933 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5934 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5937 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5938 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5939 if (BlendingAndTiming[k] == k) {
5940 if (TotalNumberOfActiveOTG == 0) {
5941 doublePixelClock = PixelClock[k];
5942 doubleHTotal = HTotal[k];
5943 doubleVTotal = VTotal[k];
5944 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5945 || doubleVTotal != VTotal[k]) {
5948 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5953 if (*StutterEfficiencyNotIncludingVBlank > 0) {
5954 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5956 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5957 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
5958 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
5959 + StutterBurstTime * VActiveTimeCriticalSurface
5960 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5962 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
5965 *StutterEfficiency = 0;
5968 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
5969 LastZ8StutterPeriod = VActiveTimeCriticalSurface
5970 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5971 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
5972 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
5973 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
5974 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5976 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
5979 *Z8StutterEfficiency = 0.;
5982 #ifdef __DML_VBA_DEBUG__
5983 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
5984 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
5985 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5986 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5987 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
5988 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
5989 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5990 __func__, *StutterEfficiencyNotIncludingVBlank);
5991 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5994 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
5995 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
5996 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
5997 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
5998 - DETBufferSizeYCriticalSurface;
6000 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6001 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6002 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6003 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6005 #ifdef __DML_VBA_DEBUG__
6006 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6007 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6008 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6009 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6011 } // CalculateStutterEfficiency
/*
 * Derive the DET budget and the remaining compressed-buffer size from the
 * configurable return buffer and the ROB size.
 *
 * Inputs:
 *   ConfigReturnBufferSizeInKByte - configurable return buffer size
 *   ROBBufferSizeInKByte          - ROB buffer size
 *   MaxNumDPP                     - divisor used for the nominal per-DPP DET
 *   nomDETInKByteOverrideEnable   - when true, *nomDETInKByte is replaced by
 *                                   nomDETInKByteOverrideValue
 *   nomDETInKByteOverrideValue    - replacement value for *nomDETInKByte
 *
 * Outputs:
 *   *MaxTotalDETInKByte             - dml_ceil() of 4/5 of (return buffer + ROB),
 *                                     with a granularity argument of 64
 *   *nomDETInKByte                  - dml_floor() of MaxTotalDET / MaxNumDPP with
 *                                     granularity 64, or the override value
 *   *MinCompressedBufferSizeInKByte - return buffer size minus *MaxTotalDETInKByte
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue, // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	double return_plus_rob_kbyte = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(return_plus_rob_kbyte * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	/*
	 * NOTE(review): unsigned subtraction; it wraps if *MaxTotalDETInKByte is
	 * larger than ConfigReturnBufferSizeInKByte - confirm the callers' ranges.
	 */
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Without an override the computed nominal DET size stands. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
/*
 * Check whether the summed per-surface bandwidth demand fits in ReturnBW.
 *
 * For every surface the demand is accumulated, in this order, as:
 *   luma read bw * luma burst factor
 *   + chroma read bw * chroma burst factor
 *   + cursor bw * cursor burst factor
 *   + NumberOfDPP * meta row bw
 *   + NumberOfDPP * dpte row bw
 *
 * Returns true only when that total is <= ReturnBW and no entry of
 * NotUrgentLatencyHiding[] is set for the first NumberOfActiveSurfaces
 * surfaces. All arrays are indexed 0 .. NumberOfActiveSurfaces - 1.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool latency_not_hidden = false;
	bool supported = false;
	double total_bw = 0;

	/* One offending surface is enough to fail the check. */
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf]) {
			latency_not_hidden = true;
			break;
		}
	}

	/* Terms are added one at a time to keep the left-to-right summation order. */
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		total_bw = total_bw + ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf];
		total_bw = total_bw + ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf];
		total_bw = total_bw + cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		total_bw = total_bw + NumberOfDPP[surf] * meta_row_bandwidth[surf];
		total_bw = total_bw + NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	supported = (total_bw <= ReturnBW) && !latency_not_hidden;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, latency_not_hidden);
	dml_print("DML::%s: VActiveBandwith = %f\n", __func__, total_bw);
	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
	return supported;
}
/*
 * Sum, over the active surfaces, the largest of three per-surface bandwidth
 * candidates and compare the total against ReturnBW.
 *
 * Per-surface candidates passed to dml_max3():
 *   1. NumberOfDPP * prefetch_vmrow_bw
 *   2. luma/chroma/cursor read bandwidth, each scaled by its burst factor,
 *      plus NumberOfDPP * (meta row bw + dpte row bw)
 *   3. NumberOfDPP * (prefetch luma/chroma bandwidth scaled by the "Pre"
 *      burst factors) plus the pre-scaled prefetch cursor bandwidth
 *
 * Outputs:
 *   *PrefetchBandwidth         - the accumulated total
 *   *PrefetchBandwidthSupport  - total <= ReturnBW and no surface flagged in
 *                                NotUrgentLatencyHiding[]
 *   *FractionOfUrgentBandwidth - total / ReturnBW
 *                                (NOTE(review): no guard for ReturnBW == 0)
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],

		/* output */
		double *PrefetchBandwidth,
		double *FractionOfUrgentBandwidth,
		bool *PrefetchBandwidthSupport)
{
	unsigned int surf;
	bool latency_not_hidden = false;

	/* Any flagged surface makes the configuration unsupported. */
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf]) {
			latency_not_hidden = true;
			break;
		}
	}

	*PrefetchBandwidth = 0;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		double vm_row_bw = NumberOfDPP[surf] * prefetch_vmrow_bw[surf];
		double active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * (meta_row_bandwidth[surf] + dpte_row_bandwidth[surf]);
		double prefetch_bw = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
				+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		*PrefetchBandwidth += dml_max3(vm_row_bw, active_bw, prefetch_bw);
	}

	*PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !latency_not_hidden;
	*FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
}
/*
 * Return what is left of ReturnBW after subtracting, for each active surface,
 * the larger (via dml_max()) of:
 *   - luma/chroma/cursor read bandwidth, each scaled by its burst factor
 *   - NumberOfDPP * (prefetch luma/chroma bandwidth scaled by the "Pre" burst
 *     factors) plus the pre-scaled prefetch cursor bandwidth
 *
 * The result is not clamped, so it may be negative when the demand exceeds
 * ReturnBW.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double remaining_bw = ReturnBW;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		double active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		double prefetch_bw = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
				+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw -= dml_max(active_bw, prefetch_bw);
	}

	return remaining_bw;
}
6160 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6162 enum immediate_flip_requirement ImmediateFlipRequirement[],
6163 double final_flip_bw[],
6164 double ReadBandwidthLuma[],
6165 double ReadBandwidthChroma[],
6166 double PrefetchBandwidthLuma[],
6167 double PrefetchBandwidthChroma[],
6169 double meta_row_bandwidth[],
6170 double dpte_row_bandwidth[],
6171 double cursor_bw_pre[],
6172 double prefetch_vmrow_bw[],
6173 unsigned int NumberOfDPP[],
6174 double UrgentBurstFactorLuma[],
6175 double UrgentBurstFactorChroma[],
6176 double UrgentBurstFactorCursor[],
6177 double UrgentBurstFactorLumaPre[],
6178 double UrgentBurstFactorChromaPre[],
6179 double UrgentBurstFactorCursorPre[],
6182 double *TotalBandwidth,
6183 double *FractionOfUrgentBandwidth,
6184 bool *ImmediateFlipBandwidthSupport)
6187 *TotalBandwidth = 0;
6188 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6189 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6190 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6191 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6192 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6194 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6195 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6196 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6199 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6200 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;