2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
28 #include "../display_mode_lib.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
42 #define BPP_BLENDED_PIPE 0xffffffff
43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
46 // For DML-C changes that haven't been propagated to VBA yet
47 //#define __DML_VBA_ALLOW_DELTA__
49 // Move these to ip parameters/constant
51 // The vstartup value at which DML starts checking whether the mode can be supported
52 #define __DML_VBA_MIN_VSTARTUP__ 9
54 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
55 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
57 // fudge factor for min dcfclk calculation
58 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
/*
 * NOTE(review): these members look like part of the per-pipe parameter bundle
 * that CalculatePrefetchSchedule() dereferences as myPipe-> (it reads
 * DCFCLKDeepSleep, DPPPerPlane, InterlaceEnable, NumberOfCursors, DCCEnable,
 * ODMCombineIsEnabled, SourcePixelFormat and ProgressiveToInterlaceUnitInOPP
 * under exactly these names) -- confirm against the struct's declaration.
 */
64 double DCFCLKDeepSleep;
65 unsigned int DPPPerPlane;
67 enum scan_direction_class SourceScan;
68 unsigned int BlockWidth256BytesY;
69 unsigned int BlockHeight256BytesY;
70 unsigned int BlockWidth256BytesC;
71 unsigned int BlockHeight256BytesC;
72 unsigned int InterlaceEnable;
73 unsigned int NumberOfCursors;
76 unsigned int DCCEnable;
77 bool ODMCombineIsEnabled;
78 enum source_format_class SourcePixelFormat;
81 bool ProgressiveToInterlaceUnitInOPP;
85 #define BPP_BLENDED_PIPE 0xffffffff
87 static bool CalculateBytePerPixelAnd256BBlockSizes(
88 enum source_format_class SourcePixelFormat,
89 enum dm_swizzle_mode SurfaceTiling,
90 unsigned int *BytePerPixelY,
91 unsigned int *BytePerPixelC,
92 double *BytePerPixelDETY,
93 double *BytePerPixelDETC,
94 unsigned int *BlockHeight256BytesY,
95 unsigned int *BlockHeight256BytesC,
96 unsigned int *BlockWidth256BytesY,
97 unsigned int *BlockWidth256BytesC);
98 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
99 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
100 static unsigned int dscceComputeDelay(
103 unsigned int sliceWidth,
104 unsigned int numSlices,
105 enum output_format_class pixelFormat,
106 enum output_encoder_class Output);
107 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
108 static bool CalculatePrefetchSchedule(
109 struct display_mode_lib *mode_lib,
110 double HostVMInefficiencyFactor,
112 unsigned int DSCDelay,
113 double DPPCLKDelaySubtotalPlusCNVCFormater,
114 double DPPCLKDelaySCL,
115 double DPPCLKDelaySCLLBOnly,
116 double DPPCLKDelayCNVCCursor,
117 double DISPCLKDelaySubtotal,
118 unsigned int DPP_RECOUT_WIDTH,
119 enum output_format_class OutputFormat,
120 unsigned int MaxInterDCNTileRepeaters,
121 unsigned int VStartup,
122 unsigned int MaxVStartup,
123 unsigned int GPUVMPageTableLevels,
126 unsigned int HostVMMaxNonCachedPageTableLevels,
127 double HostVMMinPageSize,
128 bool DynamicMetadataEnable,
129 bool DynamicMetadataVMEnabled,
130 int DynamicMetadataLinesBeforeActiveRequired,
131 unsigned int DynamicMetadataTransmittedBytes,
132 double UrgentLatency,
133 double UrgentExtraLatency,
135 unsigned int PDEAndMetaPTEBytesFrame,
136 unsigned int MetaRowByte,
137 unsigned int PixelPTEBytesPerRow,
138 double PrefetchSourceLinesY,
139 unsigned int SwathWidthY,
140 double VInitPreFillY,
141 unsigned int MaxNumSwathY,
142 double PrefetchSourceLinesC,
143 unsigned int SwathWidthC,
144 double VInitPreFillC,
145 unsigned int MaxNumSwathC,
146 int swath_width_luma_ub,
147 int swath_width_chroma_ub,
148 unsigned int SwathHeightY,
149 unsigned int SwathHeightC,
151 double *DSTXAfterScaler,
152 double *DSTYAfterScaler,
153 double *DestinationLinesForPrefetch,
154 double *PrefetchBandwidth,
155 double *DestinationLinesToRequestVMInVBlank,
156 double *DestinationLinesToRequestRowInVBlank,
157 double *VRatioPrefetchY,
158 double *VRatioPrefetchC,
159 double *RequiredPrefetchPixDataBWLuma,
160 double *RequiredPrefetchPixDataBWChroma,
161 bool *NotEnoughTimeForDynamicMetadata,
163 double *prefetch_vmrow_bw,
167 int *VUpdateOffsetPix,
168 double *VUpdateWidthPix,
169 double *VReadyOffsetPix);
170 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
171 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
172 static void CalculateDCCConfiguration(
174 bool DCCProgrammingAssumesScanDirectionUnknown,
175 enum source_format_class SourcePixelFormat,
176 unsigned int SurfaceWidthLuma,
177 unsigned int SurfaceWidthChroma,
178 unsigned int SurfaceHeightLuma,
179 unsigned int SurfaceHeightChroma,
180 double DETBufferSize,
181 unsigned int RequestHeight256ByteLuma,
182 unsigned int RequestHeight256ByteChroma,
183 enum dm_swizzle_mode TilingFormat,
184 unsigned int BytePerPixelY,
185 unsigned int BytePerPixelC,
186 double BytePerPixelDETY,
187 double BytePerPixelDETC,
188 enum scan_direction_class ScanOrientation,
189 unsigned int *MaxUncompressedBlockLuma,
190 unsigned int *MaxUncompressedBlockChroma,
191 unsigned int *MaxCompressedBlockLuma,
192 unsigned int *MaxCompressedBlockChroma,
193 unsigned int *IndependentBlockLuma,
194 unsigned int *IndependentBlockChroma);
195 static double CalculatePrefetchSourceLines(
196 struct display_mode_lib *mode_lib,
200 bool ProgressiveToInterlaceUnitInOPP,
201 unsigned int SwathHeight,
202 unsigned int ViewportYStart,
203 double *VInitPreFill,
204 unsigned int *MaxNumSwath);
205 static unsigned int CalculateVMAndRowBytes(
206 struct display_mode_lib *mode_lib,
208 unsigned int BlockHeight256Bytes,
209 unsigned int BlockWidth256Bytes,
210 enum source_format_class SourcePixelFormat,
211 unsigned int SurfaceTiling,
212 unsigned int BytePerPixel,
213 enum scan_direction_class ScanDirection,
214 unsigned int SwathWidth,
215 unsigned int ViewportHeight,
218 unsigned int HostVMMaxNonCachedPageTableLevels,
219 unsigned int GPUVMMinPageSize,
220 unsigned int HostVMMinPageSize,
221 unsigned int PTEBufferSizeInRequests,
223 unsigned int DCCMetaPitch,
224 unsigned int *MacroTileWidth,
225 unsigned int *MetaRowByte,
226 unsigned int *PixelPTEBytesPerRow,
227 bool *PTEBufferSizeNotExceeded,
228 int *dpte_row_width_ub,
229 unsigned int *dpte_row_height,
230 unsigned int *MetaRequestWidth,
231 unsigned int *MetaRequestHeight,
232 unsigned int *meta_row_width,
233 unsigned int *meta_row_height,
235 unsigned int *dpte_group_bytes,
236 unsigned int *PixelPTEReqWidth,
237 unsigned int *PixelPTEReqHeight,
238 unsigned int *PTERequestSize,
239 int *DPDE0BytesFrame,
240 int *MetaPTEBytesFrame);
241 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
242 static void CalculateRowBandwidth(
244 enum source_format_class SourcePixelFormat,
249 unsigned int MetaRowByteLuma,
250 unsigned int MetaRowByteChroma,
251 unsigned int meta_row_height_luma,
252 unsigned int meta_row_height_chroma,
253 unsigned int PixelPTEBytesPerRowLuma,
254 unsigned int PixelPTEBytesPerRowChroma,
255 unsigned int dpte_row_height_luma,
256 unsigned int dpte_row_height_chroma,
258 double *dpte_row_bw);
260 static void CalculateFlipSchedule(
261 struct display_mode_lib *mode_lib,
262 double HostVMInefficiencyFactor,
263 double UrgentExtraLatency,
264 double UrgentLatency,
265 unsigned int GPUVMMaxPageTableLevels,
267 unsigned int HostVMMaxNonCachedPageTableLevels,
269 double HostVMMinPageSize,
270 double PDEAndMetaPTEBytesPerFrame,
272 double DPTEBytesPerRow,
273 double BandwidthAvailableForImmediateFlip,
274 unsigned int TotImmediateFlipBytes,
275 enum source_format_class SourcePixelFormat,
281 unsigned int dpte_row_height,
282 unsigned int meta_row_height,
283 unsigned int dpte_row_height_chroma,
284 unsigned int meta_row_height_chroma,
285 double *DestinationLinesToRequestVMInImmediateFlip,
286 double *DestinationLinesToRequestRowInImmediateFlip,
287 double *final_flip_bw,
288 bool *ImmediateFlipSupportedForPipe);
289 static double CalculateWriteBackDelay(
290 enum source_format_class WritebackPixelFormat,
291 double WritebackHRatio,
292 double WritebackVRatio,
293 unsigned int WritebackVTaps,
294 int WritebackDestinationWidth,
295 int WritebackDestinationHeight,
296 int WritebackSourceHeight,
297 unsigned int HTotal);
299 static void CalculateVupdateAndDynamicMetadataParameters(
300 int MaxInterDCNTileRepeaters,
303 double DCFClkDeepSleep,
307 int DynamicMetadataTransmittedBytes,
308 int DynamicMetadataLinesBeforeActiveRequired,
310 bool ProgressiveToInterlaceUnitInOPP,
315 int *VUpdateOffsetPix,
316 double *VUpdateWidthPix,
317 double *VReadyOffsetPix);
319 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
320 struct display_mode_lib *mode_lib,
321 unsigned int PrefetchMode,
322 unsigned int NumberOfActivePlanes,
323 unsigned int MaxLineBufferLines,
324 unsigned int LineBufferSize,
325 unsigned int WritebackInterfaceBufferSize,
328 bool SynchronizedVBlank,
329 unsigned int dpte_group_bytes[],
330 unsigned int MetaChunkSize,
331 double UrgentLatency,
333 double WritebackLatency,
334 double WritebackChunkSize,
336 double DRAMClockChangeLatency,
338 double SREnterPlusExitTime,
340 double SREnterPlusExitZ8Time,
341 double DCFCLKDeepSleep,
342 unsigned int DETBufferSizeY[],
343 unsigned int DETBufferSizeC[],
344 unsigned int SwathHeightY[],
345 unsigned int SwathHeightC[],
346 unsigned int LBBitPerPixel[],
347 double SwathWidthY[],
348 double SwathWidthC[],
350 double HRatioChroma[],
351 unsigned int vtaps[],
352 unsigned int VTAPsChroma[],
354 double VRatioChroma[],
355 unsigned int HTotal[],
357 unsigned int BlendingAndTiming[],
358 unsigned int DPPPerPlane[],
359 double BytePerPixelDETY[],
360 double BytePerPixelDETC[],
361 double DSTXAfterScaler[],
362 double DSTYAfterScaler[],
363 bool WritebackEnable[],
364 enum source_format_class WritebackPixelFormat[],
365 double WritebackDestinationWidth[],
366 double WritebackDestinationHeight[],
367 double WritebackSourceHeight[],
368 bool UnboundedRequestEnabled,
369 int unsigned CompressedBufferSizeInkByte,
370 enum clock_change_support *DRAMClockChangeSupport,
371 double *UrgentWatermark,
372 double *WritebackUrgentWatermark,
373 double *DRAMClockChangeWatermark,
374 double *WritebackDRAMClockChangeWatermark,
375 double *StutterExitWatermark,
376 double *StutterEnterPlusExitWatermark,
377 double *Z8StutterExitWatermark,
378 double *Z8StutterEnterPlusExitWatermark,
379 double *MinActiveDRAMClockChangeLatencySupported);
381 static void CalculateDCFCLKDeepSleep(
382 struct display_mode_lib *mode_lib,
383 unsigned int NumberOfActivePlanes,
387 double VRatioChroma[],
388 double SwathWidthY[],
389 double SwathWidthC[],
390 unsigned int DPPPerPlane[],
392 double HRatioChroma[],
394 double PSCL_THROUGHPUT[],
395 double PSCL_THROUGHPUT_CHROMA[],
397 double ReadBandwidthLuma[],
398 double ReadBandwidthChroma[],
400 double *DCFCLKDeepSleep);
402 static void CalculateUrgentBurstFactor(
403 int swath_width_luma_ub,
404 int swath_width_chroma_ub,
405 unsigned int SwathHeightY,
406 unsigned int SwathHeightC,
408 double UrgentLatency,
409 double CursorBufferSize,
410 unsigned int CursorWidth,
411 unsigned int CursorBPP,
414 double BytePerPixelInDETY,
415 double BytePerPixelInDETC,
416 double DETBufferSizeY,
417 double DETBufferSizeC,
418 double *UrgentBurstFactorCursor,
419 double *UrgentBurstFactorLuma,
420 double *UrgentBurstFactorChroma,
421 bool *NotEnoughUrgentLatencyHiding);
423 static void UseMinimumDCFCLK(
424 struct display_mode_lib *mode_lib,
425 int MaxInterDCNTileRepeaters,
427 double FinalDRAMClockChangeLatency,
428 double SREnterPlusExitTime,
430 int RoundTripPingLatencyCycles,
432 int PixelChunkSizeInKByte,
435 int GPUVMMaxPageTableLevels,
437 int NumberOfActivePlanes,
438 double HostVMMinPageSize,
439 int HostVMMaxNonCachedPageTableLevels,
440 bool DynamicMetadataVMEnabled,
441 enum immediate_flip_requirement ImmediateFlipRequirement,
442 bool ProgressiveToInterlaceUnitInOPP,
443 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
444 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
447 int DynamicMetadataTransmittedBytes[],
448 int DynamicMetadataLinesBeforeActiveRequired[],
450 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
451 double RequiredDISPCLK[][2],
453 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
454 double ProjectedDCFCLKDeepSleep[][2],
455 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
456 double TotalVActivePixelBandwidth[][2],
457 double TotalVActiveCursorBandwidth[][2],
458 double TotalMetaRowBandwidth[][2],
459 double TotalDPTERowBandwidth[][2],
460 unsigned int TotalNumberOfActiveDPP[][2],
461 unsigned int TotalNumberOfDCCActiveDPP[][2],
462 int dpte_group_bytes[],
463 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
464 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
465 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
466 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
471 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
472 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
473 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
474 bool DynamicMetadataEnable[],
475 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
476 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
477 double ReadBandwidthLuma[],
478 double ReadBandwidthChroma[],
479 double DCFCLKPerState[],
480 double DCFCLKState[][2]);
482 static void CalculatePixelDeliveryTimes(
483 unsigned int NumberOfActivePlanes,
485 double VRatioChroma[],
486 double VRatioPrefetchY[],
487 double VRatioPrefetchC[],
488 unsigned int swath_width_luma_ub[],
489 unsigned int swath_width_chroma_ub[],
490 unsigned int DPPPerPlane[],
492 double HRatioChroma[],
494 double PSCL_THROUGHPUT[],
495 double PSCL_THROUGHPUT_CHROMA[],
498 enum scan_direction_class SourceScan[],
499 unsigned int NumberOfCursors[],
500 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
501 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
502 unsigned int BlockWidth256BytesY[],
503 unsigned int BlockHeight256BytesY[],
504 unsigned int BlockWidth256BytesC[],
505 unsigned int BlockHeight256BytesC[],
506 double DisplayPipeLineDeliveryTimeLuma[],
507 double DisplayPipeLineDeliveryTimeChroma[],
508 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
509 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
510 double DisplayPipeRequestDeliveryTimeLuma[],
511 double DisplayPipeRequestDeliveryTimeChroma[],
512 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
513 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
514 double CursorRequestDeliveryTime[],
515 double CursorRequestDeliveryTimePrefetch[]);
517 static void CalculateMetaAndPTETimes(
518 int NumberOfActivePlanes,
521 int MinMetaChunkSizeBytes,
524 double VRatioChroma[],
525 double DestinationLinesToRequestRowInVBlank[],
526 double DestinationLinesToRequestRowInImmediateFlip[],
531 enum scan_direction_class SourceScan[],
532 int dpte_row_height[],
533 int dpte_row_height_chroma[],
534 int meta_row_width[],
535 int meta_row_width_chroma[],
536 int meta_row_height[],
537 int meta_row_height_chroma[],
538 int meta_req_width[],
539 int meta_req_width_chroma[],
540 int meta_req_height[],
541 int meta_req_height_chroma[],
542 int dpte_group_bytes[],
543 int PTERequestSizeY[],
544 int PTERequestSizeC[],
545 int PixelPTEReqWidthY[],
546 int PixelPTEReqHeightY[],
547 int PixelPTEReqWidthC[],
548 int PixelPTEReqHeightC[],
549 int dpte_row_width_luma_ub[],
550 int dpte_row_width_chroma_ub[],
551 double DST_Y_PER_PTE_ROW_NOM_L[],
552 double DST_Y_PER_PTE_ROW_NOM_C[],
553 double DST_Y_PER_META_ROW_NOM_L[],
554 double DST_Y_PER_META_ROW_NOM_C[],
555 double TimePerMetaChunkNominal[],
556 double TimePerChromaMetaChunkNominal[],
557 double TimePerMetaChunkVBlank[],
558 double TimePerChromaMetaChunkVBlank[],
559 double TimePerMetaChunkFlip[],
560 double TimePerChromaMetaChunkFlip[],
561 double time_per_pte_group_nom_luma[],
562 double time_per_pte_group_vblank_luma[],
563 double time_per_pte_group_flip_luma[],
564 double time_per_pte_group_nom_chroma[],
565 double time_per_pte_group_vblank_chroma[],
566 double time_per_pte_group_flip_chroma[]);
568 static void CalculateVMGroupAndRequestTimes(
569 unsigned int NumberOfActivePlanes,
571 unsigned int GPUVMMaxPageTableLevels,
572 unsigned int HTotal[],
574 double DestinationLinesToRequestVMInVBlank[],
575 double DestinationLinesToRequestVMInImmediateFlip[],
578 int dpte_row_width_luma_ub[],
579 int dpte_row_width_chroma_ub[],
580 int vm_group_bytes[],
581 unsigned int dpde0_bytes_per_frame_ub_l[],
582 unsigned int dpde0_bytes_per_frame_ub_c[],
583 int meta_pte_bytes_per_frame_ub_l[],
584 int meta_pte_bytes_per_frame_ub_c[],
585 double TimePerVMGroupVBlank[],
586 double TimePerVMGroupFlip[],
587 double TimePerVMRequestVBlank[],
588 double TimePerVMRequestFlip[]);
590 static void CalculateStutterEfficiency(
591 struct display_mode_lib *mode_lib,
592 int CompressedBufferSizeInkByte,
593 bool UnboundedRequestEnabled,
594 int ConfigReturnBufferSizeInKByte,
595 int MetaFIFOSizeInKEntries,
596 int ZeroSizeBufferEntries,
597 int NumberOfActivePlanes,
598 int ROBBufferSizeInKByte,
599 double TotalDataReadBandwidth,
602 double COMPBUF_RESERVED_SPACE_64B,
603 double COMPBUF_RESERVED_SPACE_ZS,
606 bool SynchronizedVBlank,
607 double Z8StutterEnterPlusExitWatermark,
608 double StutterEnterPlusExitWatermark,
609 bool ProgressiveToInterlaceUnitInOPP,
611 double MinTTUVBlank[],
613 unsigned int DETBufferSizeY[],
615 double BytePerPixelDETY[],
616 double SwathWidthY[],
619 double NetDCCRateLuma[],
620 double NetDCCRateChroma[],
621 double DCCFractionOfZeroSizeRequestsLuma[],
622 double DCCFractionOfZeroSizeRequestsChroma[],
627 enum scan_direction_class SourceScan[],
628 int BlockHeight256BytesY[],
629 int BlockWidth256BytesY[],
630 int BlockHeight256BytesC[],
631 int BlockWidth256BytesC[],
632 int DCCYMaxUncompressedBlock[],
633 int DCCCMaxUncompressedBlock[],
636 bool WritebackEnable[],
637 double ReadBandwidthPlaneLuma[],
638 double ReadBandwidthPlaneChroma[],
639 double meta_row_bw[],
640 double dpte_row_bw[],
641 double *StutterEfficiencyNotIncludingVBlank,
642 double *StutterEfficiency,
643 int *NumberOfStutterBurstsPerFrame,
644 double *Z8StutterEfficiencyNotIncludingVBlank,
645 double *Z8StutterEfficiency,
646 int *Z8NumberOfStutterBurstsPerFrame,
647 double *StutterPeriod);
649 static void CalculateSwathAndDETConfiguration(
651 int NumberOfActivePlanes,
652 unsigned int DETBufferSizeInKByte,
653 double MaximumSwathWidthLuma[],
654 double MaximumSwathWidthChroma[],
655 enum scan_direction_class SourceScan[],
656 enum source_format_class SourcePixelFormat[],
657 enum dm_swizzle_mode SurfaceTiling[],
659 int ViewportHeight[],
662 int SurfaceHeightY[],
663 int SurfaceHeightC[],
664 int Read256BytesBlockHeightY[],
665 int Read256BytesBlockHeightC[],
666 int Read256BytesBlockWidthY[],
667 int Read256BytesBlockWidthC[],
668 enum odm_combine_mode ODMCombineEnabled[],
669 int BlendingAndTiming[],
672 double BytePerPixDETY[],
673 double BytePerPixDETC[],
676 double HRatioChroma[],
678 int swath_width_luma_ub[],
679 int swath_width_chroma_ub[],
681 double SwathWidthChroma[],
684 unsigned int DETBufferSizeY[],
685 unsigned int DETBufferSizeC[],
686 bool ViewportSizeSupportPerPlane[],
687 bool *ViewportSizeSupport);
688 static void CalculateSwathWidth(
690 int NumberOfActivePlanes,
691 enum source_format_class SourcePixelFormat[],
692 enum scan_direction_class SourceScan[],
694 int ViewportHeight[],
697 int SurfaceHeightY[],
698 int SurfaceHeightC[],
699 enum odm_combine_mode ODMCombineEnabled[],
702 int Read256BytesBlockHeightY[],
703 int Read256BytesBlockHeightC[],
704 int Read256BytesBlockWidthY[],
705 int Read256BytesBlockWidthC[],
706 int BlendingAndTiming[],
710 double SwathWidthSingleDPPY[],
711 double SwathWidthSingleDPPC[],
712 double SwathWidthY[],
713 double SwathWidthC[],
714 int MaximumSwathHeightY[],
715 int MaximumSwathHeightC[],
716 int swath_width_luma_ub[],
717 int swath_width_chroma_ub[]);
719 static double CalculateExtraLatency(
720 int RoundTripPingLatencyCycles,
723 int TotalNumberOfActiveDPP,
724 int PixelChunkSizeInKByte,
725 int TotalNumberOfDCCActiveDPP,
730 int NumberOfActivePlanes,
732 int dpte_group_bytes[],
733 double HostVMInefficiencyFactor,
734 double HostVMMinPageSize,
735 int HostVMMaxNonCachedPageTableLevels);
737 static double CalculateExtraLatencyBytes(
739 int TotalNumberOfActiveDPP,
740 int PixelChunkSizeInKByte,
741 int TotalNumberOfDCCActiveDPP,
745 int NumberOfActivePlanes,
747 int dpte_group_bytes[],
748 double HostVMInefficiencyFactor,
749 double HostVMMinPageSize,
750 int HostVMMaxNonCachedPageTableLevels);
752 static double CalculateUrgentLatency(
753 double UrgentLatencyPixelDataOnly,
754 double UrgentLatencyPixelMixedWithVMData,
755 double UrgentLatencyVMDataOnly,
756 bool DoUrgentLatencyAdjustment,
757 double UrgentLatencyAdjustmentFabricClockComponent,
758 double UrgentLatencyAdjustmentFabricClockReference,
759 double FabricClockSingle);
761 static void CalculateUnboundedRequestAndCompressedBufferSize(
762 unsigned int DETBufferSizeInKByte,
763 int ConfigReturnBufferSizeInKByte,
764 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
768 int CompressedBufferSegmentSizeInkByteFinal,
769 enum output_encoder_class *Output,
770 bool *UnboundedRequestEnabled,
771 int *CompressedBufferSizeInkByte);
773 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
/*
 * dml31_recalculate - run the dml31 recalculation sequence on @mode_lib.
 *
 * Calls, in this order and each with the same @mode_lib:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Returns nothing; results are presumably left in @mode_lib by the callees
 * (their bodies are not part of this function -- confirm there).
 */
775 void dml31_recalculate(struct display_mode_lib *mode_lib)
777 ModeSupportAndSystemConfiguration(mode_lib);
778 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
779 DisplayPipeConfiguration(mode_lib);
// Trace the hand-off to the main calculation only when __DML_VBA_DEBUG__ is defined.
780 #ifdef __DML_VBA_DEBUG__
781 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
783 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
/*
 * dscceComputeDelay - delay of the DSC encoder core ("dscce").
 *
 * @sliceWidth:  number of pixels per slice line.
 * @numSlices:   number of slices in the horizontal direction per DSC engine.
 * @pixelFormat: pixel/color format; selects pixelsPerClock and the extra
 *               cycle term `s`.
 * @Output:      output encoder class.
 *
 * The initial transmit delay is rcModelSize / 2 / BPP / pixelsPerClock rounded
 * with dml_round(), the slice width seen by the DSC is
 * sliceWidth / pixelsPerClock, and the cycle count `Delay` is scaled by
 * 3 * pixelsPerClock to give `pixels` (presumably the value returned).
 */
786 static unsigned int dscceComputeDelay(
789 unsigned int sliceWidth,
790 unsigned int numSlices,
791 enum output_format_class pixelFormat,
792 enum output_encoder_class Output)
794 // valid bpc = source bits per component in the set of {8, 10, 12}
795 // valid bpp = increments of 1/16 of a bit
796 // min = 6/7/8 in N420/N422/444, respectively
797 // max = such that compression is 1:1
798 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
799 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
800 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
803 unsigned int rcModelSize = 8192;
805 // N422/N420 operate at 2 pixels per clock
// pixelsPerClock starts at 0 and is used as a divisor below, so the format
// checks that follow are expected to assign it on every path.
806 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
808 if (pixelFormat == dm_420)
810 else if (pixelFormat == dm_444)
812 else if (pixelFormat == dm_n422)
814 // #all other modes operate at 1 pixel per clock
818 //initial transmit delay as per PPS
819 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
829 //divide by pixel per cycle to compute slice width as seen by DSC
830 w = sliceWidth / pixelsPerClock;
832 //422 mode has an additional cycle of delay
833 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
838 //main calculation for the dscce
839 ix = initalXmitDelay + 45;
844 ax = (a + 2) / 3 + D + 6 + 1;
845 L = (ax + wx - 1) / wx;
// NOTE(review): w is an unsigned integer quotient, so it is 0 whenever
// sliceWidth < pixelsPerClock, and `ix % w` would then be undefined --
// confirm callers always pass a large enough sliceWidth.
846 if ((ix % w) == 0 && P != 0)
850 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
852 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
853 pixels = Delay * 3 * pixelsPerClock;
/*
 * dscComputeDelay - accumulate the DSC controller ("dscc") delay for a format.
 *
 * @pixelFormat: output format; dm_420 and dm_n422 each have their own path,
 *               and a third group of stage comments follows (presumably the
 *               fallback for every other format).
 * @Output:      output encoder class.
 *
 * Delay starts at 0. Per the stage comments, each path accounts for the input
 * deserializer, input CDC FIFO, CDC uncertainty, output CDC FIFO and output
 * serializer; the dm_420 path additionally notes that the dscc receives pixels
 * only every other cycle. The accumulated Delay is presumably the return value.
 */
857 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
859 unsigned int Delay = 0;
861 if (pixelFormat == dm_420) {
866 // dscc - input deserializer
868 // dscc gets pixels every other cycle
870 // dscc - input cdc fifo
872 // dscc gets pixels every other cycle
874 // dscc - cdc uncertainty
876 // dscc - output cdc fifo
878 // dscc gets pixels every other cycle
880 // dscc - cdc uncertainty
882 // dscc - output serializer
886 } else if (pixelFormat == dm_n422) {
891 // dscc - input deserializer
893 // dscc - input cdc fifo
895 // dscc - cdc uncertainty
897 // dscc - output cdc fifo
899 // dscc - cdc uncertainty
901 // dscc - output serializer
910 // dscc - input deserializer
912 // dscc - input cdc fifo
914 // dscc - cdc uncertainty
916 // dscc - output cdc fifo
918 // dscc - output serializer
920 // dscc - cdc uncertainty
929 static bool CalculatePrefetchSchedule(
930 struct display_mode_lib *mode_lib,
931 double HostVMInefficiencyFactor,
933 unsigned int DSCDelay,
934 double DPPCLKDelaySubtotalPlusCNVCFormater,
935 double DPPCLKDelaySCL,
936 double DPPCLKDelaySCLLBOnly,
937 double DPPCLKDelayCNVCCursor,
938 double DISPCLKDelaySubtotal,
939 unsigned int DPP_RECOUT_WIDTH,
940 enum output_format_class OutputFormat,
941 unsigned int MaxInterDCNTileRepeaters,
942 unsigned int VStartup,
943 unsigned int MaxVStartup,
944 unsigned int GPUVMPageTableLevels,
947 unsigned int HostVMMaxNonCachedPageTableLevels,
948 double HostVMMinPageSize,
949 bool DynamicMetadataEnable,
950 bool DynamicMetadataVMEnabled,
951 int DynamicMetadataLinesBeforeActiveRequired,
952 unsigned int DynamicMetadataTransmittedBytes,
953 double UrgentLatency,
954 double UrgentExtraLatency,
956 unsigned int PDEAndMetaPTEBytesFrame,
957 unsigned int MetaRowByte,
958 unsigned int PixelPTEBytesPerRow,
959 double PrefetchSourceLinesY,
960 unsigned int SwathWidthY,
961 double VInitPreFillY,
962 unsigned int MaxNumSwathY,
963 double PrefetchSourceLinesC,
964 unsigned int SwathWidthC,
965 double VInitPreFillC,
966 unsigned int MaxNumSwathC,
967 int swath_width_luma_ub,
968 int swath_width_chroma_ub,
969 unsigned int SwathHeightY,
970 unsigned int SwathHeightC,
972 double *DSTXAfterScaler,
973 double *DSTYAfterScaler,
974 double *DestinationLinesForPrefetch,
975 double *PrefetchBandwidth,
976 double *DestinationLinesToRequestVMInVBlank,
977 double *DestinationLinesToRequestRowInVBlank,
978 double *VRatioPrefetchY,
979 double *VRatioPrefetchC,
980 double *RequiredPrefetchPixDataBWLuma,
981 double *RequiredPrefetchPixDataBWChroma,
982 bool *NotEnoughTimeForDynamicMetadata,
984 double *prefetch_vmrow_bw,
988 int *VUpdateOffsetPix,
989 double *VUpdateWidthPix,
990 double *VReadyOffsetPix)
992 bool MyError = false;
993 unsigned int DPPCycles, DISPCLKCycles;
994 double DSTTotalPixelsAfterScaler;
996 double dst_y_prefetch_equ;
998 double prefetch_bw_oto;
1001 double Tvm_oto_lines;
1002 double Tr0_oto_lines;
1003 double dst_y_prefetch_oto;
1004 double TimeForFetchingMetaPTE = 0;
1005 double TimeForFetchingRowInVBlank = 0;
1006 double LinesToRequestPrefetchPixelData = 0;
1007 unsigned int HostVMDynamicLevelsTrips;
1011 double Tvm_trips_rounded;
1012 double Tr0_trips_rounded;
1014 double Tpre_rounded;
1015 double prefetch_bw_equ;
1021 double prefetch_sw_bytes;
1024 int max_vratio_pre = 4;
1026 double Tsw_est1 = 0;
1027 double Tsw_est3 = 0;
1029 if (GPUVMEnable == true && HostVMEnable == true) {
1030 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
1032 HostVMDynamicLevelsTrips = 0;
1034 #ifdef __DML_VBA_DEBUG__
1035 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
1037 CalculateVupdateAndDynamicMetadataParameters(
1038 MaxInterDCNTileRepeaters,
1041 myPipe->DCFCLKDeepSleep,
1045 DynamicMetadataTransmittedBytes,
1046 DynamicMetadataLinesBeforeActiveRequired,
1047 myPipe->InterlaceEnable,
1048 myPipe->ProgressiveToInterlaceUnitInOPP,
1057 LineTime = myPipe->HTotal / myPipe->PixelClock;
1058 trip_to_mem = UrgentLatency;
1059 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
1061 #ifdef __DML_VBA_ALLOW_DELTA__
1062 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
1064 if (DynamicMetadataVMEnabled == true) {
1066 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
1068 *Tdmdl = TWait + UrgentExtraLatency;
1071 #ifdef __DML_VBA_ALLOW_DELTA__
1072 if (DynamicMetadataEnable == false) {
1077 if (DynamicMetadataEnable == true) {
1078 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
1079 *NotEnoughTimeForDynamicMetadata = true;
1080 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
1081 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
1082 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
1083 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
1084 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
1086 *NotEnoughTimeForDynamicMetadata = false;
1089 *NotEnoughTimeForDynamicMetadata = false;
1092 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1094 if (myPipe->ScalerEnabled)
1095 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1097 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1099 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1101 DISPCLKCycles = DISPCLKDelaySubtotal;
1103 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1106 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1108 #ifdef __DML_VBA_DEBUG__
1109 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1110 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1111 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1112 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1113 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1114 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1115 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1116 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1119 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1121 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1122 *DSTYAfterScaler = 1;
1124 *DSTYAfterScaler = 0;
1126 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1127 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1128 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1130 #ifdef __DML_VBA_DEBUG__
1131 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1136 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1137 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1138 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1140 #ifdef __DML_VBA_ALLOW_DELTA__
1141 if (!myPipe->DCCEnable) {
1143 Tr0_trips_rounded = 0.0;
1149 Tvm_trips_rounded = 0.0;
1153 if (GPUVMPageTableLevels >= 3) {
1154 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1158 } else if (!myPipe->DCCEnable) {
1161 *Tno_bw = LineTime / 4;
1164 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1165 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1167 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1169 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1170 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
1172 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
1173 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1174 Tsw_oto = Lsw_oto * LineTime;
1176 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
1178 #ifdef __DML_VBA_DEBUG__
1179 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1180 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1181 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1182 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1183 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1184 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1187 if (GPUVMEnable == true)
1188 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1190 Tvm_oto = LineTime / 4.0;
1192 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1193 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1197 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1200 #ifdef __DML_VBA_DEBUG__
1201 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1202 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1203 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1204 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1205 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1206 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1207 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1208 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1209 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1212 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1213 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1214 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1215 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1216 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1217 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1219 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1221 if (prefetch_sw_bytes < dep_bytes)
1222 prefetch_sw_bytes = 2 * dep_bytes;
1224 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1225 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1226 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1227 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1228 dml_print("DML: LineTime: %f\n", LineTime);
1229 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1231 dml_print("DML: LineTime: %f\n", LineTime);
1232 dml_print("DML: VStartup: %d\n", VStartup);
1233 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1234 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1235 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1236 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1237 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1238 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1239 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1240 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1241 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1242 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1243 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1245 *PrefetchBandwidth = 0;
1246 *DestinationLinesToRequestVMInVBlank = 0;
1247 *DestinationLinesToRequestRowInVBlank = 0;
1248 *VRatioPrefetchY = 0;
1249 *VRatioPrefetchC = 0;
1250 *RequiredPrefetchPixDataBWLuma = 0;
1251 if (dst_y_prefetch_equ > 1) {
1252 double PrefetchBandwidth1;
1253 double PrefetchBandwidth2;
1254 double PrefetchBandwidth3;
1255 double PrefetchBandwidth4;
1257 if (Tpre_rounded - *Tno_bw > 0) {
1258 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1259 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1260 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1262 PrefetchBandwidth1 = 0;
1265 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1266 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1267 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1270 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1271 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1273 PrefetchBandwidth2 = 0;
1275 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1276 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1277 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1278 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1280 PrefetchBandwidth3 = 0;
1283 #ifdef __DML_VBA_DEBUG__
1284 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1285 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1286 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1288 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1289 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1290 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1293 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1294 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1296 PrefetchBandwidth4 = 0;
1303 if (PrefetchBandwidth1 > 0) {
1304 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1305 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1314 if (PrefetchBandwidth2 > 0) {
1315 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1316 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1325 if (PrefetchBandwidth3 > 0) {
1326 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1327 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1337 prefetch_bw_equ = PrefetchBandwidth1;
1338 } else if (Case2OK) {
1339 prefetch_bw_equ = PrefetchBandwidth2;
1340 } else if (Case3OK) {
1341 prefetch_bw_equ = PrefetchBandwidth3;
1343 prefetch_bw_equ = PrefetchBandwidth4;
1346 #ifdef __DML_VBA_DEBUG__
1347 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1348 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1349 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1350 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1353 if (prefetch_bw_equ > 0) {
1354 if (GPUVMEnable == true) {
1355 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1357 Tvm_equ = LineTime / 4;
1360 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1362 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1364 (LineTime - Tvm_equ) / 2,
1367 Tr0_equ = (LineTime - Tvm_equ) / 2;
1372 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1376 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1377 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1378 TimeForFetchingMetaPTE = Tvm_oto;
1379 TimeForFetchingRowInVBlank = Tr0_oto;
1380 *PrefetchBandwidth = prefetch_bw_oto;
1382 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1383 TimeForFetchingMetaPTE = Tvm_equ;
1384 TimeForFetchingRowInVBlank = Tr0_equ;
1385 *PrefetchBandwidth = prefetch_bw_equ;
1388 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1390 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1392 #ifdef __DML_VBA_ALLOW_DELTA__
1393 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1394 // See note above dated 5/30/2018
1395 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1396 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1398 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1401 #ifdef __DML_VBA_DEBUG__
1402 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1403 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1404 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1405 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1406 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1407 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1408 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1411 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1413 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1414 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1415 #ifdef __DML_VBA_DEBUG__
1416 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1417 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1418 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1420 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1421 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1422 *VRatioPrefetchY = dml_max(
1423 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1424 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1425 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1428 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1429 *VRatioPrefetchY = 0;
1431 #ifdef __DML_VBA_DEBUG__
1432 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1433 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1434 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1438 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1439 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1441 #ifdef __DML_VBA_DEBUG__
1442 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1443 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1444 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1446 if ((SwathHeightC > 4)) {
1447 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1448 *VRatioPrefetchC = dml_max(
1450 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1451 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1454 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1455 *VRatioPrefetchC = 0;
1457 #ifdef __DML_VBA_DEBUG__
1458 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1459 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1460 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1464 #ifdef __DML_VBA_DEBUG__
1465 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1466 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1467 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1470 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1472 #ifdef __DML_VBA_DEBUG__
1473 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1476 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1480 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1481 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1482 *VRatioPrefetchY = 0;
1483 *VRatioPrefetchC = 0;
1484 *RequiredPrefetchPixDataBWLuma = 0;
1485 *RequiredPrefetchPixDataBWChroma = 0;
1489 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1490 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1491 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1492 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1494 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1495 (double) LinesToRequestPrefetchPixelData * LineTime);
1496 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1497 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1498 (double) myPipe->HTotal)) * LineTime);
1499 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1500 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1501 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1502 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1503 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1507 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1511 double prefetch_vm_bw;
1512 double prefetch_row_bw;
1514 if (PDEAndMetaPTEBytesFrame == 0) {
1516 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1517 #ifdef __DML_VBA_DEBUG__
1518 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1519 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1520 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1521 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1523 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1524 #ifdef __DML_VBA_DEBUG__
1525 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1530 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1533 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1534 prefetch_row_bw = 0;
1535 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1536 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1538 #ifdef __DML_VBA_DEBUG__
1539 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1540 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1541 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1542 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1545 prefetch_row_bw = 0;
1547 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1550 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1554 *PrefetchBandwidth = 0;
1555 TimeForFetchingMetaPTE = 0;
1556 TimeForFetchingRowInVBlank = 0;
1557 *DestinationLinesToRequestVMInVBlank = 0;
1558 *DestinationLinesToRequestRowInVBlank = 0;
1559 *DestinationLinesForPrefetch = 0;
1560 LinesToRequestPrefetchPixelData = 0;
1561 *VRatioPrefetchY = 0;
1562 *VRatioPrefetchC = 0;
1563 *RequiredPrefetchPixDataBWLuma = 0;
1564 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * RoundToDFSGranularityUp - snap Clock onto the grid VCOSpeed * 4 / N.
 *
 * N is (VCOSpeed * 4) / Clock passed through dml_floor() with a granularity
 * of 1; the return value is VCOSpeed * 4 / N.
 * NOTE(review): assumes dml_floor(x, 1) rounds x down to a whole number, in
 * which case the result is not below Clock for positive inputs -- confirm
 * against the helper's definition.
 * NOTE(review): nothing here guards Clock == 0, or Clock > VCOSpeed * 4
 * (N would round down to 0); both divide by zero.
 */
1570 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
1572 return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1);
/*
 * RoundToDFSGranularityDown - snap Clock onto the grid VCOSpeed * 4 / N.
 *
 * N is (VCOSpeed * 4.0) / Clock passed through dml_ceil() with a granularity
 * of 1; the return value is VCOSpeed * 4 / N.
 * NOTE(review): assumes dml_ceil(x, 1) rounds x up to a whole number, in
 * which case the result is not above Clock for positive inputs -- confirm
 * against the helper's definition.
 * NOTE(review): Clock == 0 is not guarded and divides by zero.
 */
1575 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
1577 return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1);
/*
 * CalculateDCCConfiguration - choose the DCC block sizes for the luma and
 * chroma planes of one surface.
 *
 * Results are written through the six output pointers:
 *   MaxUncompressedBlockLuma/Chroma, MaxCompressedBlockLuma/Chroma and
 *   IndependentBlockLuma/Chroma.
 *
 * Outline of the visible computation:
 *   1. derive horizontal/vertical viewport limits from DETBufferSize and the
 *      256-byte request heights, then cap them with fixed limits;
 *   2. clamp the luma surface size to those limits and compute the bytes of
 *      a full swath per plane for both scan directions;
 *   3. decide per plane and direction whether 128-byte requests are needed
 *      (req128_*) by checking what fits in DETBufferSize;
 *   4. pick a request kind per plane from the req128_* and segment_order_*
 *      flags, honouring the scan direction inputs;
 *   5. translate the request kind into the block sizes and zero the outputs
 *      when DCC is disabled (or, for chroma, when there is no chroma plane).
 *
 * NOTE(review): DCCEnabled, yuv420, horz_div_l/c, vert_div_l/c and
 * swath_buf_size are used below but their declarations are not visible in
 * this excerpt -- confirm against the full file.
 * NOTE(review): SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are not referenced by any line visible here.
 */
1580 static void CalculateDCCConfiguration(
1582 bool DCCProgrammingAssumesScanDirectionUnknown,
1583 enum source_format_class SourcePixelFormat,
1584 unsigned int SurfaceWidthLuma,
1585 unsigned int SurfaceWidthChroma,
1586 unsigned int SurfaceHeightLuma,
1587 unsigned int SurfaceHeightChroma,
1588 double DETBufferSize,
1589 unsigned int RequestHeight256ByteLuma,
1590 unsigned int RequestHeight256ByteChroma,
1591 enum dm_swizzle_mode TilingFormat,
1592 unsigned int BytePerPixelY,
1593 unsigned int BytePerPixelC,
1594 double BytePerPixelDETY,
1595 double BytePerPixelDETC,
1596 enum scan_direction_class ScanOrientation,
1597 unsigned int *MaxUncompressedBlockLuma,
1598 unsigned int *MaxUncompressedBlockChroma,
1599 unsigned int *MaxCompressedBlockLuma,
1600 unsigned int *MaxCompressedBlockChroma,
1601 unsigned int *IndependentBlockLuma,
1602 unsigned int *IndependentBlockChroma)
1611 double detile_buf_vp_horz_limit;
1612 double detile_buf_vp_vert_limit;
1614 int MAS_vp_horz_limit;
1615 int MAS_vp_vert_limit;
1616 int max_vp_horz_width;
1617 int max_vp_vert_height;
1618 int eff_surf_width_l;
1619 int eff_surf_width_c;
1620 int eff_surf_height_l;
1621 int eff_surf_height_c;
1623 int full_swath_bytes_horz_wc_l;
1624 int full_swath_bytes_horz_wc_c;
1625 int full_swath_bytes_vert_wc_l;
1626 int full_swath_bytes_vert_wc_c;
1627 int req128_horz_wc_l;
1628 int req128_horz_wc_c;
1629 int req128_vert_wc_l;
1630 int req128_vert_wc_c;
1631 int segment_order_horz_contiguous_luma;
1632 int segment_order_horz_contiguous_chroma;
1633 int segment_order_vert_contiguous_luma;
1634 int segment_order_vert_contiguous_chroma;
/* Request kinds that RequestLuma / RequestChroma can take below. */
1637 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1639 RequestType RequestLuma;
1640 RequestType RequestChroma;
/* 1 for the 4:2:0 source formats (dm_420_8/10/12), 0 otherwise. */
1642 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
/*
 * Per-plane adjustments keyed on bytes per pixel and the dm_sw_64kb_s* tiling
 * formats.
 * NOTE(review): the statements guarded by these four conditions are not
 * visible in this excerpt; presumably they update horz_div_*/vert_div_*.
 */
1648 if (BytePerPixelY == 1)
1650 if (BytePerPixelC == 1)
1652 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1654 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
/*
 * Swath buffer size and the detile-buffer viewport limits. The first three
 * assignments are the luma-only form (2 * 256 bytes taken off half of the DET
 * buffer); the next three take off 2 * 2 * 256 bytes and add the chroma term,
 * which is further divided by (1 + yuv420).
 */
1657 if (BytePerPixelC == 0) {
1658 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1659 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1660 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1662 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1663 detile_buf_vp_horz_limit = (double) swath_buf_size
1664 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1665 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1666 detile_buf_vp_vert_limit = (double) swath_buf_size
1667 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
/* dm_420_10 gets limits that are 1.5x larger. */
1670 if (SourcePixelFormat == dm_420_10) {
1671 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1672 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
/*
 * Take one off each limit and pass it through dml_floor() with a granularity
 * of 16 (presumably rounding down to a multiple of 16 -- confirm).
 */
1675 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1676 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
/*
 * Fixed caps: horizontally 3840 for dm_rgbe_alpha and 5760 otherwise;
 * vertically 2880 when a chroma plane exists and 5760 otherwise. The usable
 * maximum is the smaller of the cap and the detile-buffer limit.
 */
1678 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1679 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1680 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1681 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
/*
 * Clamp the luma surface size to those maxima; the chroma sizes are the
 * clamped luma sizes divided by (1 + yuv420).
 */
1682 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1683 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1684 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1685 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
/*
 * Full-swath byte counts: width * request height * bytes per pixel for the
 * horizontal case, height * 256 / request height for the vertical case.
 * Chroma counts are 0 when there is no chroma plane.
 */
1687 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1688 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1689 if (BytePerPixelC > 0) {
1690 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1691 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1693 full_swath_bytes_horz_wc_c = 0;
1694 full_swath_bytes_vert_wc_c = 0;
/*
 * dm_420_10: scale each count by 2 / 3 (integer arithmetic, so the division
 * truncates) before dml_ceil() is applied with a granularity of 256.
 */
1697 if (SourcePixelFormat == dm_420_10) {
1698 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1699 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1700 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1701 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
/*
 * Horizontal direction: both req128 flags stay 0 when two full swaths of both
 * planes fit in DETBufferSize. Otherwise one plane is flagged if halving just
 * that plane's share fits -- chroma when luma is below 1.5x chroma, luma when
 * it is at least 1.5x chroma. The last pair of assignments flags both planes.
 */
1704 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1705 req128_horz_wc_l = 0;
1706 req128_horz_wc_c = 0;
1707 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1708 req128_horz_wc_l = 0;
1709 req128_horz_wc_c = 1;
1710 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1711 req128_horz_wc_l = 1;
1712 req128_horz_wc_c = 0;
1714 req128_horz_wc_l = 1;
1715 req128_horz_wc_c = 1;
/* Same decision for the vertical direction, using the vertical byte counts. */
1718 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1719 req128_vert_wc_l = 0;
1720 req128_vert_wc_c = 0;
1721 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1722 req128_vert_wc_l = 0;
1723 req128_vert_wc_c = 1;
1724 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1725 req128_vert_wc_l = 1;
1726 req128_vert_wc_c = 0;
1728 req128_vert_wc_l = 1;
1729 req128_vert_wc_c = 1;
/*
 * segment_order_* flags, one per plane and direction, keyed on the plane's
 * bytes per pixel and the tiling format. A value of 0 later selects
 * REQ_128BytesNonContiguous when the matching req128 flag is set.
 */
1732 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1733 segment_order_horz_contiguous_luma = 0;
1735 segment_order_horz_contiguous_luma = 1;
1737 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1738 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1739 segment_order_vert_contiguous_luma = 0;
1741 segment_order_vert_contiguous_luma = 1;
1743 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1744 segment_order_horz_contiguous_chroma = 0;
1746 segment_order_horz_contiguous_chroma = 1;
1748 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1749 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1750 segment_order_vert_contiguous_chroma = 0;
1752 segment_order_vert_contiguous_chroma = 1;
/*
 * Request selection. When the scan direction is treated as unknown, both
 * directions are consulted: REQ_256Bytes needs both req128 flags clear, and a
 * set req128 flag with a 0 segment-order flag in either direction gives
 * REQ_128BytesNonContiguous.
 */
1755 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1756 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1757 RequestLuma = REQ_256Bytes;
1758 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1759 RequestLuma = REQ_128BytesNonContiguous;
1761 RequestLuma = REQ_128BytesContiguous;
1763 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1764 RequestChroma = REQ_256Bytes;
1765 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1766 RequestChroma = REQ_128BytesNonContiguous;
1768 RequestChroma = REQ_128BytesContiguous;
/* Known, non-vertical scan: only the horizontal flags are consulted. */
1770 } else if (ScanOrientation != dm_vert) {
1771 if (req128_horz_wc_l == 0) {
1772 RequestLuma = REQ_256Bytes;
1773 } else if (segment_order_horz_contiguous_luma == 0) {
1774 RequestLuma = REQ_128BytesNonContiguous;
1776 RequestLuma = REQ_128BytesContiguous;
1778 if (req128_horz_wc_c == 0) {
1779 RequestChroma = REQ_256Bytes;
1780 } else if (segment_order_horz_contiguous_chroma == 0) {
1781 RequestChroma = REQ_128BytesNonContiguous;
1783 RequestChroma = REQ_128BytesContiguous;
/* The remaining group consults only the vertical flags. */
1786 if (req128_vert_wc_l == 0) {
1787 RequestLuma = REQ_256Bytes;
1788 } else if (segment_order_vert_contiguous_luma == 0) {
1789 RequestLuma = REQ_128BytesNonContiguous;
1791 RequestLuma = REQ_128BytesContiguous;
1793 if (req128_vert_wc_c == 0) {
1794 RequestChroma = REQ_256Bytes;
1795 } else if (segment_order_vert_contiguous_chroma == 0) {
1796 RequestChroma = REQ_128BytesNonContiguous;
1798 RequestChroma = REQ_128BytesContiguous;
/*
 * Translate the luma request kind into (max uncompressed, max compressed,
 * independent) block sizes: REQ_256Bytes -> 256/256/0,
 * REQ_128BytesContiguous -> 256/128/128, and the last three assignments
 * give 256/64/64.
 */
1802 if (RequestLuma == REQ_256Bytes) {
1803 *MaxUncompressedBlockLuma = 256;
1804 *MaxCompressedBlockLuma = 256;
1805 *IndependentBlockLuma = 0;
1806 } else if (RequestLuma == REQ_128BytesContiguous) {
1807 *MaxUncompressedBlockLuma = 256;
1808 *MaxCompressedBlockLuma = 128;
1809 *IndependentBlockLuma = 128;
1811 *MaxUncompressedBlockLuma = 256;
1812 *MaxCompressedBlockLuma = 64;
1813 *IndependentBlockLuma = 64;
/* Same translation for the chroma plane. */
1816 if (RequestChroma == REQ_256Bytes) {
1817 *MaxUncompressedBlockChroma = 256;
1818 *MaxCompressedBlockChroma = 256;
1819 *IndependentBlockChroma = 0;
1820 } else if (RequestChroma == REQ_128BytesContiguous) {
1821 *MaxUncompressedBlockChroma = 256;
1822 *MaxCompressedBlockChroma = 128;
1823 *IndependentBlockChroma = 128;
1825 *MaxUncompressedBlockChroma = 256;
1826 *MaxCompressedBlockChroma = 64;
1827 *IndependentBlockChroma = 64;
/*
 * Final overrides: chroma outputs are zeroed when DCC is disabled or there is
 * no chroma plane; luma outputs are zeroed when DCC is disabled.
 */
1830 if (DCCEnabled != true || BytePerPixelC == 0) {
1831 *MaxUncompressedBlockChroma = 0;
1832 *MaxCompressedBlockChroma = 0;
1833 *IndependentBlockChroma = 0;
1836 if (DCCEnabled != true) {
1837 *MaxUncompressedBlockLuma = 0;
1838 *MaxCompressedBlockLuma = 0;
1839 *IndependentBlockLuma = 0;
/*
 * CalculatePrefetchSourceLines - number of source lines to prefetch for one
 * plane.
 *
 * Writes the initial prefill to *VInitPreFill and the swath count to
 * *MaxNumSwath, and returns *MaxNumSwath * SwathHeight + MaxPartialSwath.
 * Reads IgnoreViewportPositioning from mode_lib->vba to choose between the
 * two swath formulas below.
 *
 * NOTE(review): VRatio, vtaps and Interlace are used below but their
 * parameter declarations are not visible in this excerpt -- confirm against
 * the full file.
 * NOTE(review): SwathHeight is used as a divisor and as a modulus with no
 * guard against 0 in the visible code.
 */
1843 static double CalculatePrefetchSourceLines(
1844 struct display_mode_lib *mode_lib,
1848 bool ProgressiveToInterlaceUnitInOPP,
1849 unsigned int SwathHeight,
1850 unsigned int ViewportYStart,
1851 double *VInitPreFill,
1852 unsigned int *MaxNumSwath)
1854 struct vba_vars_st *v = &mode_lib->vba;
1855 unsigned int MaxPartialSwath;
/*
 * Initial prefill: half of (VRatio + vtaps + 1), passed through dml_floor()
 * with a granularity of 1. The second formula adds Interlace * 0.5 * VRatio
 * to the sum before halving.
 */
1857 if (ProgressiveToInterlaceUnitInOPP)
1858 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1860 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
/*
 * Viewport positioning honoured: the swath count is derived from
 * (*VInitPreFill - 1) plus one extra swath, and the partial swath from
 * (*VInitPreFill - 2) modulo SwathHeight (offset by SwathHeight when the
 * prefill is not above 1), with a minimum of 1.
 */
1862 if (!v->IgnoreViewportPositioning) {
1864 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1866 if (*VInitPreFill > 1.0)
1867 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1869 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1870 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
/*
 * Formulas that treat the viewport y position as 0 (see the warning text):
 * the swath count comes straight from *VInitPreFill and the partial swath
 * from (*VInitPreFill - 1) modulo SwathHeight. No minimum of 1 is applied
 * on this path in the visible code.
 */
1874 if (ViewportYStart != 0)
1875 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1877 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1879 if (*VInitPreFill > 1.0)
1880 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1882 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
/* Debug trace of the inputs and results; compiled in only with __DML_VBA_DEBUG__. */
1885 #ifdef __DML_VBA_DEBUG__
1886 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1887 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1888 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1889 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1890 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1891 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1892 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1893 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1894 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
/* Full swaths plus the partial swath; the unsigned product is converted to double on return. */
1896 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
/*
 * CalculateVMAndRowBytes
 *
 * Works out, for one surface plane, the DCC-meta and page-table sizing that
 * the prefetch calculations consume.
 *
 * Inputs (by value): tiling, scan direction, 256-byte block dimensions,
 * BytePerPixel, SwathWidth, ViewportHeight, the surface and DCC-meta pitches,
 * the PTE buffer size in requests and the GPUVM / HostVM settings.
 *
 * Outputs (through pointers):
 *   MetaRequestWidth/Height, meta_row_width/height, MetaRowByte - DCC meta geometry
 *   MacroTileWidth                                - derived from the macro tile size
 *   MetaPTEBytesFrame, DPDE0BytesFrame            - per-frame page-table terms
 *   PixelPTEReqWidth/Height, PTERequestSize       - PTE request geometry
 *   dpte_row_height, dpte_row_width_ub, PixelPTEBytesPerRow - per-row PTE figures
 *   PTEBufferSizeNotExceeded                      - result of the PTE buffer check
 *   vm_group_bytes, dpte_group_bytes              - group sizes (512, 2048 or 0)
 *
 * Return: PDEAndMetaPTEBytesFrame, i.e. the sum
 *   *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame,
 * multiplied by (1 + 8 * HostVMDynamicLevels) when HostVMEnable is set.
 *
 * NOTE(review): dml_ceil()/dml_floor() are defined elsewhere; the comments
 * below assume they round to a multiple of their second argument - confirm.
 */
1899 static unsigned int CalculateVMAndRowBytes(
1900 struct display_mode_lib *mode_lib,
1902 unsigned int BlockHeight256Bytes,
1903 unsigned int BlockWidth256Bytes,
1904 enum source_format_class SourcePixelFormat,
1905 unsigned int SurfaceTiling,
1906 unsigned int BytePerPixel,
1907 enum scan_direction_class ScanDirection,
1908 unsigned int SwathWidth,
1909 unsigned int ViewportHeight,
1912 unsigned int HostVMMaxNonCachedPageTableLevels,
1913 unsigned int GPUVMMinPageSize,
1914 unsigned int HostVMMinPageSize,
1915 unsigned int PTEBufferSizeInRequests,
1917 unsigned int DCCMetaPitch,
1918 unsigned int *MacroTileWidth,
1919 unsigned int *MetaRowByte,
1920 unsigned int *PixelPTEBytesPerRow,
1921 bool *PTEBufferSizeNotExceeded,
1922 int *dpte_row_width_ub,
1923 unsigned int *dpte_row_height,
1924 unsigned int *MetaRequestWidth,
1925 unsigned int *MetaRequestHeight,
1926 unsigned int *meta_row_width,
1927 unsigned int *meta_row_height,
1928 int *vm_group_bytes,
1929 unsigned int *dpte_group_bytes,
1930 unsigned int *PixelPTEReqWidth,
1931 unsigned int *PixelPTEReqHeight,
1932 unsigned int *PTERequestSize,
1933 int *DPDE0BytesFrame,
1934 int *MetaPTEBytesFrame)
1936 struct vba_vars_st *v = &mode_lib->vba;
1937 unsigned int MPDEBytesFrame;
1938 unsigned int DCCMetaSurfaceBytes;
1939 unsigned int MacroTileSizeBytes;
1940 unsigned int MacroTileHeight;
1941 unsigned int ExtraDPDEBytesFrame;
1942 unsigned int PDEAndMetaPTEBytesFrame;
1943 unsigned int PixelPTEReqHeightPTEs = 0;
1944 unsigned int HostVMDynamicLevels = 0;
1945 double FractionOfPTEReturnDrop;
// HostVMDynamicLevels stays 0 unless both GPUVM and HostVM are enabled.
// Below a min page size of 2048 it takes the full non-cached level count;
// from 2048 up to (not including) 1048576 it is one less, clamped at 0;
// the last assignment removes two levels, also clamped at 0.
1947 if (GPUVMEnable == true && HostVMEnable == true) {
1948 if (HostVMMinPageSize < 2048) {
1949 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1950 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1951 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1953 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
// Meta request dimensions are 8x the 256-byte block dimensions.
1957 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1958 *MetaRequestWidth = 8 * BlockWidth256Bytes;
// Non-vertical scan: the meta row follows the request height/width directly.
// The second trio of assignments uses the same formulas with request width
// and height swapped.
1959 if (ScanDirection != dm_vert) {
1960 *meta_row_height = *MetaRequestHeight;
1961 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1962 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1964 *meta_row_height = *MetaRequestWidth;
1965 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1966 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
// DCC meta surface size: meta pitch times the viewport height padded to a
// multiple of 64 block heights (plus one extra multiple), scaled by
// BytePerPixel / 256.
1968 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1969 if (GPUVMEnable == true) {
1970 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1971 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1973 *MetaPTEBytesFrame = 0;
// Without DCC there is no meta surface to map.
1977 if (DCCEnable != true) {
1978 *MetaPTEBytesFrame = 0;
// Macro tile: 256 bytes and one block high for linear surfaces, otherwise
// 65536 bytes and 16 blocks high.
1983 if (SurfaceTiling == dm_sw_linear) {
1984 MacroTileSizeBytes = 256;
1985 MacroTileHeight = BlockHeight256Bytes;
1987 MacroTileSizeBytes = 65536;
1988 MacroTileHeight = 16 * BlockHeight256Bytes;
1990 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
// DPDE0 / extra-DPDE terms only apply with GPUVM on and more than one
// page-table level; otherwise both are zeroed.
1992 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1993 if (ScanDirection != dm_vert) {
1994 *DPDE0BytesFrame = 64
1996 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
2000 *DPDE0BytesFrame = 64
2002 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
2006 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
2008 *DPDE0BytesFrame = 0;
2009 ExtraDPDEBytesFrame = 0;
// Per-frame total before any HostVM scaling.
2012 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2014 #ifdef __DML_VBA_DEBUG__
2015 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2016 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2017 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2018 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2019 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
// With HostVM the per-frame total is scaled by (1 + 8 * HostVMDynamicLevels).
2022 if (HostVMEnable == true) {
2023 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2025 #ifdef __DML_VBA_DEBUG__
2026 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
// Select the PTE request geometry (height, width, request size) and the
// fraction of returned PTEs that is dropped, by tiling / macro tile size /
// GPUVM min page size.
2029 if (SurfaceTiling == dm_sw_linear) {
2030 PixelPTEReqHeightPTEs = 1;
2031 *PixelPTEReqHeight = 1;
2032 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
2033 *PTERequestSize = 64;
2034 FractionOfPTEReturnDrop = 0;
// NOTE(review): the only values assigned to MacroTileSizeBytes above are 256
// and 65536, so this 4096 case looks unreachable in this function today.
2035 } else if (MacroTileSizeBytes == 4096) {
2036 PixelPTEReqHeightPTEs = 1;
2037 *PixelPTEReqHeight = MacroTileHeight;
2038 *PixelPTEReqWidth = 8 * *MacroTileWidth;
2039 *PTERequestSize = 64;
2040 if (ScanDirection != dm_vert)
2041 FractionOfPTEReturnDrop = 0;
// 7.0 / 8, not 7 / 8: with two int operands the quotient truncates to 0
// before it is stored in the double, so no drop would ever be applied.
2043 FractionOfPTEReturnDrop = 7.0 / 8;
2044 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
2045 PixelPTEReqHeightPTEs = 16;
2046 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2047 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2048 *PTERequestSize = 128;
2049 FractionOfPTEReturnDrop = 0;
2051 PixelPTEReqHeightPTEs = 1;
2052 *PixelPTEReqHeight = MacroTileHeight;
2053 *PixelPTEReqWidth = 8 * *MacroTileWidth;
2054 *PTERequestSize = 64;
2055 FractionOfPTEReturnDrop = 0;
// Per-row PTE figures. Linear: row height is a power of two capped at 128,
// derived from how many request widths fit the PTE buffer per Pitch.
// Tiled: row height/width come from the request geometry, with width and
// height roles swapped between the non-vertical and the remaining case.
2058 if (SurfaceTiling == dm_sw_linear) {
2059 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2060 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2061 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2062 } else if (ScanDirection != dm_vert) {
2063 *dpte_row_height = *PixelPTEReqHeight;
2064 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2065 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2067 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
2068 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
2069 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
// Buffer check: the row's PTE bytes, less the dropped fraction, must fit in
// 64 * PTEBufferSizeInRequests.
2072 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
2073 *PTEBufferSizeNotExceeded = true;
2075 *PTEBufferSizeNotExceeded = false;
// With GPUVM off there are no PTE bytes per row and the check passes.
2078 if (GPUVMEnable != true) {
2079 *PixelPTEBytesPerRow = 0;
2080 *PTEBufferSizeNotExceeded = true;
2083 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
// Same HostVM scaling as applied to the per-frame total.
2085 if (HostVMEnable == true) {
2086 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
// Group sizes: 512/512 with HostVM; with GPUVM only, vm group is 2048 and the
// dpte group is 512 for tiled + single-PTE-high + vertical scan, else 2048;
// both are 0 in the last pair of assignments.
2089 if (HostVMEnable == true) {
2090 *vm_group_bytes = 512;
2091 *dpte_group_bytes = 512;
2092 } else if (GPUVMEnable == true) {
2093 *vm_group_bytes = 2048;
2094 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2095 *dpte_group_bytes = 512;
2097 *dpte_group_bytes = 2048;
2100 *vm_group_bytes = 0;
2101 *dpte_group_bytes = 0;
2103 return PDEAndMetaPTEBytesFrame;
2106 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2108 struct vba_vars_st *v = &mode_lib->vba;
2110 double HostVMInefficiencyFactor = 1.0;
2111 bool NoChromaPlanes = true;
2113 double VMDataOnlyReturnBW;
2114 double MaxTotalRDBandwidth = 0;
2115 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2117 v->WritebackDISPCLK = 0.0;
2118 v->DISPCLKWithRamping = 0;
2119 v->DISPCLKWithoutRamping = 0;
2120 v->GlobalDPPCLK = 0.0;
2121 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2123 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2124 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2125 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2126 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2127 if (v->HostVMEnable != true) {
2128 v->ReturnBW = dml_min(
2129 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2130 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2132 v->ReturnBW = dml_min(
2133 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2134 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2137 /* End DAL custom code */
2139 // DISPCLK and DPPCLK Calculation
2141 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2142 if (v->WritebackEnable[k]) {
2143 v->WritebackDISPCLK = dml_max(
2144 v->WritebackDISPCLK,
2145 dml31_CalculateWriteBackDISPCLK(
2146 v->WritebackPixelFormat[k],
2148 v->WritebackHRatio[k],
2149 v->WritebackVRatio[k],
2150 v->WritebackHTaps[k],
2151 v->WritebackVTaps[k],
2152 v->WritebackSourceWidth[k],
2153 v->WritebackDestinationWidth[k],
2155 v->WritebackLineBufferSize));
2159 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2160 if (v->HRatio[k] > 1) {
2161 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2162 v->MaxDCHUBToPSCLThroughput,
2163 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2165 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2168 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2170 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2171 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2173 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2174 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2177 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2178 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2179 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2180 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2182 if (v->HRatioChroma[k] > 1) {
2183 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2184 v->MaxDCHUBToPSCLThroughput,
2185 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2187 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2189 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2191 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2192 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2195 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2196 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2199 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2203 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2204 if (v->BlendingAndTiming[k] != k)
2206 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2207 v->DISPCLKWithRamping = dml_max(
2208 v->DISPCLKWithRamping,
2209 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2210 * (1 + v->DISPCLKRampingMargin / 100));
2211 v->DISPCLKWithoutRamping = dml_max(
2212 v->DISPCLKWithoutRamping,
2213 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2214 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2215 v->DISPCLKWithRamping = dml_max(
2216 v->DISPCLKWithRamping,
2217 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2218 * (1 + v->DISPCLKRampingMargin / 100));
2219 v->DISPCLKWithoutRamping = dml_max(
2220 v->DISPCLKWithoutRamping,
2221 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2223 v->DISPCLKWithRamping = dml_max(
2224 v->DISPCLKWithRamping,
2225 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2226 v->DISPCLKWithoutRamping = dml_max(
2227 v->DISPCLKWithoutRamping,
2228 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2232 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2233 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2235 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2236 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2237 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2238 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2239 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2240 v->DISPCLKDPPCLKVCOSpeed);
2241 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2242 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2243 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2244 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2246 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2248 v->DISPCLK = v->DISPCLK_calculated;
2249 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2251 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2252 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2253 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2255 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2256 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2257 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2258 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2261 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2262 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2265 // Urgent and B P-State/DRAM Clock Change Watermark
2266 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2267 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2269 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2270 CalculateBytePerPixelAnd256BBlockSizes(
2271 v->SourcePixelFormat[k],
2272 v->SurfaceTiling[k],
2273 &v->BytePerPixelY[k],
2274 &v->BytePerPixelC[k],
2275 &v->BytePerPixelDETY[k],
2276 &v->BytePerPixelDETC[k],
2277 &v->BlockHeight256BytesY[k],
2278 &v->BlockHeight256BytesC[k],
2279 &v->BlockWidth256BytesY[k],
2280 &v->BlockWidth256BytesC[k]);
2283 CalculateSwathWidth(
2285 v->NumberOfActivePlanes,
2286 v->SourcePixelFormat,
2294 v->ODMCombineEnabled,
2297 v->BlockHeight256BytesY,
2298 v->BlockHeight256BytesC,
2299 v->BlockWidth256BytesY,
2300 v->BlockWidth256BytesC,
2301 v->BlendingAndTiming,
2305 v->SwathWidthSingleDPPY,
2306 v->SwathWidthSingleDPPC,
2311 v->swath_width_luma_ub,
2312 v->swath_width_chroma_ub);
2314 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2315 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2317 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2318 * v->VRatioChroma[k];
2319 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2322 // DCFCLK Deep Sleep
2323 CalculateDCFCLKDeepSleep(
2325 v->NumberOfActivePlanes,
2336 v->PSCL_THROUGHPUT_LUMA,
2337 v->PSCL_THROUGHPUT_CHROMA,
2339 v->ReadBandwidthPlaneLuma,
2340 v->ReadBandwidthPlaneChroma,
2342 &v->DCFCLKDeepSleep);
2345 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2346 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2347 v->DSCCLK_calculated[k] = 0.0;
2349 if (v->OutputFormat[k] == dm_420)
2350 v->DSCFormatFactor = 2;
2351 else if (v->OutputFormat[k] == dm_444)
2352 v->DSCFormatFactor = 1;
2353 else if (v->OutputFormat[k] == dm_n422)
2354 v->DSCFormatFactor = 2;
2356 v->DSCFormatFactor = 1;
2357 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2358 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2359 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2360 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2361 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2362 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2364 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2365 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2370 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2371 double BPP = v->OutputBpp[k];
2373 if (v->DSCEnabled[k] && BPP != 0) {
2374 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2375 v->DSCDelay[k] = dscceComputeDelay(
2376 v->DSCInputBitPerComponent[k],
2378 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2379 v->NumberOfDSCSlices[k],
2381 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2382 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2384 * (dscceComputeDelay(
2385 v->DSCInputBitPerComponent[k],
2387 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2388 v->NumberOfDSCSlices[k] / 2.0,
2390 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2393 * (dscceComputeDelay(
2394 v->DSCInputBitPerComponent[k],
2396 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2397 v->NumberOfDSCSlices[k] / 4.0,
2399 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2401 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2407 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2408 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2409 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2410 v->DSCDelay[k] = v->DSCDelay[j];
2413 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2414 unsigned int PDEAndMetaPTEBytesFrameY;
2415 unsigned int PixelPTEBytesPerRowY;
2416 unsigned int MetaRowByteY;
2417 unsigned int MetaRowByteC;
2418 unsigned int PDEAndMetaPTEBytesFrameC;
2419 unsigned int PixelPTEBytesPerRowC;
2420 bool PTEBufferSizeNotExceededY;
2421 bool PTEBufferSizeNotExceededC;
2423 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2424 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2425 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2426 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2427 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2429 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2430 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2433 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2436 v->BlockHeight256BytesC[k],
2437 v->BlockWidth256BytesC[k],
2438 v->SourcePixelFormat[k],
2439 v->SurfaceTiling[k],
2440 v->BytePerPixelC[k],
2443 v->ViewportHeightChroma[k],
2446 v->HostVMMaxNonCachedPageTableLevels,
2447 v->GPUVMMinPageSize,
2448 v->HostVMMinPageSize,
2449 v->PTEBufferSizeInRequestsForChroma,
2451 v->DCCMetaPitchC[k],
2452 &v->MacroTileWidthC[k],
2454 &PixelPTEBytesPerRowC,
2455 &PTEBufferSizeNotExceededC,
2456 &v->dpte_row_width_chroma_ub[k],
2457 &v->dpte_row_height_chroma[k],
2458 &v->meta_req_width_chroma[k],
2459 &v->meta_req_height_chroma[k],
2460 &v->meta_row_width_chroma[k],
2461 &v->meta_row_height_chroma[k],
2464 &v->PixelPTEReqWidthC[k],
2465 &v->PixelPTEReqHeightC[k],
2466 &v->PTERequestSizeC[k],
2467 &v->dpde0_bytes_per_frame_ub_c[k],
2468 &v->meta_pte_bytes_per_frame_ub_c[k]);
2470 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2475 v->ProgressiveToInterlaceUnitInOPP,
2477 v->ViewportYStartC[k],
2478 &v->VInitPreFillC[k],
2479 &v->MaxNumSwathC[k]);
2481 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2482 v->PTEBufferSizeInRequestsForChroma = 0;
2483 PixelPTEBytesPerRowC = 0;
2484 PDEAndMetaPTEBytesFrameC = 0;
2486 v->MaxNumSwathC[k] = 0;
2487 v->PrefetchSourceLinesC[k] = 0;
2490 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2493 v->BlockHeight256BytesY[k],
2494 v->BlockWidth256BytesY[k],
2495 v->SourcePixelFormat[k],
2496 v->SurfaceTiling[k],
2497 v->BytePerPixelY[k],
2500 v->ViewportHeight[k],
2503 v->HostVMMaxNonCachedPageTableLevels,
2504 v->GPUVMMinPageSize,
2505 v->HostVMMinPageSize,
2506 v->PTEBufferSizeInRequestsForLuma,
2508 v->DCCMetaPitchY[k],
2509 &v->MacroTileWidthY[k],
2511 &PixelPTEBytesPerRowY,
2512 &PTEBufferSizeNotExceededY,
2513 &v->dpte_row_width_luma_ub[k],
2514 &v->dpte_row_height[k],
2515 &v->meta_req_width[k],
2516 &v->meta_req_height[k],
2517 &v->meta_row_width[k],
2518 &v->meta_row_height[k],
2519 &v->vm_group_bytes[k],
2520 &v->dpte_group_bytes[k],
2521 &v->PixelPTEReqWidthY[k],
2522 &v->PixelPTEReqHeightY[k],
2523 &v->PTERequestSizeY[k],
2524 &v->dpde0_bytes_per_frame_ub_l[k],
2525 &v->meta_pte_bytes_per_frame_ub_l[k]);
2527 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2532 v->ProgressiveToInterlaceUnitInOPP,
2534 v->ViewportYStartY[k],
2535 &v->VInitPreFillY[k],
2536 &v->MaxNumSwathY[k]);
2537 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2538 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2539 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2541 CalculateRowBandwidth(
2543 v->SourcePixelFormat[k],
2547 v->HTotal[k] / v->PixelClock[k],
2550 v->meta_row_height[k],
2551 v->meta_row_height_chroma[k],
2552 PixelPTEBytesPerRowY,
2553 PixelPTEBytesPerRowC,
2554 v->dpte_row_height[k],
2555 v->dpte_row_height_chroma[k],
2557 &v->dpte_row_bw[k]);
2560 v->TotalDCCActiveDPP = 0;
2561 v->TotalActiveDPP = 0;
2562 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2563 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2564 if (v->DCCEnable[k])
2565 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2566 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2567 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2568 NoChromaPlanes = false;
2571 ReorderBytes = v->NumberOfChannels
2573 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2574 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2575 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2577 VMDataOnlyReturnBW = dml_min(
2578 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2579 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2580 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2581 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2583 #ifdef __DML_VBA_DEBUG__
2584 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2585 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2586 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2587 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2588 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2589 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2590 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2591 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2592 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2593 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2594 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2597 if (v->GPUVMEnable && v->HostVMEnable)
2598 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2600 v->UrgentExtraLatency = CalculateExtraLatency(
2601 v->RoundTripPingLatencyCycles,
2605 v->PixelChunkSizeInKByte,
2606 v->TotalDCCActiveDPP,
2611 v->NumberOfActivePlanes,
2613 v->dpte_group_bytes,
2614 HostVMInefficiencyFactor,
2615 v->HostVMMinPageSize,
2616 v->HostVMMaxNonCachedPageTableLevels);
2618 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2620 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2621 if (v->BlendingAndTiming[k] == k) {
2622 if (v->WritebackEnable[k] == true) {
2623 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2624 + CalculateWriteBackDelay(
2625 v->WritebackPixelFormat[k],
2626 v->WritebackHRatio[k],
2627 v->WritebackVRatio[k],
2628 v->WritebackVTaps[k],
2629 v->WritebackDestinationWidth[k],
2630 v->WritebackDestinationHeight[k],
2631 v->WritebackSourceHeight[k],
2632 v->HTotal[k]) / v->DISPCLK;
2634 v->WritebackDelay[v->VoltageLevel][k] = 0;
2635 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2636 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2637 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2638 v->WritebackDelay[v->VoltageLevel][k],
2640 + CalculateWriteBackDelay(
2641 v->WritebackPixelFormat[j],
2642 v->WritebackHRatio[j],
2643 v->WritebackVRatio[j],
2644 v->WritebackVTaps[j],
2645 v->WritebackDestinationWidth[j],
2646 v->WritebackDestinationHeight[j],
2647 v->WritebackSourceHeight[j],
2648 v->HTotal[k]) / v->DISPCLK);
2654 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2655 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2656 if (v->BlendingAndTiming[k] == j)
2657 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2659 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2660 v->MaxVStartupLines[k] =
2661 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2662 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2663 v->VTotal[k] - v->VActive[k]
2667 (double) v->WritebackDelay[v->VoltageLevel][k]
2668 / (v->HTotal[k] / v->PixelClock[k]),
2670 if (v->MaxVStartupLines[k] > 1023)
2671 v->MaxVStartupLines[k] = 1023;
2673 #ifdef __DML_VBA_DEBUG__
2674 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2675 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2676 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2680 v->MaximumMaxVStartupLines = 0;
2681 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2682 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2685 // We don't really care to iterate between the various prefetch modes
2686 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2688 v->UrgentLatency = CalculateUrgentLatency(
2689 v->UrgentLatencyPixelDataOnly,
2690 v->UrgentLatencyPixelMixedWithVMData,
2691 v->UrgentLatencyVMDataOnly,
2692 v->DoUrgentLatencyAdjustment,
2693 v->UrgentLatencyAdjustmentFabricClockComponent,
2694 v->UrgentLatencyAdjustmentFabricClockReference,
2697 v->FractionOfUrgentBandwidth = 0.0;
2698 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2700 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2703 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2704 bool DestinationLineTimesForPrefetchLessThan2 = false;
2705 bool VRatioPrefetchMoreThan4 = false;
2706 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2707 MaxTotalRDBandwidth = 0;
2709 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2711 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2714 myPipe.DPPCLK = v->DPPCLK[k];
2715 myPipe.DISPCLK = v->DISPCLK;
2716 myPipe.PixelClock = v->PixelClock[k];
2717 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2718 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2719 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2720 myPipe.SourceScan = v->SourceScan[k];
2721 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2722 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2723 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2724 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2725 myPipe.InterlaceEnable = v->Interlace[k];
2726 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2727 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2728 myPipe.HTotal = v->HTotal[k];
2729 myPipe.DCCEnable = v->DCCEnable[k];
2730 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2731 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2732 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2733 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2734 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2735 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2736 v->ErrorResult[k] = CalculatePrefetchSchedule(
2738 HostVMInefficiencyFactor,
2741 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2743 v->DPPCLKDelaySCLLBOnly,
2744 v->DPPCLKDelayCNVCCursor,
2745 v->DISPCLKDelaySubtotal,
2746 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2748 v->MaxInterDCNTileRepeaters,
2749 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2750 v->MaxVStartupLines[k],
2751 v->GPUVMMaxPageTableLevels,
2754 v->HostVMMaxNonCachedPageTableLevels,
2755 v->HostVMMinPageSize,
2756 v->DynamicMetadataEnable[k],
2757 v->DynamicMetadataVMEnabled,
2758 v->DynamicMetadataLinesBeforeActiveRequired[k],
2759 v->DynamicMetadataTransmittedBytes[k],
2761 v->UrgentExtraLatency,
2763 v->PDEAndMetaPTEBytesFrame[k],
2765 v->PixelPTEBytesPerRow[k],
2766 v->PrefetchSourceLinesY[k],
2768 v->VInitPreFillY[k],
2770 v->PrefetchSourceLinesC[k],
2772 v->VInitPreFillC[k],
2774 v->swath_width_luma_ub[k],
2775 v->swath_width_chroma_ub[k],
2779 &v->DSTXAfterScaler[k],
2780 &v->DSTYAfterScaler[k],
2781 &v->DestinationLinesForPrefetch[k],
2782 &v->PrefetchBandwidth[k],
2783 &v->DestinationLinesToRequestVMInVBlank[k],
2784 &v->DestinationLinesToRequestRowInVBlank[k],
2785 &v->VRatioPrefetchY[k],
2786 &v->VRatioPrefetchC[k],
2787 &v->RequiredPrefetchPixDataBWLuma[k],
2788 &v->RequiredPrefetchPixDataBWChroma[k],
2789 &v->NotEnoughTimeForDynamicMetadata[k],
2791 &v->prefetch_vmrow_bw[k],
2795 &v->VUpdateOffsetPix[k],
2796 &v->VUpdateWidthPix[k],
2797 &v->VReadyOffsetPix[k]);
2799 #ifdef __DML_VBA_DEBUG__
2800 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2802 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2805 v->NoEnoughUrgentLatencyHiding = false;
2806 v->NoEnoughUrgentLatencyHidingPre = false;
2808 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2809 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2810 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2811 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2812 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2814 CalculateUrgentBurstFactor(
2815 v->swath_width_luma_ub[k],
2816 v->swath_width_chroma_ub[k],
2819 v->HTotal[k] / v->PixelClock[k],
2821 v->CursorBufferSize,
2822 v->CursorWidth[k][0],
2826 v->BytePerPixelDETY[k],
2827 v->BytePerPixelDETC[k],
2828 v->DETBufferSizeY[k],
2829 v->DETBufferSizeC[k],
2830 &v->UrgBurstFactorCursor[k],
2831 &v->UrgBurstFactorLuma[k],
2832 &v->UrgBurstFactorChroma[k],
2833 &v->NoUrgentLatencyHiding[k]);
2835 CalculateUrgentBurstFactor(
2836 v->swath_width_luma_ub[k],
2837 v->swath_width_chroma_ub[k],
2840 v->HTotal[k] / v->PixelClock[k],
2842 v->CursorBufferSize,
2843 v->CursorWidth[k][0],
2845 v->VRatioPrefetchY[k],
2846 v->VRatioPrefetchC[k],
2847 v->BytePerPixelDETY[k],
2848 v->BytePerPixelDETC[k],
2849 v->DETBufferSizeY[k],
2850 v->DETBufferSizeC[k],
2851 &v->UrgBurstFactorCursorPre[k],
2852 &v->UrgBurstFactorLumaPre[k],
2853 &v->UrgBurstFactorChromaPre[k],
2854 &v->NoUrgentLatencyHidingPre[k]);
2856 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2858 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2859 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2860 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2861 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2862 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2864 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2865 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2866 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2868 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2870 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2871 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2872 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2873 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2874 + v->cursor_bw_pre[k]);
2876 #ifdef __DML_VBA_DEBUG__
2877 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2878 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2879 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2880 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2881 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2883 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2884 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2886 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2887 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2888 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2889 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2890 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2891 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2892 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2893 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2894 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2895 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2898 if (v->DestinationLinesForPrefetch[k] < 2)
2899 DestinationLineTimesForPrefetchLessThan2 = true;
2901 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2902 VRatioPrefetchMoreThan4 = true;
2904 if (v->NoUrgentLatencyHiding[k] == true)
2905 v->NoEnoughUrgentLatencyHiding = true;
2907 if (v->NoUrgentLatencyHidingPre[k] == true)
2908 v->NoEnoughUrgentLatencyHidingPre = true;
2911 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2913 #ifdef __DML_VBA_DEBUG__
2914 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2915 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2916 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2919 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2920 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2921 v->PrefetchModeSupported = true;
2923 v->PrefetchModeSupported = false;
2924 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2925 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2926 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2927 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2931 // This error-result check is done after PrefetchModeSupported has been evaluated, so we
2932 // still try to calculate the flip schedule even when the prefetch mode is not supported.
2933 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2934 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2935 v->PrefetchModeSupported = false;
2936 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2940 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2941 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2942 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2943 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2945 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2946 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2947 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2949 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2950 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2951 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2954 v->TotImmediateFlipBytes = 0;
2955 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2956 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2957 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2959 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2960 CalculateFlipSchedule(
2962 HostVMInefficiencyFactor,
2963 v->UrgentExtraLatency,
2965 v->GPUVMMaxPageTableLevels,
2967 v->HostVMMaxNonCachedPageTableLevels,
2969 v->HostVMMinPageSize,
2970 v->PDEAndMetaPTEBytesFrame[k],
2972 v->PixelPTEBytesPerRow[k],
2973 v->BandwidthAvailableForImmediateFlip,
2974 v->TotImmediateFlipBytes,
2975 v->SourcePixelFormat[k],
2976 v->HTotal[k] / v->PixelClock[k],
2981 v->dpte_row_height[k],
2982 v->meta_row_height[k],
2983 v->dpte_row_height_chroma[k],
2984 v->meta_row_height_chroma[k],
2985 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2986 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2987 &v->final_flip_bw[k],
2988 &v->ImmediateFlipSupportedForPipe[k]);
2991 v->total_dcn_read_bw_with_flip = 0.0;
2992 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2993 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2994 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2996 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2997 v->DPPPerPlane[k] * v->final_flip_bw[k]
2998 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2999 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
3000 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
3002 * (v->final_flip_bw[k]
3003 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
3004 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
3005 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
3006 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
3008 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
3009 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
3010 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
3012 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
3013 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
3015 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
3017 v->ImmediateFlipSupported = true;
3018 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
3019 #ifdef __DML_VBA_DEBUG__
3020 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
3022 v->ImmediateFlipSupported = false;
3023 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
3025 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3026 if (v->ImmediateFlipSupportedForPipe[k] == false) {
3027 #ifdef __DML_VBA_DEBUG__
3028 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
3031 v->ImmediateFlipSupported = false;
3035 v->ImmediateFlipSupported = false;
3038 v->PrefetchAndImmediateFlipSupported =
3039 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
3040 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
3041 v->ImmediateFlipSupported)) ? true : false;
3042 #ifdef __DML_VBA_DEBUG__
3043 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
3044 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
3045 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
3046 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
3047 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
3048 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
3050 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
3052 v->VStartupLines = v->VStartupLines + 1;
3053 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
3054 ASSERT(v->PrefetchAndImmediateFlipSupported);
3056 // Unbounded Request Enabled
3057 CalculateUnboundedRequestAndCompressedBufferSize(
3058 v->DETBufferSizeInKByte[0],
3059 v->ConfigReturnBufferSizeInKByte,
3060 v->UseUnboundedRequesting,
3064 v->CompressedBufferSegmentSizeInkByte,
3066 &v->UnboundedRequestEnabled,
3067 &v->CompressedBufferSizeInkByte);
3069 //Watermarks and NB P-State/DRAM Clock Change Support
3071 enum clock_change_support DRAMClockChangeSupport; // dummy
3072 CalculateWatermarksAndDRAMSpeedChangeSupport(
3075 v->NumberOfActivePlanes,
3076 v->MaxLineBufferLines,
3078 v->WritebackInterfaceBufferSize,
3081 v->SynchronizedVBlank,
3082 v->dpte_group_bytes,
3085 v->UrgentExtraLatency,
3086 v->WritebackLatency,
3087 v->WritebackChunkSize,
3089 v->DRAMClockChangeLatency,
3091 v->SREnterPlusExitTime,
3093 v->SREnterPlusExitZ8Time,
3110 v->BlendingAndTiming,
3112 v->BytePerPixelDETY,
3113 v->BytePerPixelDETC,
3117 v->WritebackPixelFormat,
3118 v->WritebackDestinationWidth,
3119 v->WritebackDestinationHeight,
3120 v->WritebackSourceHeight,
3121 v->UnboundedRequestEnabled,
3122 v->CompressedBufferSizeInkByte,
3123 &DRAMClockChangeSupport,
3124 &v->UrgentWatermark,
3125 &v->WritebackUrgentWatermark,
3126 &v->DRAMClockChangeWatermark,
3127 &v->WritebackDRAMClockChangeWatermark,
3128 &v->StutterExitWatermark,
3129 &v->StutterEnterPlusExitWatermark,
3130 &v->Z8StutterExitWatermark,
3131 &v->Z8StutterEnterPlusExitWatermark,
3132 &v->MinActiveDRAMClockChangeLatencySupported);
3134 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3135 if (v->WritebackEnable[k] == true) {
3136 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
3138 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
3140 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3145 //Display Pipeline Delivery Time in Prefetch, Groups
3146 CalculatePixelDeliveryTimes(
3147 v->NumberOfActivePlanes,
3152 v->swath_width_luma_ub,
3153 v->swath_width_chroma_ub,
3158 v->PSCL_THROUGHPUT_LUMA,
3159 v->PSCL_THROUGHPUT_CHROMA,
3166 v->BlockWidth256BytesY,
3167 v->BlockHeight256BytesY,
3168 v->BlockWidth256BytesC,
3169 v->BlockHeight256BytesC,
3170 v->DisplayPipeLineDeliveryTimeLuma,
3171 v->DisplayPipeLineDeliveryTimeChroma,
3172 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3173 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3174 v->DisplayPipeRequestDeliveryTimeLuma,
3175 v->DisplayPipeRequestDeliveryTimeChroma,
3176 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3177 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3178 v->CursorRequestDeliveryTime,
3179 v->CursorRequestDeliveryTimePrefetch);
3181 CalculateMetaAndPTETimes(
3182 v->NumberOfActivePlanes,
3185 v->MinMetaChunkSizeBytes,
3189 v->DestinationLinesToRequestRowInVBlank,
3190 v->DestinationLinesToRequestRowInImmediateFlip,
3197 v->dpte_row_height_chroma,
3199 v->meta_row_width_chroma,
3201 v->meta_row_height_chroma,
3203 v->meta_req_width_chroma,
3205 v->meta_req_height_chroma,
3206 v->dpte_group_bytes,
3209 v->PixelPTEReqWidthY,
3210 v->PixelPTEReqHeightY,
3211 v->PixelPTEReqWidthC,
3212 v->PixelPTEReqHeightC,
3213 v->dpte_row_width_luma_ub,
3214 v->dpte_row_width_chroma_ub,
3215 v->DST_Y_PER_PTE_ROW_NOM_L,
3216 v->DST_Y_PER_PTE_ROW_NOM_C,
3217 v->DST_Y_PER_META_ROW_NOM_L,
3218 v->DST_Y_PER_META_ROW_NOM_C,
3219 v->TimePerMetaChunkNominal,
3220 v->TimePerChromaMetaChunkNominal,
3221 v->TimePerMetaChunkVBlank,
3222 v->TimePerChromaMetaChunkVBlank,
3223 v->TimePerMetaChunkFlip,
3224 v->TimePerChromaMetaChunkFlip,
3225 v->time_per_pte_group_nom_luma,
3226 v->time_per_pte_group_vblank_luma,
3227 v->time_per_pte_group_flip_luma,
3228 v->time_per_pte_group_nom_chroma,
3229 v->time_per_pte_group_vblank_chroma,
3230 v->time_per_pte_group_flip_chroma);
3232 CalculateVMGroupAndRequestTimes(
3233 v->NumberOfActivePlanes,
3235 v->GPUVMMaxPageTableLevels,
3238 v->DestinationLinesToRequestVMInVBlank,
3239 v->DestinationLinesToRequestVMInImmediateFlip,
3242 v->dpte_row_width_luma_ub,
3243 v->dpte_row_width_chroma_ub,
3245 v->dpde0_bytes_per_frame_ub_l,
3246 v->dpde0_bytes_per_frame_ub_c,
3247 v->meta_pte_bytes_per_frame_ub_l,
3248 v->meta_pte_bytes_per_frame_ub_c,
3249 v->TimePerVMGroupVBlank,
3250 v->TimePerVMGroupFlip,
3251 v->TimePerVMRequestVBlank,
3252 v->TimePerVMRequestFlip);
3255 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3256 if (PrefetchMode == 0) {
3257 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3258 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3259 v->MinTTUVBlank[k] = dml_max(
3260 v->DRAMClockChangeWatermark,
3261 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3262 } else if (PrefetchMode == 1) {
3263 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3264 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3265 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3267 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3268 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3269 v->MinTTUVBlank[k] = v->UrgentWatermark;
3271 if (!v->DynamicMetadataEnable[k])
3272 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3275 // DCC Configuration
3277 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3278 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3279 v->SourcePixelFormat[k],
3280 v->SurfaceWidthY[k],
3281 v->SurfaceWidthC[k],
3282 v->SurfaceHeightY[k],
3283 v->SurfaceHeightC[k],
3284 v->DETBufferSizeInKByte[0] * 1024,
3285 v->BlockHeight256BytesY[k],
3286 v->BlockHeight256BytesC[k],
3287 v->SurfaceTiling[k],
3288 v->BytePerPixelY[k],
3289 v->BytePerPixelC[k],
3290 v->BytePerPixelDETY[k],
3291 v->BytePerPixelDETC[k],
3293 &v->DCCYMaxUncompressedBlock[k],
3294 &v->DCCCMaxUncompressedBlock[k],
3295 &v->DCCYMaxCompressedBlock[k],
3296 &v->DCCCMaxCompressedBlock[k],
3297 &v->DCCYIndependentBlock[k],
3298 &v->DCCCIndependentBlock[k]);
3301 // VStartup Adjustment
3302 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3303 bool isInterlaceTiming;
3304 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3305 #ifdef __DML_VBA_DEBUG__
3306 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3309 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3311 #ifdef __DML_VBA_DEBUG__
3312 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3313 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3314 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3315 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3318 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3319 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3320 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3323 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3325 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3326 - v->VFrontPorch[k])
3327 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3328 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3330 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3332 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3333 <= (isInterlaceTiming ?
3334 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3335 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3336 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3338 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3340 #ifdef __DML_VBA_DEBUG__
3341 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3342 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3343 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3344 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3345 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3346 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3347 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3348 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3349 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3350 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3351 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3356 //Maximum Bandwidth Used
3357 double TotalWRBandwidth = 0;
3358 double MaxPerPlaneVActiveWRBandwidth = 0;
3359 double WRBandwidth = 0;
3360 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3361 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3362 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3363 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3364 } else if (v->WritebackEnable[k] == true) {
3365 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3366 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3368 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3369 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3372 v->TotalDataReadBandwidth = 0;
3373 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3374 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3377 // Stutter Efficiency
3378 CalculateStutterEfficiency(
3380 v->CompressedBufferSizeInkByte,
3381 v->UnboundedRequestEnabled,
3382 v->ConfigReturnBufferSizeInKByte,
3383 v->MetaFIFOSizeInKEntries,
3384 v->ZeroSizeBufferEntries,
3385 v->NumberOfActivePlanes,
3386 v->ROBBufferSizeInKByte,
3387 v->TotalDataReadBandwidth,
3390 v->COMPBUF_RESERVED_SPACE_64B,
3391 v->COMPBUF_RESERVED_SPACE_ZS,
3394 v->SynchronizedVBlank,
3395 v->StutterEnterPlusExitWatermark,
3396 v->Z8StutterEnterPlusExitWatermark,
3397 v->ProgressiveToInterlaceUnitInOPP,
3403 v->BytePerPixelDETY,
3409 v->DCCFractionOfZeroSizeRequestsLuma,
3410 v->DCCFractionOfZeroSizeRequestsChroma,
3416 v->BlockHeight256BytesY,
3417 v->BlockWidth256BytesY,
3418 v->BlockHeight256BytesC,
3419 v->BlockWidth256BytesC,
3420 v->DCCYMaxUncompressedBlock,
3421 v->DCCCMaxUncompressedBlock,
3425 v->ReadBandwidthPlaneLuma,
3426 v->ReadBandwidthPlaneChroma,
3429 &v->StutterEfficiencyNotIncludingVBlank,
3430 &v->StutterEfficiency,
3431 &v->NumberOfStutterBurstsPerFrame,
3432 &v->Z8StutterEfficiencyNotIncludingVBlank,
3433 &v->Z8StutterEfficiency,
3434 &v->Z8NumberOfStutterBurstsPerFrame,
/*
 * DisplayPipeConfiguration - per-plane byte/256-byte-block sizing followed by
 * the swath and DET configuration.
 *
 * For every plane index below v->NumberOfActivePlanes this calls
 * CalculateBytePerPixelAnd256BBlockSizes() with that plane's source pixel
 * format and surface tiling and stores the 256-byte block heights/widths in
 * the local Read256Bytes* arrays. The arrays are then passed, together with
 * state read from mode_lib->vba, to CalculateSwathAndDETConfiguration().
 *
 * NOTE(review): several lines of this function (the declaration of k, some
 * call arguments, the closing braces) are not visible in the reviewed text,
 * so only the visible statements are described here.
 */
3438 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3440 struct vba_vars_st *v = &mode_lib->vba;
3441 // Display Pipe Configuration
// Per-plane scratch results, sized for the maximum number of DPPs.
3442 double BytePerPixDETY[DC__NUM_DPP__MAX];
3443 double BytePerPixDETC[DC__NUM_DPP__MAX];
3444 int BytePerPixY[DC__NUM_DPP__MAX];
3445 int BytePerPixC[DC__NUM_DPP__MAX];
// NOTE(review): these are int arrays, while the last four parameters of
// CalculateBytePerPixelAnd256BBlockSizes() are declared unsigned int * --
// the element addresses passed below appear to differ in signedness; confirm.
3446 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3447 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3448 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3449 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
// Presumably placeholders for callee outputs this function does not consume;
// only &dummysinglestring is visibly passed below -- TODO confirm the rest.
3450 double dummy1[DC__NUM_DPP__MAX];
3451 double dummy2[DC__NUM_DPP__MAX];
3452 double dummy3[DC__NUM_DPP__MAX];
3453 double dummy4[DC__NUM_DPP__MAX];
3454 int dummy5[DC__NUM_DPP__MAX];
3455 int dummy6[DC__NUM_DPP__MAX];
3456 bool dummy7[DC__NUM_DPP__MAX];
3457 bool dummysinglestring;
// Step 1: per-plane bytes-per-pixel and 256-byte block dimensions.
3461 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3463 CalculateBytePerPixelAnd256BBlockSizes(
3464 v->SourcePixelFormat[k],
3465 v->SurfaceTiling[k],
3470 &Read256BytesBlockHeightY[k],
3471 &Read256BytesBlockHeightC[k],
3472 &Read256BytesBlockWidthY[k],
3473 &Read256BytesBlockWidthC[k]);
// Step 2: one call covering all planes; the block-size arrays filled above
// are passed whole (not per element).
3476 CalculateSwathAndDETConfiguration(
3478 v->NumberOfActivePlanes,
3479 v->DETBufferSizeInKByte[0],
3483 v->SourcePixelFormat,
3491 Read256BytesBlockHeightY,
3492 Read256BytesBlockHeightC,
3493 Read256BytesBlockWidthY,
3494 Read256BytesBlockWidthC,
3495 v->ODMCombineEnabled,
3496 v->BlendingAndTiming,
3514 &dummysinglestring);
/*
 * CalculateBytePerPixelAnd256BBlockSizes - derive per-pixel byte counts and
 * the dimensions of a 256-byte block from the source pixel format and the
 * surface tiling.
 *
 * Outputs written through pointers:
 *   BytePerPixelDETY / BytePerPixelDETC - bytes per pixel as doubles for the
 *       first (Y) and second (C) plane; C is 0 for the single-plane formats
 *       handled below.
 *   BlockHeight256BytesY / C - block height chosen from tiling and format.
 *   BlockWidth256BytesY / C  - 256 / bytes-per-pixel / block height.
 *
 * NOTE(review): the statements that store *BytePerPixelY / *BytePerPixelC and
 * the return statement are not visible in the reviewed text; the width
 * computations below read *BytePerPixelY / *BytePerPixelC, so they are
 * presumably assigned alongside the DET values -- confirm.
 */
3517 static bool CalculateBytePerPixelAnd256BBlockSizes(
3518 enum source_format_class SourcePixelFormat,
3519 enum dm_swizzle_mode SurfaceTiling,
3520 unsigned int *BytePerPixelY,
3521 unsigned int *BytePerPixelC,
3522 double *BytePerPixelDETY,
3523 double *BytePerPixelDETC,
3524 unsigned int *BlockHeight256BytesY,
3525 unsigned int *BlockHeight256BytesC,
3526 unsigned int *BlockWidth256BytesY,
3527 unsigned int *BlockWidth256BytesC)
// Part 1: bytes per pixel, selected by format.
3529 if (SourcePixelFormat == dm_444_64) {
3530 *BytePerPixelDETY = 8;
3531 *BytePerPixelDETC = 0;
3534 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3535 *BytePerPixelDETY = 4;
3536 *BytePerPixelDETC = 0;
3539 } else if (SourcePixelFormat == dm_444_16) {
3540 *BytePerPixelDETY = 2;
3541 *BytePerPixelDETC = 0;
3544 } else if (SourcePixelFormat == dm_444_8) {
3545 *BytePerPixelDETY = 1;
3546 *BytePerPixelDETC = 0;
3549 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3550 *BytePerPixelDETY = 4;
3551 *BytePerPixelDETC = 1;
3554 } else if (SourcePixelFormat == dm_420_8) {
3555 *BytePerPixelDETY = 1;
3556 *BytePerPixelDETC = 2;
3559 } else if (SourcePixelFormat == dm_420_12) {
3560 *BytePerPixelDETY = 2;
3561 *BytePerPixelDETC = 4;
// Remaining format(s): fractional byte counts (4/3 and 8/3).
// NOTE(review): the branch header for this case is not visible here.
3565 *BytePerPixelDETY = 4.0 / 3;
3566 *BytePerPixelDETC = 8.0 / 3;
// Part 2: 256-byte block dimensions. First the formats with no second plane.
3571 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3572 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
// Linear surfaces use a block height of 1; otherwise the height depends on
// the format (4 for dm_444_64, 16 for dm_444_8, 8 on the remaining path).
3573 if (SurfaceTiling == dm_sw_linear) {
3574 *BlockHeight256BytesY = 1;
3575 } else if (SourcePixelFormat == dm_444_64) {
3576 *BlockHeight256BytesY = 4;
3577 } else if (SourcePixelFormat == dm_444_8) {
3578 *BlockHeight256BytesY = 16;
3580 *BlockHeight256BytesY = 8;
// Width follows from 256 bytes = width * height * bytes-per-pixel
// (unsigned integer division).
3582 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3583 *BlockHeight256BytesC = 0;
3584 *BlockWidth256BytesC = 0;
// Two-plane formats: choose a height for each plane, then derive both widths.
3586 if (SurfaceTiling == dm_sw_linear) {
3587 *BlockHeight256BytesY = 1;
3588 *BlockHeight256BytesC = 1;
3589 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3590 *BlockHeight256BytesY = 8;
3591 *BlockHeight256BytesC = 16;
3592 } else if (SourcePixelFormat == dm_420_8) {
3593 *BlockHeight256BytesY = 16;
3594 *BlockHeight256BytesC = 8;
3596 *BlockHeight256BytesY = 8;
3597 *BlockHeight256BytesC = 8;
3599 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3600 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
/*
 * CalculateTWait - select the wait time for the given prefetch mode.
 *
 *   PrefetchMode == 0: max(DRAMClockChangeLatency + UrgentLatency,
 *                          SREnterPlusExitTime, UrgentLatency)
 *   PrefetchMode == 1: max(SREnterPlusExitTime, UrgentLatency)
 *   any other value:   UrgentLatency
 *
 * The result is expressed in the same unit as the latency arguments.
 */
3605 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
3607 if (PrefetchMode == 0) {
3608 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
3609 } else if (PrefetchMode == 1) {
3610 return dml_max(SREnterPlusExitTime, UrgentLatency);
// Reached only when neither branch above returned.
3612 return UrgentLatency;
/*
 * dml31_CalculateWriteBackDISPCLK - return the largest of three writeback
 * DISPCLK candidates, each scaled from PixelClock:
 *
 *   DISPCLK_H  - ceil(WritebackHTaps / 8) / WritebackHRatio
 *   DISPCLK_V  - (WritebackVTaps * ceil(WritebackDestinationWidth / 6) + 8) / HTotal
 *   DISPCLK_HB - WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps
 *                - WritebackLineBufferSize / 57) / 6 / WritebackSourceWidth
 *
 * WritebackPixelFormat and WritebackVRatio are not read by the visible
 * statements.
 * NOTE(review): PixelClock is used below but its parameter line is not
 * visible in the reviewed text.
 */
3616 double dml31_CalculateWriteBackDISPCLK(
3617 enum source_format_class WritebackPixelFormat,
3619 double WritebackHRatio,
3620 double WritebackVRatio,
3621 unsigned int WritebackHTaps,
3622 unsigned int WritebackVTaps,
3623 long WritebackSourceWidth,
3624 long WritebackDestinationWidth,
3625 unsigned int HTotal,
3626 unsigned int WritebackLineBufferSize)
3628 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
// The 8.0 / 6.0 / 57.0 literals force floating-point division on the
// integer-typed operands.
3630 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3631 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3632 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3633 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
/*
 * CalculateWriteBackDelay - compute the writeback delay from the scaler
 * geometry.
 *
 * Steps, as visible below:
 *   WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2
 *   Line_length    = max(WritebackDestinationWidth,
 *                        ceil(WritebackDestinationWidth / 6) * WritebackVTaps)
 *   Output_lines_last_notclamped = WritebackDestinationHeight - 1
 *        - ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio)
 *
 * A negative line count yields 0; the other visible assignment is
 *   Output_lines_last_notclamped * Line_length
 *        + (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are not read by the visible
 * statements.
 * NOTE(review): the declaration of Line_length and the else line separating
 * the two assignments are not visible in the reviewed text -- confirm.
 */
3636 static double CalculateWriteBackDelay(
3637 enum source_format_class WritebackPixelFormat,
3638 double WritebackHRatio,
3639 double WritebackVRatio,
3640 unsigned int WritebackVTaps,
3641 int WritebackDestinationWidth,
3642 int WritebackDestinationHeight,
3643 int WritebackSourceHeight,
3644 unsigned int HTotal)
// Local result variable; it shares the function's name.
3646 double CalculateWriteBackDelay;
3648 double Output_lines_last_notclamped;
3649 double WritebackVInit;
3651 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3652 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3653 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
// Clamp: a negative line count means no delay.
3654 if (Output_lines_last_notclamped < 0) {
3655 CalculateWriteBackDelay = 0;
3657 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3659 return CalculateWriteBackDelay;
/*
 * CalculateVupdateAndDynamicMetadataParameters - compute the VUPDATE/VREADY
 * pixel offsets and the dynamic-metadata timing values.
 *
 * Outputs written by the visible statements:
 *   *VUpdateWidthPix  - ceil((14/DCFClkDeepSleep + 12/DPPCLK
 *                       + TotalRepeaterDelayTime) * PixelClock)
 *   *VReadyOffsetPix  - ceil(max(150/DPPCLK, TotalRepeaterDelayTime
 *                       + 20/DCFClkDeepSleep + 10/DPPCLK) * PixelClock)
 *   *VUpdateOffsetPix - ceil(HTotal / 4)
 *   *TSetup           - sum of the three values above divided by PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec            - HTotal / PixelClock
 *   *Tdmsks           - see the branch comments below
 *
 * where TotalRepeaterDelayTime =
 *   MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK).
 *
 * NOTE(review): several parameter lines (e.g. DPPCLK, DISPCLK, PixelClock,
 * HTotal, VBlank, TSetup, Tdmbf, Tdmec, Tdmsks) are not visible in the
 * reviewed text; their types are therefore not restated here.
 */
3662 static void CalculateVupdateAndDynamicMetadataParameters(
3663 int MaxInterDCNTileRepeaters,
3666 double DCFClkDeepSleep,
3670 int DynamicMetadataTransmittedBytes,
3671 int DynamicMetadataLinesBeforeActiveRequired,
3672 int InterlaceEnable,
3673 bool ProgressiveToInterlaceUnitInOPP,
3678 int *VUpdateOffsetPix,
3679 double *VUpdateWidthPix,
3680 double *VReadyOffsetPix)
3682 double TotalRepeaterDelayTime;
3684 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
3685 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
3686 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
// Stored through an int pointer, so the dml_ceil() result is converted to int.
3687 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
3688 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
3689 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
3690 *Tdmec = HTotal / PixelClock;
// Tdmsks: with no explicit "lines before active" requirement, use
// VBlank * HTotal / PixelClock / 2; the other visible assignment uses the
// required number of lines instead.
3691 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
3692 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
3694 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
// Interlaced timing without the progressive-to-interlace unit in OPP:
// halve the value.
3696 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
3697 *Tdmsks = *Tdmsks / 2;
3699 #ifdef __DML_VBA_DEBUG__
// NOTE(review): *VUpdateWidthPix and *VReadyOffsetPix are double but are
// printed with %d; if dml_print is printf-like this is a format/argument
// mismatch in debug builds -- confirm and switch to %f.
3700 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix);
3701 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix);
3702 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
/*
 * CalculateRowBandwidth - compute the bandwidth needed to fetch DCC meta rows
 * (*meta_row_bw) and page-table rows (*dpte_row_bw).
 *
 * Each term has the form
 *     vertical ratio * bytes per row / (row height * LineTime)
 * For dm_420_8, dm_420_10, dm_420_12 and dm_rgbe_alpha a luma term and a
 * chroma term are summed; the remaining visible assignment uses the luma term
 * only.
 *
 * NOTE(review): the parameter lines for GPUVMEnable, VRatio, DCCEnable and
 * LineTime, and the statements executed when DCCEnable / GPUVMEnable is not
 * true, are not visible in the reviewed text -- presumably the outputs are
 * zeroed in those cases; confirm.
 */
3706 static void CalculateRowBandwidth(
3708 enum source_format_class SourcePixelFormat,
3710 double VRatioChroma,
3713 unsigned int MetaRowByteLuma,
3714 unsigned int MetaRowByteChroma,
3715 unsigned int meta_row_height_luma,
3716 unsigned int meta_row_height_chroma,
3717 unsigned int PixelPTEBytesPerRowLuma,
3718 unsigned int PixelPTEBytesPerRowChroma,
3719 unsigned int dpte_row_height_luma,
3720 unsigned int dpte_row_height_chroma,
3721 double *meta_row_bw,
3722 double *dpte_row_bw)
// DCC meta-row bandwidth.
3724 if (DCCEnable != true) {
3726 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3727 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3729 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
// Page-table (DPTE) row bandwidth, same structure keyed on GPUVMEnable.
3732 if (GPUVMEnable != true) {
3734 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3735 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3736 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3738 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - work out, for one pipe, how long an immediate flip
 * needs to fetch its page-table/meta data and whether that fits.
 *
 * Outputs:
 *   *DestinationLinesToRequestVMInImmediateFlip  - VM fetch time in lines,
 *       rounded up to a quarter line (ceil(4 * t / LineTime) / 4).
 *   *DestinationLinesToRequestRowInImmediateFlip - row fetch time in lines,
 *       rounded the same way.
 *   *final_flip_bw - bytes to fetch divided by the rounded fetch durations.
 *   *ImmediateFlipSupportedForPipe - false when the VM lines reach 32, the
 *       row lines reach 16, or the VM time plus twice the row time exceeds
 *       min_row_time; true otherwise.
 *
 * NOTE(review): several parameter lines (e.g. UrgentLatency is shown, but
 * GPUVMEnable, HostVMEnable, DCCEnable, LineTime, VRatio, Tno_bw are used
 * without a visible declaration), the third dml_max3() arguments and a number
 * of else/brace lines are not visible in the reviewed text; comments below
 * cover only what is shown.
 */
3742 static void CalculateFlipSchedule(
3743 struct display_mode_lib *mode_lib,
3744 double HostVMInefficiencyFactor,
3745 double UrgentExtraLatency,
3746 double UrgentLatency,
3747 unsigned int GPUVMMaxPageTableLevels,
3749 unsigned int HostVMMaxNonCachedPageTableLevels,
3751 double HostVMMinPageSize,
3752 double PDEAndMetaPTEBytesPerFrame,
3753 double MetaRowBytes,
3754 double DPTEBytesPerRow,
3755 double BandwidthAvailableForImmediateFlip,
3756 unsigned int TotImmediateFlipBytes,
3757 enum source_format_class SourcePixelFormat,
3760 double VRatioChroma,
3763 unsigned int dpte_row_height,
3764 unsigned int meta_row_height,
3765 unsigned int dpte_row_height_chroma,
3766 unsigned int meta_row_height_chroma,
3767 double *DestinationLinesToRequestVMInImmediateFlip,
3768 double *DestinationLinesToRequestRowInImmediateFlip,
3769 double *final_flip_bw,
3770 bool *ImmediateFlipSupportedForPipe)
3772 double min_row_time = 0.0;
3773 unsigned int HostVMDynamicLevelsTrips;
3774 double TimeForFetchingMetaPTEImmediateFlip;
3775 double TimeForFetchingRowInVBlankImmediateFlip;
3776 double ImmediateFlipBW;
// Extra page-walk trips only apply when both GPUVM and HostVM are enabled.
3778 if (GPUVMEnable == true && HostVMEnable == true) {
3779 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3781 HostVMDynamicLevelsTrips = 0;
// This pipe's share of the flip bandwidth, proportional to its bytes out of
// TotImmediateFlipBytes.
3784 if (GPUVMEnable == true || DCCEnable == true) {
3785 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
// Time to fetch PDE/meta-PTE data: the larger of a bandwidth-based and a
// latency-based bound (third bound not visible here); 0 on the other path.
3788 if (GPUVMEnable == true) {
3789 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3790 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3791 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3794 TimeForFetchingMetaPTEImmediateFlip = 0;
3797 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
// Time to fetch one meta row plus one DPTE row, same max-of-bounds structure.
3798 if ((GPUVMEnable == true || DCCEnable == true)) {
3799 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3800 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3801 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3804 TimeForFetchingRowInVBlankImmediateFlip = 0;
3807 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
// Resulting flip bandwidth from the rounded durations. The second condition
// can only be reached with GPUVMEnable != true, i.e. when DCCEnable is true.
3809 if (GPUVMEnable == true) {
3810 *final_flip_bw = dml_max(
3811 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3812 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3813 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3814 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
// min_row_time: the shortest time any relevant row (dpte/meta, luma/chroma)
// takes to be consumed, i.e. row height * LineTime / vertical ratio.
// NOTE(review): dm_420_12 is not listed in this format test although
// CalculateRowBandwidth treats it as a luma+chroma format -- confirm intent.
3819 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3820 if (GPUVMEnable == true && DCCEnable != true) {
3821 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3822 } else if (GPUVMEnable != true && DCCEnable == true) {
3823 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3825 min_row_time = dml_min4(
3826 dpte_row_height * LineTime / VRatio,
3827 meta_row_height * LineTime / VRatio,
3828 dpte_row_height_chroma * LineTime / VRatioChroma,
3829 meta_row_height_chroma * LineTime / VRatioChroma);
3832 if (GPUVMEnable == true && DCCEnable != true) {
3833 min_row_time = dpte_row_height * LineTime / VRatio;
3834 } else if (GPUVMEnable != true && DCCEnable == true) {
3835 min_row_time = meta_row_height * LineTime / VRatio;
3837 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
// Final verdict for this pipe.
3841 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3842 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3843 *ImmediateFlipSupportedForPipe = false;
3845 *ImmediateFlipSupportedForPipe = true;
3848 #ifdef __DML_VBA_DEBUG__
3849 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
3850 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
3851 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3852 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3853 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3854 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
/*
 * TruncToValidBPP - pick or validate an output bits-per-pixel value for a link.
 *
 * The result depends on the output encoder (Output), the output pixel format
 * (Format), whether DSC is enabled (DSCEnable), the DSC input bits per
 * component (DSCInputBitPerComponent) and the ODM combine mode (ODMCombine).
 *
 * MaxLinkBPP is derived from LinkBitRate, Lanes and PixelClock and is compared
 * against the DSC range [MinDSCBPP, MaxDSCBPP] or the non-DSC candidates
 * NonDSCBPP0/1/2, all of which are selected per Format/Output.
 *
 * DesiredBPP == 0 asks the function to choose a value; a non-zero DesiredBPP
 * is only checked for validity.
 */
3859 static double TruncToValidBPP(
3867 enum output_encoder_class Output,
3868 enum output_format_class Format,
3869 unsigned int DSCInputBitPerComponent,
3873 enum odm_combine_mode ODMCombine)
// Per-format DSC upper bound: a multiple of the DSC input bits per component
// minus 1/16 bpp.
3882 if (Format == dm_420) {
// Must be a floating-point division: the former "1 / 16" was integer division
// and evaluated to 0, silently dropping the 1/16 bpp margin that the other
// format branches below subtract.
3887 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3888 } else if (Format == dm_444) {
3893 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3895 if (Output == dm_hdmi) {
3904 if (Format == dm_n422) {
3906 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3909 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
// Link-limited bpp: LinkBitRate / 10 * 8 * Lanes / PixelClock. With DSC on a
// dm_dp output the result is further scaled by (1 - 2.4 / 100).
// NOTE(review): the 2.4% derating is presumably FEC overhead - confirm.
3913 if (DSCEnable && Output == dm_dp) {
3914 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3916 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
// ODM combine modes are checked against fixed thresholds: 16 for 4:1 and 32
// for 2:1. NOTE(review): presumably MaxLinkBPP is limited to these - confirm.
3919 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3921 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
// No bpp requested: choose one from what the link can carry.
3925 if (DesiredBPP == 0) {
// DSC range checks: below MinDSCBPP, at/above MaxDSCBPP, or in between.
3927 if (MaxLinkBPP < MinDSCBPP) {
3929 } else if (MaxLinkBPP >= MaxDSCBPP) {
// In-between case: round the link limit down to a multiple of 1/16 bpp.
3932 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
// Non-DSC candidates are tested from the largest (NonDSCBPP2) downwards.
3935 if (MaxLinkBPP >= NonDSCBPP2) {
3937 } else if (MaxLinkBPP >= NonDSCBPP1) {
3939 } else if (MaxLinkBPP >= NonDSCBPP0) {
// Explicit bpp requested. The condition is negated, so it is true when the
// request is NOT acceptable. Acceptable means: without DSC, DesiredBPP equals
// NonDSCBPP2 or NonDSCBPP1 or is <= NonDSCBPP0; with DSC, DesiredBPP lies in
// [MinDSCBPP, MaxDSCBPP].
3946 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3947 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3956 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3958 struct vba_vars_st *v = &mode_lib->vba;
3962 int ReorderingBytes;
3963 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3964 bool NoChroma = true;
3965 bool EnoughWritebackUnits = true;
3966 bool P2IWith420 = false;
3967 bool DSCOnlyIfNecessaryWithBPP = false;
3968 bool DSC422NativeNotSupported = false;
3969 double MaxTotalVActiveRDBandwidth;
3970 bool ViewportExceedsSurface = false;
3971 bool FMTBufferExceeded = false;
3973 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3975 CalculateMinAndMaxPrefetchMode(
3976 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3977 &MinPrefetchMode, &MaxPrefetchMode);
3979 /*Scale Ratio, taps Support Check*/
3981 v->ScaleRatioAndTapsSupport = true;
3982 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3983 if (v->ScalerEnabled[k] == false
3984 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3985 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3986 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3987 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3988 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3989 v->ScaleRatioAndTapsSupport = false;
3990 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3991 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3992 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3993 || v->VRatio[k] > v->vtaps[k]
3994 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3995 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3996 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3997 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3998 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3999 || v->HRatioChroma[k] > v->MaxHSCLRatio
4000 || v->VRatioChroma[k] > v->MaxVSCLRatio
4001 || v->HRatioChroma[k] > v->HTAPsChroma[k]
4002 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
4003 v->ScaleRatioAndTapsSupport = false;
4006 /*Source Format, Pixel Format and Scan Support Check*/
4008 v->SourceFormatPixelAndScanSupport = true;
4009 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4010 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
4011 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
4012 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
4013 v->SourceFormatPixelAndScanSupport = false;
4016 /*Bandwidth Support Check*/
4018 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4019 CalculateBytePerPixelAnd256BBlockSizes(
4020 v->SourcePixelFormat[k],
4021 v->SurfaceTiling[k],
4022 &v->BytePerPixelY[k],
4023 &v->BytePerPixelC[k],
4024 &v->BytePerPixelInDETY[k],
4025 &v->BytePerPixelInDETC[k],
4026 &v->Read256BlockHeightY[k],
4027 &v->Read256BlockHeightC[k],
4028 &v->Read256BlockWidthY[k],
4029 &v->Read256BlockWidthC[k]);
4031 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4032 if (v->SourceScan[k] != dm_vert) {
4033 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
4034 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
4036 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
4037 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
4040 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4041 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
4042 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4043 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
4044 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
4046 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4047 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
4048 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4049 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
4050 } else if (v->WritebackEnable[k] == true) {
4051 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4052 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
4054 v->WriteBandwidth[k] = 0.0;
4058 /*Writeback Latency support check*/
4060 v->WritebackLatencySupport = true;
4061 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4062 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
4063 v->WritebackLatencySupport = false;
4067 /*Writeback Mode Support Check*/
4069 v->TotalNumberOfActiveWriteback = 0;
4070 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4071 if (v->WritebackEnable[k] == true) {
4072 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4076 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4077 EnoughWritebackUnits = false;
4080 /*Writeback Scale Ratio and Taps Support Check*/
4082 v->WritebackScaleRatioAndTapsSupport = true;
4083 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4084 if (v->WritebackEnable[k] == true) {
4085 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4086 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4087 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4088 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4089 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4090 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4091 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4092 v->WritebackScaleRatioAndTapsSupport = false;
4094 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4095 v->WritebackScaleRatioAndTapsSupport = false;
4099 /*Maximum DISPCLK/DPPCLK Support check*/
4101 v->WritebackRequiredDISPCLK = 0.0;
4102 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4103 if (v->WritebackEnable[k] == true) {
4104 v->WritebackRequiredDISPCLK = dml_max(
4105 v->WritebackRequiredDISPCLK,
4106 dml31_CalculateWriteBackDISPCLK(
4107 v->WritebackPixelFormat[k],
4109 v->WritebackHRatio[k],
4110 v->WritebackVRatio[k],
4111 v->WritebackHTaps[k],
4112 v->WritebackVTaps[k],
4113 v->WritebackSourceWidth[k],
4114 v->WritebackDestinationWidth[k],
4116 v->WritebackLineBufferSize));
4119 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4120 if (v->HRatio[k] > 1.0) {
4121 v->PSCL_FACTOR[k] = dml_min(
4122 v->MaxDCHUBToPSCLThroughput,
4123 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4125 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4127 if (v->BytePerPixelC[k] == 0.0) {
4128 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4129 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4131 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4132 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4134 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4135 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4138 if (v->HRatioChroma[k] > 1.0) {
4139 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4140 v->MaxDCHUBToPSCLThroughput,
4141 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4143 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4145 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4147 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4148 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4149 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4150 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4152 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4153 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4154 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4158 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4159 int MaximumSwathWidthSupportLuma;
4160 int MaximumSwathWidthSupportChroma;
4162 if (v->SurfaceTiling[k] == dm_sw_linear) {
4163 MaximumSwathWidthSupportLuma = 8192.0;
4164 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4165 MaximumSwathWidthSupportLuma = 2880.0;
4166 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4167 MaximumSwathWidthSupportLuma = 3840.0;
4169 MaximumSwathWidthSupportLuma = 5760.0;
4172 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4173 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4175 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4177 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4178 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4179 if (v->BytePerPixelC[k] == 0.0) {
4180 v->MaximumSwathWidthInLineBufferChroma = 0;
4182 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4183 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4185 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4186 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4189 CalculateSwathAndDETConfiguration(
4191 v->NumberOfActivePlanes,
4192 v->DETBufferSizeInKByte[0],
4193 v->MaximumSwathWidthLuma,
4194 v->MaximumSwathWidthChroma,
4196 v->SourcePixelFormat,
4204 v->Read256BlockHeightY,
4205 v->Read256BlockHeightC,
4206 v->Read256BlockWidthY,
4207 v->Read256BlockWidthC,
4208 v->odm_combine_dummy,
4209 v->BlendingAndTiming,
4212 v->BytePerPixelInDETY,
4213 v->BytePerPixelInDETC,
4217 v->NoOfDPPThisState,
4218 v->swath_width_luma_ub_this_state,
4219 v->swath_width_chroma_ub_this_state,
4220 v->SwathWidthYThisState,
4221 v->SwathWidthCThisState,
4222 v->SwathHeightYThisState,
4223 v->SwathHeightCThisState,
4224 v->DETBufferSizeYThisState,
4225 v->DETBufferSizeCThisState,
4226 v->SingleDPPViewportSizeSupportPerPlane,
4227 &v->ViewportSizeSupport[0][0]);
4229 for (i = 0; i < v->soc.num_states; i++) {
4230 for (j = 0; j < 2; j++) {
4231 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4232 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4233 v->RequiredDISPCLK[i][j] = 0.0;
4234 v->DISPCLK_DPPCLK_Support[i][j] = true;
4235 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4236 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4237 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4238 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4239 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4240 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4241 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4242 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4244 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4245 * (1 + v->DISPCLKRampingMargin / 100.0);
4246 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4247 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4248 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4249 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4250 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4252 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4253 * (1 + v->DISPCLKRampingMargin / 100.0);
4254 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4255 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4256 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4257 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4258 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4261 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4262 || !(v->Output[k] == dm_dp ||
4263 v->Output[k] == dm_edp)) {
4264 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4265 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4267 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4268 FMTBufferExceeded = true;
4269 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4270 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4271 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4272 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4273 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4274 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4275 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4276 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4277 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4278 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4280 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4281 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4283 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4284 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4285 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4286 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4287 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4289 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4290 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4293 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4294 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4295 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4296 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4297 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4299 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4300 FMTBufferExceeded = true;
4302 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4303 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4306 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4307 v->MPCCombine[i][j][k] = false;
4308 v->NoOfDPP[i][j][k] = 4;
4309 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4310 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4311 v->MPCCombine[i][j][k] = false;
4312 v->NoOfDPP[i][j][k] = 2;
4313 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4314 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4315 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4316 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4317 v->MPCCombine[i][j][k] = false;
4318 v->NoOfDPP[i][j][k] = 1;
4319 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4321 v->MPCCombine[i][j][k] = true;
4322 v->NoOfDPP[i][j][k] = 2;
4323 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4325 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4326 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4327 > v->MaxDppclkRoundedDownToDFSGranularity)
4328 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4329 v->DISPCLK_DPPCLK_Support[i][j] = false;
4332 v->TotalNumberOfActiveDPP[i][j] = 0;
4333 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4334 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4335 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4336 if (v->NoOfDPP[i][j][k] == 1)
4337 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4338 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4339 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4344 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4345 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4346 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4347 double BWOfNonSplitPlaneOfMaximumBandwidth;
4348 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4349 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4350 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4351 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4352 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4353 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4354 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4355 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4358 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4359 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4360 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4361 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4362 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4363 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4364 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4367 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4368 v->RequiredDISPCLK[i][j] = 0.0;
4369 v->DISPCLK_DPPCLK_Support[i][j] = true;
4370 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4371 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4372 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4373 v->MPCCombine[i][j][k] = true;
4374 v->NoOfDPP[i][j][k] = 2;
4375 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4376 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4378 v->MPCCombine[i][j][k] = false;
4379 v->NoOfDPP[i][j][k] = 1;
4380 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4381 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4383 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4384 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4385 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4386 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4388 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4390 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4391 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4392 > v->MaxDppclkRoundedDownToDFSGranularity)
4393 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4394 v->DISPCLK_DPPCLK_Support[i][j] = false;
4397 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4398 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4399 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4402 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4403 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4404 v->DISPCLK_DPPCLK_Support[i][j] = false;
4409 /*Total Available Pipes Support Check*/
4411 for (i = 0; i < v->soc.num_states; i++) {
4412 for (j = 0; j < 2; j++) {
4413 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4414 v->TotalAvailablePipesSupport[i][j] = true;
4416 v->TotalAvailablePipesSupport[i][j] = false;
4420 /*Display IO and DSC Support Check*/
4422 v->NonsupportedDSCInputBPC = false;
4423 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4424 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4425 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4426 v->NonsupportedDSCInputBPC = true;
4430 /*Number Of DSC Slices*/
4431 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4432 if (v->BlendingAndTiming[k] == k) {
4433 if (v->PixelClockBackEnd[k] > 3200) {
4434 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4435 } else if (v->PixelClockBackEnd[k] > 1360) {
4436 v->NumberOfDSCSlices[k] = 8;
4437 } else if (v->PixelClockBackEnd[k] > 680) {
4438 v->NumberOfDSCSlices[k] = 4;
4439 } else if (v->PixelClockBackEnd[k] > 340) {
4440 v->NumberOfDSCSlices[k] = 2;
4442 v->NumberOfDSCSlices[k] = 1;
4445 v->NumberOfDSCSlices[k] = 0;
4449 for (i = 0; i < v->soc.num_states; i++) {
4450 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4451 v->RequiresDSC[i][k] = false;
4452 v->RequiresFEC[i][k] = false;
4453 if (v->BlendingAndTiming[k] == k) {
4454 if (v->Output[k] == dm_hdmi) {
4455 v->RequiresDSC[i][k] = false;
4456 v->RequiresFEC[i][k] = false;
4457 v->OutputBppPerState[i][k] = TruncToValidBPP(
4458 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4462 v->PixelClockBackEnd[k],
4463 v->ForcedOutputLinkBPP[k],
4467 v->DSCInputBitPerComponent[k],
4468 v->NumberOfDSCSlices[k],
4469 v->AudioSampleRate[k],
4470 v->AudioSampleLayout[k],
4471 v->ODMCombineEnablePerState[i][k]);
4472 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4473 if (v->DSCEnable[k] == true) {
4474 v->RequiresDSC[i][k] = true;
4475 v->LinkDSCEnable = true;
4476 if (v->Output[k] == dm_dp) {
4477 v->RequiresFEC[i][k] = true;
4479 v->RequiresFEC[i][k] = false;
4482 v->RequiresDSC[i][k] = false;
4483 v->LinkDSCEnable = false;
4484 v->RequiresFEC[i][k] = false;
4487 v->Outbpp = BPP_INVALID;
4488 if (v->PHYCLKPerState[i] >= 270.0) {
4489 v->Outbpp = TruncToValidBPP(
4490 (1.0 - v->Downspreading / 100.0) * 2700,
4491 v->OutputLinkDPLanes[k],
4494 v->PixelClockBackEnd[k],
4495 v->ForcedOutputLinkBPP[k],
4499 v->DSCInputBitPerComponent[k],
4500 v->NumberOfDSCSlices[k],
4501 v->AudioSampleRate[k],
4502 v->AudioSampleLayout[k],
4503 v->ODMCombineEnablePerState[i][k]);
4504 v->OutputBppPerState[i][k] = v->Outbpp;
4505 // TODO: Need some other way to handle this nonsense
4506 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4508 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4509 v->Outbpp = TruncToValidBPP(
4510 (1.0 - v->Downspreading / 100.0) * 5400,
4511 v->OutputLinkDPLanes[k],
4514 v->PixelClockBackEnd[k],
4515 v->ForcedOutputLinkBPP[k],
4519 v->DSCInputBitPerComponent[k],
4520 v->NumberOfDSCSlices[k],
4521 v->AudioSampleRate[k],
4522 v->AudioSampleLayout[k],
4523 v->ODMCombineEnablePerState[i][k]);
4524 v->OutputBppPerState[i][k] = v->Outbpp;
4525 // TODO: Need some other way to handle this nonsense
4526 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4528 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4529 v->Outbpp = TruncToValidBPP(
4530 (1.0 - v->Downspreading / 100.0) * 8100,
4531 v->OutputLinkDPLanes[k],
4534 v->PixelClockBackEnd[k],
4535 v->ForcedOutputLinkBPP[k],
4539 v->DSCInputBitPerComponent[k],
4540 v->NumberOfDSCSlices[k],
4541 v->AudioSampleRate[k],
4542 v->AudioSampleLayout[k],
4543 v->ODMCombineEnablePerState[i][k]);
4544 v->OutputBppPerState[i][k] = v->Outbpp;
4545 // TODO: Need some other way to handle this nonsense
4546 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4548 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4549 v->Outbpp = TruncToValidBPP(
4550 (1.0 - v->Downspreading / 100.0) * 10000,
4554 v->PixelClockBackEnd[k],
4555 v->ForcedOutputLinkBPP[k],
4559 v->DSCInputBitPerComponent[k],
4560 v->NumberOfDSCSlices[k],
4561 v->AudioSampleRate[k],
4562 v->AudioSampleLayout[k],
4563 v->ODMCombineEnablePerState[i][k]);
4564 v->OutputBppPerState[i][k] = v->Outbpp;
4565 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
4567 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4568 v->Outbpp = TruncToValidBPP(
4573 v->PixelClockBackEnd[k],
4574 v->ForcedOutputLinkBPP[k],
4578 v->DSCInputBitPerComponent[k],
4579 v->NumberOfDSCSlices[k],
4580 v->AudioSampleRate[k],
4581 v->AudioSampleLayout[k],
4582 v->ODMCombineEnablePerState[i][k]);
4583 v->OutputBppPerState[i][k] = v->Outbpp;
4584 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
4588 v->OutputBppPerState[i][k] = 0;
4593 for (i = 0; i < v->soc.num_states; i++) {
4594 v->LinkCapacitySupport[i] = true;
4595 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4596 if (v->BlendingAndTiming[k] == k
4597 && (v->Output[k] == dm_dp ||
4598 v->Output[k] == dm_edp ||
4599 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4600 v->LinkCapacitySupport[i] = false;
4606 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4607 if (v->BlendingAndTiming[k] == k
4608 && (v->Output[k] == dm_dp ||
4609 v->Output[k] == dm_edp ||
4610 v->Output[k] == dm_hdmi)) {
4611 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4614 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4615 && !v->DSC422NativeSupport) {
4616 DSC422NativeNotSupported = true;
4621 for (i = 0; i < v->soc.num_states; ++i) {
4622 v->ODMCombine4To1SupportCheckOK[i] = true;
4623 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4624 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4625 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4626 || v->Output[k] == dm_hdmi)) {
4627 v->ODMCombine4To1SupportCheckOK[i] = false;
4632 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4634 for (i = 0; i < v->soc.num_states; i++) {
4635 v->NotEnoughDSCUnits[i] = false;
4636 v->TotalDSCUnitsRequired = 0.0;
4637 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4638 if (v->RequiresDSC[i][k] == true) {
4639 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4640 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4641 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4642 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4644 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4648 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4649 v->NotEnoughDSCUnits[i] = true;
4652 /*DSC Delay per state*/
4654 for (i = 0; i < v->soc.num_states; i++) {
4655 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4656 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4659 v->BPP = v->OutputBppPerState[i][k];
4661 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4662 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4663 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4664 v->DSCInputBitPerComponent[k],
4666 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4667 v->NumberOfDSCSlices[k],
4669 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4670 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4671 v->DSCDelayPerState[i][k] = 2.0
4672 * (dscceComputeDelay(
4673 v->DSCInputBitPerComponent[k],
4675 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4676 v->NumberOfDSCSlices[k] / 2,
4678 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4680 v->DSCDelayPerState[i][k] = 4.0
4681 * (dscceComputeDelay(
4682 v->DSCInputBitPerComponent[k],
4684 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4685 v->NumberOfDSCSlices[k] / 4,
4687 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4689 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4691 v->DSCDelayPerState[i][k] = 0.0;
4694 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4695 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4696 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4697 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4703 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4705 for (i = 0; i < v->soc.num_states; ++i) {
4706 for (j = 0; j <= 1; ++j) {
4707 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4708 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4709 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4710 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4713 CalculateSwathAndDETConfiguration(
4715 v->NumberOfActivePlanes,
4716 v->DETBufferSizeInKByte[0],
4717 v->MaximumSwathWidthLuma,
4718 v->MaximumSwathWidthChroma,
4720 v->SourcePixelFormat,
4728 v->Read256BlockHeightY,
4729 v->Read256BlockHeightC,
4730 v->Read256BlockWidthY,
4731 v->Read256BlockWidthC,
4732 v->ODMCombineEnableThisState,
4733 v->BlendingAndTiming,
4736 v->BytePerPixelInDETY,
4737 v->BytePerPixelInDETC,
4741 v->NoOfDPPThisState,
4742 v->swath_width_luma_ub_this_state,
4743 v->swath_width_chroma_ub_this_state,
4744 v->SwathWidthYThisState,
4745 v->SwathWidthCThisState,
4746 v->SwathHeightYThisState,
4747 v->SwathHeightCThisState,
4748 v->DETBufferSizeYThisState,
4749 v->DETBufferSizeCThisState,
4751 &v->ViewportSizeSupport[i][j]);
4753 CalculateDCFCLKDeepSleep(
4755 v->NumberOfActivePlanes,
4760 v->SwathWidthYThisState,
4761 v->SwathWidthCThisState,
4762 v->NoOfDPPThisState,
4767 v->PSCL_FACTOR_CHROMA,
4768 v->RequiredDPPCLKThisState,
4769 v->ReadBandwidthLuma,
4770 v->ReadBandwidthChroma,
4772 &v->ProjectedDCFCLKDeepSleep[i][j]);
4774 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4775 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4776 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4777 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4778 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4779 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4780 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4781 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4782 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4787 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4788 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4789 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4792 for (i = 0; i < v->soc.num_states; i++) {
4793 for (j = 0; j < 2; j++) {
4794 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4796 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4797 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4798 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4799 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4800 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4801 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4802 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4803 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4804 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4807 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4808 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4809 if (v->DCCEnable[k] == true) {
4810 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4814 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4815 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4816 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4818 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4819 && v->SourceScan[k] != dm_vert) {
4820 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4822 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4824 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4825 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4828 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4831 v->Read256BlockHeightC[k],
4832 v->Read256BlockWidthC[k],
4833 v->SourcePixelFormat[k],
4834 v->SurfaceTiling[k],
4835 v->BytePerPixelC[k],
4837 v->SwathWidthCThisState[k],
4838 v->ViewportHeightChroma[k],
4841 v->HostVMMaxNonCachedPageTableLevels,
4842 v->GPUVMMinPageSize,
4843 v->HostVMMinPageSize,
4844 v->PTEBufferSizeInRequestsForChroma,
4847 &v->MacroTileWidthC[k],
4849 &v->DPTEBytesPerRowC,
4850 &v->PTEBufferSizeNotExceededC[i][j][k],
4852 &v->dpte_row_height_chroma[k],
4856 &v->meta_row_height_chroma[k],
4863 &v->dummyinteger11);
4865 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4870 v->ProgressiveToInterlaceUnitInOPP,
4871 v->SwathHeightCThisState[k],
4872 v->ViewportYStartC[k],
4876 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4877 v->PTEBufferSizeInRequestsForChroma = 0;
4878 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4879 v->MetaRowBytesC = 0.0;
4880 v->DPTEBytesPerRowC = 0.0;
4881 v->PrefetchLinesC[i][j][k] = 0.0;
4882 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4884 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4887 v->Read256BlockHeightY[k],
4888 v->Read256BlockWidthY[k],
4889 v->SourcePixelFormat[k],
4890 v->SurfaceTiling[k],
4891 v->BytePerPixelY[k],
4893 v->SwathWidthYThisState[k],
4894 v->ViewportHeight[k],
4897 v->HostVMMaxNonCachedPageTableLevels,
4898 v->GPUVMMinPageSize,
4899 v->HostVMMinPageSize,
4900 v->PTEBufferSizeInRequestsForLuma,
4902 v->DCCMetaPitchY[k],
4903 &v->MacroTileWidthY[k],
4905 &v->DPTEBytesPerRowY,
4906 &v->PTEBufferSizeNotExceededY[i][j][k],
4908 &v->dpte_row_height[k],
4912 &v->meta_row_height[k],
4914 &v->dpte_group_bytes[k],
4920 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4925 v->ProgressiveToInterlaceUnitInOPP,
4926 v->SwathHeightYThisState[k],
4927 v->ViewportYStartY[k],
4930 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4931 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4932 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4934 CalculateRowBandwidth(
4936 v->SourcePixelFormat[k],
4940 v->HTotal[k] / v->PixelClock[k],
4943 v->meta_row_height[k],
4944 v->meta_row_height_chroma[k],
4945 v->DPTEBytesPerRowY,
4946 v->DPTEBytesPerRowC,
4947 v->dpte_row_height[k],
4948 v->dpte_row_height_chroma[k],
4949 &v->meta_row_bandwidth[i][j][k],
4950 &v->dpte_row_bandwidth[i][j][k]);
4952 v->UrgLatency[i] = CalculateUrgentLatency(
4953 v->UrgentLatencyPixelDataOnly,
4954 v->UrgentLatencyPixelMixedWithVMData,
4955 v->UrgentLatencyVMDataOnly,
4956 v->DoUrgentLatencyAdjustment,
4957 v->UrgentLatencyAdjustmentFabricClockComponent,
4958 v->UrgentLatencyAdjustmentFabricClockReference,
4959 v->FabricClockPerState[i]);
4961 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4962 CalculateUrgentBurstFactor(
4963 v->swath_width_luma_ub_this_state[k],
4964 v->swath_width_chroma_ub_this_state[k],
4965 v->SwathHeightYThisState[k],
4966 v->SwathHeightCThisState[k],
4967 v->HTotal[k] / v->PixelClock[k],
4969 v->CursorBufferSize,
4970 v->CursorWidth[k][0],
4974 v->BytePerPixelInDETY[k],
4975 v->BytePerPixelInDETC[k],
4976 v->DETBufferSizeYThisState[k],
4977 v->DETBufferSizeCThisState[k],
4978 &v->UrgentBurstFactorCursor[k],
4979 &v->UrgentBurstFactorLuma[k],
4980 &v->UrgentBurstFactorChroma[k],
4981 &NotUrgentLatencyHiding[k]);
4984 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4985 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4986 if (NotUrgentLatencyHiding[k]) {
4987 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4991 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4992 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4993 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4994 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4997 v->TotalVActivePixelBandwidth[i][j] = 0;
4998 v->TotalVActiveCursorBandwidth[i][j] = 0;
4999 v->TotalMetaRowBandwidth[i][j] = 0;
5000 v->TotalDPTERowBandwidth[i][j] = 0;
5001 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5002 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5003 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5004 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5005 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5010 //Calculate Return BW
5011 for (i = 0; i < v->soc.num_states; ++i) {
5012 for (j = 0; j <= 1; ++j) {
5013 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5014 if (v->BlendingAndTiming[k] == k) {
5015 if (v->WritebackEnable[k] == true) {
5016 v->WritebackDelayTime[k] = v->WritebackLatency
5017 + CalculateWriteBackDelay(
5018 v->WritebackPixelFormat[k],
5019 v->WritebackHRatio[k],
5020 v->WritebackVRatio[k],
5021 v->WritebackVTaps[k],
5022 v->WritebackDestinationWidth[k],
5023 v->WritebackDestinationHeight[k],
5024 v->WritebackSourceHeight[k],
5025 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5027 v->WritebackDelayTime[k] = 0.0;
5029 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5030 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5031 v->WritebackDelayTime[k] = dml_max(
5032 v->WritebackDelayTime[k],
5034 + CalculateWriteBackDelay(
5035 v->WritebackPixelFormat[m],
5036 v->WritebackHRatio[m],
5037 v->WritebackVRatio[m],
5038 v->WritebackVTaps[m],
5039 v->WritebackDestinationWidth[m],
5040 v->WritebackDestinationHeight[m],
5041 v->WritebackSourceHeight[m],
5042 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5047 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5048 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5049 if (v->BlendingAndTiming[k] == m) {
5050 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5054 v->MaxMaxVStartup[i][j] = 0;
5055 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5056 v->MaximumVStartup[i][j][k] =
5057 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5058 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5059 v->VTotal[k] - v->VActive[k]
5063 1.0 * v->WritebackDelayTime[k]
5065 / v->PixelClock[k]),
5067 if (v->MaximumVStartup[i][j][k] > 1023)
5068 v->MaximumVStartup[i][j][k] = 1023;
5069 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5074 ReorderingBytes = v->NumberOfChannels
5076 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5077 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5078 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5080 for (i = 0; i < v->soc.num_states; ++i) {
5081 for (j = 0; j <= 1; ++j) {
5082 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5086 if (v->UseMinimumRequiredDCFCLK == true) {
5089 v->MaxInterDCNTileRepeaters,
5091 v->DRAMClockChangeLatency,
5092 v->SREnterPlusExitTime,
5094 v->RoundTripPingLatencyCycles,
5096 v->PixelChunkSizeInKByte,
5099 v->GPUVMMaxPageTableLevels,
5101 v->NumberOfActivePlanes,
5102 v->HostVMMinPageSize,
5103 v->HostVMMaxNonCachedPageTableLevels,
5104 v->DynamicMetadataVMEnabled,
5105 v->ImmediateFlipRequirement[0],
5106 v->ProgressiveToInterlaceUnitInOPP,
5107 v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
5108 v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
5111 v->DynamicMetadataTransmittedBytes,
5112 v->DynamicMetadataLinesBeforeActiveRequired,
5118 v->ProjectedDCFCLKDeepSleep,
5120 v->TotalVActivePixelBandwidth,
5121 v->TotalVActiveCursorBandwidth,
5122 v->TotalMetaRowBandwidth,
5123 v->TotalDPTERowBandwidth,
5124 v->TotalNumberOfActiveDPP,
5125 v->TotalNumberOfDCCActiveDPP,
5126 v->dpte_group_bytes,
5129 v->swath_width_luma_ub_all_states,
5130 v->swath_width_chroma_ub_all_states,
5135 v->PDEAndMetaPTEBytesPerFrame,
5138 v->DynamicMetadataEnable,
5139 v->VActivePixelBandwidth,
5140 v->VActiveCursorBandwidth,
5141 v->ReadBandwidthLuma,
5142 v->ReadBandwidthChroma,
5147 for (i = 0; i < v->soc.num_states; ++i) {
5148 for (j = 0; j <= 1; ++j) {
5149 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5150 v->ReturnBusWidth * v->DCFCLKState[i][j],
5151 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5152 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5153 double PixelDataOnlyReturnBWPerState = dml_min(
5154 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5155 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5156 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5157 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5158 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5160 if (v->HostVMEnable != true) {
5161 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5163 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5168 //Re-ordering Buffer Support Check
5169 for (i = 0; i < v->soc.num_states; ++i) {
5170 for (j = 0; j <= 1; ++j) {
5171 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5172 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5173 v->ROBSupport[i][j] = true;
5175 v->ROBSupport[i][j] = false;
5180 //Vertical Active BW support check
5182 MaxTotalVActiveRDBandwidth = 0;
5183 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5184 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5187 for (i = 0; i < v->soc.num_states; ++i) {
5188 for (j = 0; j <= 1; ++j) {
5189 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5191 v->ReturnBusWidth * v->DCFCLKState[i][j],
5192 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5193 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5194 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5195 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5197 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5198 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5200 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5205 v->UrgentLatency = CalculateUrgentLatency(
5206 v->UrgentLatencyPixelDataOnly,
5207 v->UrgentLatencyPixelMixedWithVMData,
5208 v->UrgentLatencyVMDataOnly,
5209 v->DoUrgentLatencyAdjustment,
5210 v->UrgentLatencyAdjustmentFabricClockComponent,
5211 v->UrgentLatencyAdjustmentFabricClockReference,
5214 for (i = 0; i < v->soc.num_states; ++i) {
5215 for (j = 0; j <= 1; ++j) {
5216 double VMDataOnlyReturnBWPerState;
5217 double HostVMInefficiencyFactor = 1;
5218 int NextPrefetchModeState = MinPrefetchMode;
5219 bool UnboundedRequestEnabledThisState = false;
5220 int CompressedBufferSizeInkByteThisState = 0;
5223 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5225 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5226 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5227 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5228 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5231 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5232 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5233 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5234 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5235 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5236 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5237 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5238 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5239 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5240 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5243 VMDataOnlyReturnBWPerState = dml_min(
5245 v->ReturnBusWidth * v->DCFCLKState[i][j],
5246 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5247 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5248 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5249 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5250 if (v->GPUVMEnable && v->HostVMEnable)
5251 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5253 v->ExtraLatency = CalculateExtraLatency(
5254 v->RoundTripPingLatencyCycles,
5256 v->DCFCLKState[i][j],
5257 v->TotalNumberOfActiveDPP[i][j],
5258 v->PixelChunkSizeInKByte,
5259 v->TotalNumberOfDCCActiveDPP[i][j],
5261 v->ReturnBWPerState[i][j],
5264 v->NumberOfActivePlanes,
5265 v->NoOfDPPThisState,
5266 v->dpte_group_bytes,
5267 HostVMInefficiencyFactor,
5268 v->HostVMMinPageSize,
5269 v->HostVMMaxNonCachedPageTableLevels);
5271 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5273 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5274 v->MaxVStartup = v->NextMaxVStartup;
5276 v->TWait = CalculateTWait(
5277 v->PrefetchModePerState[i][j],
5278 v->DRAMClockChangeLatency,
5280 v->SREnterPlusExitTime);
5282 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5285 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
5286 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
5287 myPipe.PixelClock = v->PixelClock[k];
5288 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
5289 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
5290 myPipe.ScalerEnabled = v->ScalerEnabled[k];
5291 myPipe.SourceScan = v->SourceScan[k];
5292 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
5293 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
5294 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
5295 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
5296 myPipe.InterlaceEnable = v->Interlace[k];
5297 myPipe.NumberOfCursors = v->NumberOfCursors[k];
5298 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
5299 myPipe.HTotal = v->HTotal[k];
5300 myPipe.DCCEnable = v->DCCEnable[k];
5301 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
5302 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
5303 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
5304 myPipe.BytePerPixelY = v->BytePerPixelY[k];
5305 myPipe.BytePerPixelC = v->BytePerPixelC[k];
5306 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
5307 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
5309 HostVMInefficiencyFactor,
5311 v->DSCDelayPerState[i][k],
5312 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
5314 v->DPPCLKDelaySCLLBOnly,
5315 v->DPPCLKDelayCNVCCursor,
5316 v->DISPCLKDelaySubtotal,
5317 v->SwathWidthYThisState[k] / v->HRatio[k],
5319 v->MaxInterDCNTileRepeaters,
5320 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
5321 v->MaximumVStartup[i][j][k],
5322 v->GPUVMMaxPageTableLevels,
5325 v->HostVMMaxNonCachedPageTableLevels,
5326 v->HostVMMinPageSize,
5327 v->DynamicMetadataEnable[k],
5328 v->DynamicMetadataVMEnabled,
5329 v->DynamicMetadataLinesBeforeActiveRequired[k],
5330 v->DynamicMetadataTransmittedBytes[k],
5334 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5335 v->MetaRowBytes[i][j][k],
5336 v->DPTEBytesPerRow[i][j][k],
5337 v->PrefetchLinesY[i][j][k],
5338 v->SwathWidthYThisState[k],
5341 v->PrefetchLinesC[i][j][k],
5342 v->SwathWidthCThisState[k],
5345 v->swath_width_luma_ub_this_state[k],
5346 v->swath_width_chroma_ub_this_state[k],
5347 v->SwathHeightYThisState[k],
5348 v->SwathHeightCThisState[k],
5350 &v->DSTXAfterScaler[k],
5351 &v->DSTYAfterScaler[k],
5352 &v->LineTimesForPrefetch[k],
5354 &v->LinesForMetaPTE[k],
5355 &v->LinesForMetaAndDPTERow[k],
5356 &v->VRatioPreY[i][j][k],
5357 &v->VRatioPreC[i][j][k],
5358 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
5359 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
5360 &v->NoTimeForDynamicMetadata[i][j][k],
5362 &v->prefetch_vmrow_bw[k],
5366 &v->VUpdateOffsetPix[k],
5367 &v->VUpdateWidthPix[k],
5368 &v->VReadyOffsetPix[k]);
5371 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5372 CalculateUrgentBurstFactor(
5373 v->swath_width_luma_ub_this_state[k],
5374 v->swath_width_chroma_ub_this_state[k],
5375 v->SwathHeightYThisState[k],
5376 v->SwathHeightCThisState[k],
5377 v->HTotal[k] / v->PixelClock[k],
5379 v->CursorBufferSize,
5380 v->CursorWidth[k][0],
5382 v->VRatioPreY[i][j][k],
5383 v->VRatioPreC[i][j][k],
5384 v->BytePerPixelInDETY[k],
5385 v->BytePerPixelInDETC[k],
5386 v->DETBufferSizeYThisState[k],
5387 v->DETBufferSizeCThisState[k],
5388 &v->UrgentBurstFactorCursorPre[k],
5389 &v->UrgentBurstFactorLumaPre[k],
5390 &v->UrgentBurstFactorChroma[k],
5391 &v->NotUrgentLatencyHidingPre[k]);
5394 v->MaximumReadBandwidthWithPrefetch = 0.0;
5395 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5396 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5397 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5399 v->MaximumReadBandwidthWithPrefetch =
5400 v->MaximumReadBandwidthWithPrefetch
5402 v->VActivePixelBandwidth[i][j][k]
5403 + v->VActiveCursorBandwidth[i][j][k]
5404 + v->NoOfDPP[i][j][k]
5405 * (v->meta_row_bandwidth[i][j][k]
5406 + v->dpte_row_bandwidth[i][j][k]),
5407 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5409 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5410 * v->UrgentBurstFactorLumaPre[k]
5411 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5412 * v->UrgentBurstFactorChromaPre[k])
5413 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5416 v->NotEnoughUrgentLatencyHidingPre = false;
5417 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5418 if (v->NotUrgentLatencyHidingPre[k] == true) {
5419 v->NotEnoughUrgentLatencyHidingPre = true;
5423 v->PrefetchSupported[i][j] = true;
5424 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5425 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5426 v->PrefetchSupported[i][j] = false;
5428 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5429 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5430 || v->NoTimeForPrefetch[i][j][k] == true) {
5431 v->PrefetchSupported[i][j] = false;
5435 v->DynamicMetadataSupported[i][j] = true;
5436 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5437 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5438 v->DynamicMetadataSupported[i][j] = false;
5442 v->VRatioInPrefetchSupported[i][j] = true;
5443 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5444 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5445 v->VRatioInPrefetchSupported[i][j] = false;
5448 v->AnyLinesForVMOrRowTooLarge = false;
5449 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5450 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5451 v->AnyLinesForVMOrRowTooLarge = true;
5455 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5457 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5458 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5459 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5460 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5462 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5464 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5465 * v->UrgentBurstFactorLumaPre[k]
5466 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5467 * v->UrgentBurstFactorChromaPre[k])
5468 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5470 v->TotImmediateFlipBytes = 0.0;
5471 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5472 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5473 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5474 + v->DPTEBytesPerRow[i][j][k];
5477 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5478 CalculateFlipSchedule(
5480 HostVMInefficiencyFactor,
5483 v->GPUVMMaxPageTableLevels,
5485 v->HostVMMaxNonCachedPageTableLevels,
5487 v->HostVMMinPageSize,
5488 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5489 v->MetaRowBytes[i][j][k],
5490 v->DPTEBytesPerRow[i][j][k],
5491 v->BandwidthAvailableForImmediateFlip,
5492 v->TotImmediateFlipBytes,
5493 v->SourcePixelFormat[k],
5494 v->HTotal[k] / v->PixelClock[k],
5499 v->dpte_row_height[k],
5500 v->meta_row_height[k],
5501 v->dpte_row_height_chroma[k],
5502 v->meta_row_height_chroma[k],
5503 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5504 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5505 &v->final_flip_bw[k],
5506 &v->ImmediateFlipSupportedForPipe[k]);
5508 v->total_dcn_read_bw_with_flip = 0.0;
5509 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5510 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5512 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5513 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5514 + v->VActiveCursorBandwidth[i][j][k],
5516 * (v->final_flip_bw[k]
5517 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5518 * v->UrgentBurstFactorLumaPre[k]
5519 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5520 * v->UrgentBurstFactorChromaPre[k])
5521 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5523 v->ImmediateFlipSupportedForState[i][j] = true;
5524 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5525 v->ImmediateFlipSupportedForState[i][j] = false;
5527 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5528 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5529 v->ImmediateFlipSupportedForState[i][j] = false;
5533 v->ImmediateFlipSupportedForState[i][j] = false;
5536 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5537 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5538 NextPrefetchModeState = NextPrefetchModeState + 1;
5540 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5542 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5543 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5544 && ((v->HostVMEnable == false &&
5545 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5546 || v->ImmediateFlipSupportedForState[i][j] == true))
5547 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5549 CalculateUnboundedRequestAndCompressedBufferSize(
5550 v->DETBufferSizeInKByte[0],
5551 v->ConfigReturnBufferSizeInKByte,
5552 v->UseUnboundedRequesting,
5553 v->TotalNumberOfActiveDPP[i][j],
5556 v->CompressedBufferSegmentSizeInkByte,
5558 &UnboundedRequestEnabledThisState,
5559 &CompressedBufferSizeInkByteThisState);
5561 CalculateWatermarksAndDRAMSpeedChangeSupport(
5563 v->PrefetchModePerState[i][j],
5564 v->NumberOfActivePlanes,
5565 v->MaxLineBufferLines,
5567 v->WritebackInterfaceBufferSize,
5568 v->DCFCLKState[i][j],
5569 v->ReturnBWPerState[i][j],
5570 v->SynchronizedVBlank,
5571 v->dpte_group_bytes,
5575 v->WritebackLatency,
5576 v->WritebackChunkSize,
5577 v->SOCCLKPerState[i],
5578 v->DRAMClockChangeLatency,
5580 v->SREnterPlusExitTime,
5582 v->SREnterPlusExitZ8Time,
5583 v->ProjectedDCFCLKDeepSleep[i][j],
5584 v->DETBufferSizeYThisState,
5585 v->DETBufferSizeCThisState,
5586 v->SwathHeightYThisState,
5587 v->SwathHeightCThisState,
5589 v->SwathWidthYThisState,
5590 v->SwathWidthCThisState,
5599 v->BlendingAndTiming,
5600 v->NoOfDPPThisState,
5601 v->BytePerPixelInDETY,
5602 v->BytePerPixelInDETC,
5606 v->WritebackPixelFormat,
5607 v->WritebackDestinationWidth,
5608 v->WritebackDestinationHeight,
5609 v->WritebackSourceHeight,
5610 UnboundedRequestEnabledThisState,
5611 CompressedBufferSizeInkByteThisState,
5612 &v->DRAMClockChangeSupport[i][j],
5613 &v->UrgentWatermark,
5614 &v->WritebackUrgentWatermark,
5615 &v->DRAMClockChangeWatermark,
5616 &v->WritebackDRAMClockChangeWatermark,
5621 &v->MinActiveDRAMClockChangeLatencySupported);
5625 /*PTE Buffer Size Check*/
5626 for (i = 0; i < v->soc.num_states; i++) {
5627 for (j = 0; j < 2; j++) {
5628 v->PTEBufferSizeNotExceeded[i][j] = true;
5629 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5630 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5631 v->PTEBufferSizeNotExceeded[i][j] = false;
5637 /*Cursor Support Check*/
5638 v->CursorSupport = true;
5639 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5640 if (v->CursorWidth[k][0] > 0.0) {
5641 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5642 v->CursorSupport = false;
5647 /*Valid Pitch Check*/
5648 v->PitchSupport = true;
5649 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5650 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5651 if (v->DCCEnable[k] == true) {
5652 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5654 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5656 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5657 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5658 && v->SourcePixelFormat[k] != dm_mono_8) {
5659 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5660 if (v->DCCEnable[k] == true) {
5661 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5662 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5663 64.0 * v->Read256BlockWidthC[k]);
5665 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5668 v->AlignedCPitch[k] = v->PitchC[k];
5669 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5671 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5672 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5673 v->PitchSupport = false;
5677 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5678 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5679 ViewportExceedsSurface = true;
5680 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5681 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5682 && v->SourcePixelFormat[k] != dm_rgbe) {
5683 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5684 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5685 ViewportExceedsSurface = true;
5691 /*Mode Support, Voltage State and SOC Configuration*/
5692 for (i = v->soc.num_states - 1; i >= 0; i--) {
5693 for (j = 0; j < 2; j++) {
5694 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5695 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5696 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5697 && v->DTBCLKRequiredMoreThanSupported[i] == false
5698 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5699 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5700 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5701 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5702 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5703 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5704 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5705 && ((v->HostVMEnable == false
5706 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5707 || v->ImmediateFlipSupportedForState[i][j] == true)
5708 && FMTBufferExceeded == false) {
5709 v->ModeSupport[i][j] = true;
5711 v->ModeSupport[i][j] = false;
5717 unsigned int MaximumMPCCombine = 0;
5718 for (i = v->soc.num_states; i >= 0; i--) {
5719 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5720 v->VoltageLevel = i;
5721 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5722 if (v->ModeSupport[i][0] == true) {
5723 MaximumMPCCombine = 0;
5725 MaximumMPCCombine = 1;
5729 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5730 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5731 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5732 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5734 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5735 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5736 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5737 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5738 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5739 v->maxMpcComb = MaximumMPCCombine;
/*
 * CalculateWatermarksAndDRAMSpeedChangeSupport - compute the urgent, DRAM
 * clock change, writeback and stutter watermarks, and classify how a DRAM
 * clock change can be supported.
 *
 * Values returned through pointers: *UrgentWatermark,
 * *WritebackUrgentWatermark, *DRAMClockChangeWatermark,
 * *WritebackDRAMClockChangeWatermark, *StutterExitWatermark,
 * *StutterEnterPlusExitWatermark, *Z8StutterExitWatermark,
 * *Z8StutterEnterPlusExitWatermark, *MinActiveDRAMClockChangeLatencySupported
 * and *DRAMClockChangeSupport.
 *
 * Fields of mode_lib->vba written as a side effect: TotalActiveWriteback,
 * LBLatencyHidingSourceLinesY/C, ActiveDRAMClockChangeLatencyMargin[],
 * MinActiveDRAMClockChangeMargin and TotalNumberOfActiveOTG.
 *
 * NOTE(review): SOCCLK, SRExitTime, HRatio, VRatio, LinesInDETC, k and j are
 * used below but have no declaration in the lines visible in this listing;
 * presumably they are declared on lines not shown here - confirm.
 */
5743 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5744 struct display_mode_lib *mode_lib,
5745 unsigned int PrefetchMode,
5746 unsigned int NumberOfActivePlanes,
5747 unsigned int MaxLineBufferLines,
5748 unsigned int LineBufferSize,
5749 unsigned int WritebackInterfaceBufferSize,
5752 bool SynchronizedVBlank,
5753 unsigned int dpte_group_bytes[],
5754 unsigned int MetaChunkSize,
5755 double UrgentLatency,
5756 double ExtraLatency,
5757 double WritebackLatency,
5758 double WritebackChunkSize,
5760 double DRAMClockChangeLatency,
5762 double SREnterPlusExitTime,
5763 double SRExitZ8Time,
5764 double SREnterPlusExitZ8Time,
5765 double DCFCLKDeepSleep,
5766 unsigned int DETBufferSizeY[],
5767 unsigned int DETBufferSizeC[],
5768 unsigned int SwathHeightY[],
5769 unsigned int SwathHeightC[],
5770 unsigned int LBBitPerPixel[],
5771 double SwathWidthY[],
5772 double SwathWidthC[],
5774 double HRatioChroma[],
5775 unsigned int vtaps[],
5776 unsigned int VTAPsChroma[],
5778 double VRatioChroma[],
5779 unsigned int HTotal[],
5780 double PixelClock[],
5781 unsigned int BlendingAndTiming[],
5782 unsigned int DPPPerPlane[],
5783 double BytePerPixelDETY[],
5784 double BytePerPixelDETC[],
5785 double DSTXAfterScaler[],
5786 double DSTYAfterScaler[],
5787 bool WritebackEnable[],
5788 enum source_format_class WritebackPixelFormat[],
5789 double WritebackDestinationWidth[],
5790 double WritebackDestinationHeight[],
5791 double WritebackSourceHeight[],
5792 bool UnboundedRequestEnabled,
5793 int unsigned CompressedBufferSizeInkByte,
5794 enum clock_change_support *DRAMClockChangeSupport,
5795 double *UrgentWatermark,
5796 double *WritebackUrgentWatermark,
5797 double *DRAMClockChangeWatermark,
5798 double *WritebackDRAMClockChangeWatermark,
5799 double *StutterExitWatermark,
5800 double *StutterEnterPlusExitWatermark,
5801 double *Z8StutterExitWatermark,
5802 double *Z8StutterEnterPlusExitWatermark,
5803 double *MinActiveDRAMClockChangeLatencySupported)
5805 struct vba_vars_st *v = &mode_lib->vba;
5806 double EffectiveLBLatencyHidingY;
5807 double EffectiveLBLatencyHidingC;
5808 double LinesInDETY[DC__NUM_DPP__MAX];
5810 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5811 unsigned int LinesInDETCRoundedDownToSwath;
5812 double FullDETBufferingTimeY;
5813 double FullDETBufferingTimeC;
5814 double ActiveDRAMClockChangeLatencyMarginY;
5815 double ActiveDRAMClockChangeLatencyMarginC;
5816 double WritebackDRAMClockChangeLatencyMargin;
5817 double PlaneWithMinActiveDRAMClockChangeMargin;
5818 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5819 double WritebackDRAMClockChangeLatencyHiding;
5820 double TotalPixelBW = 0.0;
/* The urgent watermark is the urgent latency plus the extra latency. */
5823 *UrgentWatermark = UrgentLatency + ExtraLatency;
5825 #ifdef __DML_VBA_DEBUG__
5826 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5827 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5828 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
/* The DRAM clock change watermark stacks the clock change latency on top of the urgent watermark. */
5831 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5833 #ifdef __DML_VBA_DEBUG__
5834 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
5835 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
/* Count the planes that have writeback enabled. */
5838 v->TotalActiveWriteback = 0;
5839 for (k = 0; k < NumberOfActivePlanes; ++k) {
5840 if (WritebackEnable[k] == true) {
5841 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
/*
 * With at most one active writeback the writeback watermarks use
 * WritebackLatency alone; the alternative assignments add
 * WritebackChunkSize * 1024.0 / 32.0 / SOCCLK.
 */
5845 if (v->TotalActiveWriteback <= 1) {
5846 *WritebackUrgentWatermark = WritebackLatency;
5848 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5851 if (v->TotalActiveWriteback <= 1) {
5852 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5854 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
/* Accumulate the luma + chroma pixel bandwidth of every plane into TotalPixelBW. */
5857 for (k = 0; k < NumberOfActivePlanes; ++k) {
5858 TotalPixelBW = TotalPixelBW
5859 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
5860 / (HTotal[k] / PixelClock[k]);
/* Per plane: compute the latency margin left for a DRAM clock change during active display. */
5863 for (k = 0; k < NumberOfActivePlanes; ++k) {
5864 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
/*
 * Source lines of latency hiding offered by the line buffer: capped at
 * MaxLineBufferLines and reduced by (taps - 1). LineBufferSize and
 * LBBitPerPixel[] are both unsigned int, so their quotient is truncated
 * before the floating point division that follows.
 */
5866 v->LBLatencyHidingSourceLinesY = dml_min(
5867 (double) MaxLineBufferLines,
5868 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5870 v->LBLatencyHidingSourceLinesC = dml_min(
5871 (double) MaxLineBufferLines,
5872 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
/* Convert the hidden source lines to time: lines / vertical ratio * (HTotal / PixelClock). */
5874 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5876 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
/*
 * With unbounded requests, grow the luma DET size by this plane's share of
 * the compressed buffer (its luma bandwidth divided by TotalPixelBW).
 */
5878 if (UnboundedRequestEnabled) {
5879 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5880 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
/* Lines held in the DET, rounded down to a multiple of the swath height, then converted to time. */
5883 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5884 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5885 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5886 if (BytePerPixelDETC[k] > 0) {
/*
 * NOTE(review): the chroma path reads v->DETBufferSizeC[k], whereas the luma
 * path above uses the DETBufferSizeY[] parameter; the DETBufferSizeC[]
 * parameter is not referenced in the visible lines - confirm this is intended.
 */
5887 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5888 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5889 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
/* Large placeholder value; presumably the no-chroma branch (its opening line is not shown here). */
5892 FullDETBufferingTimeC = 999999;
/*
 * Margin = line buffer hiding + DET buffering time - the time corresponding
 * to DSTXAfterScaler/DSTYAfterScaler - urgent watermark - DRAM clock change
 * watermark.
 */
5895 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5896 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
/* With several planes, additionally subtract (1 - 1/NumberOfActivePlanes) of one swath time. */
5898 if (NumberOfActivePlanes > 1) {
5899 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5900 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5903 if (BytePerPixelDETC[k] > 0) {
5904 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5905 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
5907 if (NumberOfActivePlanes > 1) {
5908 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5909 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
/* With chroma present the plane margin is the smaller of the luma and chroma margins. */
5911 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5913 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
/* A plane with writeback is further limited by the writeback margin. */
5916 if (WritebackEnable[k] == true) {
5917 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
5918 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
/* The hiding time is halved for the dm_444_64 writeback format. */
5919 if (WritebackPixelFormat[k] == dm_444_64) {
5920 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
/*
 * NOTE(review): this reads v->WritebackDRAMClockChangeWatermark, not the
 * *WritebackDRAMClockChangeWatermark output assigned earlier in this
 * function - confirm both refer to the same value at this point.
 */
5922 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5923 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
/*
 * Find the smallest per-plane margin and remember the plane it is attributed
 * to: the plane itself when BlendingAndTiming[k] == k, otherwise the plane j
 * that BlendingAndTiming[k] designates.
 */
5927 v->MinActiveDRAMClockChangeMargin = 999999;
5928 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5929 for (k = 0; k < NumberOfActivePlanes; ++k) {
5930 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5931 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5932 if (BlendingAndTiming[k] == k) {
5933 PlaneWithMinActiveDRAMClockChangeMargin = k;
5935 for (j = 0; j < NumberOfActivePlanes; ++j) {
5936 if (BlendingAndTiming[k] == j) {
5937 PlaneWithMinActiveDRAMClockChangeMargin = j;
5944 *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
/*
 * Smallest margin among the remaining planes, i.e. those that neither are
 * the plane found above nor have BlendingAndTiming[] pointing at it.
 */
5946 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5947 for (k = 0; k < NumberOfActivePlanes; ++k) {
5948 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5949 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5950 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
/* Count the planes for which BlendingAndTiming[k] == k. */
5954 v->TotalNumberOfActiveOTG = 0;
5956 for (k = 0; k < NumberOfActivePlanes; ++k) {
5957 if (BlendingAndTiming[k] == k) {
5958 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
/*
 * Classification, only possible when PrefetchMode == 0:
 *  - vactive when the minimum margin is positive;
 *  - vblank when vblanks are synchronized, there is exactly one active OTG,
 *    or the second-smallest margin is positive;
 * the last assignment below reports dm_dram_clock_change_unsupported.
 */
5962 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5963 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5964 } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5965 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5966 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5968 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
/* Each stutter watermark is its exit (or enter-plus-exit) time + ExtraLatency + 10 / DCFCLKDeepSleep. */
5971 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5972 *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5973 *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5974 *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5976 #ifdef __DML_VBA_DEBUG__
5977 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5978 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5979 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5980 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
/*
 * CalculateDCFCLKDeepSleep - compute the deep sleep DCFCLK value.
 *
 * Fills mode_lib->vba.DCFCLKDeepSleepPerPlane[] for every active plane and
 * returns in *DCFCLKDeepSleep the maximum of 8.0, the term derived from the
 * total read bandwidth, and all per-plane values.
 *
 * NOTE(review): VRatio, HRatio, DPPCLK, ReturnBusWidth and k are used below
 * but have no declaration in the lines visible in this listing; presumably
 * they are declared on lines not shown here - confirm.
 */
5984 static void CalculateDCFCLKDeepSleep(
5985 struct display_mode_lib *mode_lib,
5986 unsigned int NumberOfActivePlanes,
5987 int BytePerPixelY[],
5988 int BytePerPixelC[],
5990 double VRatioChroma[],
5991 double SwathWidthY[],
5992 double SwathWidthC[],
5993 unsigned int DPPPerPlane[],
5995 double HRatioChroma[],
5996 double PixelClock[],
5997 double PSCL_THROUGHPUT[],
5998 double PSCL_THROUGHPUT_CHROMA[],
6000 double ReadBandwidthLuma[],
6001 double ReadBandwidthChroma[],
6003 double *DCFCLKDeepSleep)
6005 struct vba_vars_st *v = &mode_lib->vba;
6006 double DisplayPipeLineDeliveryTimeLuma;
6007 double DisplayPipeLineDeliveryTimeChroma;
6008 double ReadBandwidth = 0.0;
6011 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Luma line delivery time: derived from the pixel clock when VRatio <= 1;
 * the alternative assignment derives it from the scaler throughput and DPPCLK.
 */
6013 if (VRatio[k] <= 1) {
6014 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6016 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Chroma line delivery time: zero without a chroma plane, otherwise the same two-way choice as luma. */
6018 if (BytePerPixelC[k] == 0) {
6019 DisplayPipeLineDeliveryTimeChroma = 0;
6021 if (VRatioChroma[k] <= 1) {
6022 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6024 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/*
 * Per-plane requirement: with chroma, the larger of the luma and chroma
 * terms (each divided by 32.0); the luma-only assignment divides by 64.0.
 */
6028 if (BytePerPixelC[k] > 0) {
6029 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
6030 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
6032 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
/* The per-plane value is never allowed below PixelClock / 16. */
6034 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
/* Total read bandwidth is the sum of the luma and chroma read bandwidth of all planes. */
6038 for (k = 0; k < NumberOfActivePlanes; ++k) {
6039 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
/* Start from max(8.0, bandwidth-derived term), then raise it to the largest per-plane value. */
6042 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
6044 for (k = 0; k < NumberOfActivePlanes; ++k) {
6045 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
/*
 * CalculateUrgentBurstFactor - compute the urgent burst factors of the
 * cursor, luma and chroma buffers of one plane.
 *
 * For each buffer the time it covers is compared with UrgentLatency. When the
 * buffer time does not exceed UrgentLatency the factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set to 1; otherwise the factor is
 * BufferTime / (BufferTime - UrgentLatency). *NotEnoughUrgentLatencyHiding is
 * cleared on entry.
 *
 * NOTE(review): LineTime and VRatio are used below but have no declaration in
 * the lines visible in this listing; presumably they are parameters declared
 * on lines not shown here - confirm.
 */
6049 static void CalculateUrgentBurstFactor(
6050 int swath_width_luma_ub,
6051 int swath_width_chroma_ub,
6052 unsigned int SwathHeightY,
6053 unsigned int SwathHeightC,
6055 double UrgentLatency,
6056 double CursorBufferSize,
6057 unsigned int CursorWidth,
6058 unsigned int CursorBPP,
6061 double BytePerPixelInDETY,
6062 double BytePerPixelInDETC,
6063 double DETBufferSizeY,
6064 double DETBufferSizeC,
6065 double *UrgentBurstFactorCursor,
6066 double *UrgentBurstFactorLuma,
6067 double *UrgentBurstFactorChroma,
6068 bool *NotEnoughUrgentLatencyHiding)
6070 double LinesInDETLuma;
6071 double LinesInDETChroma;
6072 unsigned int LinesInCursorBuffer;
6073 double CursorBufferSizeInTime;
6074 double DETBufferSizeInTimeLuma;
6075 double DETBufferSizeInTimeChroma;
6077 *NotEnoughUrgentLatencyHiding = 0;
6079 if (CursorWidth > 0) {
/*
 * Cursor lines that fit in the cursor buffer, rounded down to a power of two
 * (assuming dml_log2/dml_floor behave as their names suggest - confirm).
 */
6080 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
6082 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
6083 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
6084 *NotEnoughUrgentLatencyHiding = 1;
6085 *UrgentBurstFactorCursor = 0;
6087 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
/* Neutral factor; the condition selecting this assignment is on a line not shown here. */
6090 *UrgentBurstFactorCursor = 1;
/* Luma: DET lines rounded down to a multiple of the swath height, converted to time. */
6094 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
6096 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
6097 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
6098 *NotEnoughUrgentLatencyHiding = 1;
6099 *UrgentBurstFactorLuma = 0;
6101 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
/* Neutral factor; the condition selecting this assignment is on a line not shown here. */
6104 *UrgentBurstFactorLuma = 1;
/*
 * Chroma is only evaluated when the plane has chroma data.
 * NOTE(review): the chroma buffer time is divided by VRatio, the same divisor
 * as the luma path - confirm that a chroma-specific ratio is not intended.
 */
6107 if (BytePerPixelInDETC > 0) {
6108 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
6110 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
6111 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
6112 *NotEnoughUrgentLatencyHiding = 1;
6113 *UrgentBurstFactorChroma = 0;
6115 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
/* Neutral factor; the condition selecting this assignment is on a line not shown here. */
6118 *UrgentBurstFactorChroma = 1;
/*
 * CalculatePixelDeliveryTimes - compute, for every active plane, the line and
 * request delivery times for luma and chroma (both for normal operation and
 * for prefetch) and the cursor request delivery times.
 *
 * All results are written into the caller-provided output arrays
 * DisplayPipeLineDeliveryTime*[], DisplayPipeRequestDeliveryTime*[],
 * CursorRequestDeliveryTime[] and CursorRequestDeliveryTimePrefetch[].
 *
 * NOTE(review): VRatio, HRatio, DPPCLK and k are used below but have no
 * declaration in the lines visible in this listing; presumably they are
 * declared on lines not shown here - confirm.
 */
6123 static void CalculatePixelDeliveryTimes(
6124 unsigned int NumberOfActivePlanes,
6126 double VRatioChroma[],
6127 double VRatioPrefetchY[],
6128 double VRatioPrefetchC[],
6129 unsigned int swath_width_luma_ub[],
6130 unsigned int swath_width_chroma_ub[],
6131 unsigned int DPPPerPlane[],
6133 double HRatioChroma[],
6134 double PixelClock[],
6135 double PSCL_THROUGHPUT[],
6136 double PSCL_THROUGHPUT_CHROMA[],
6138 int BytePerPixelC[],
6139 enum scan_direction_class SourceScan[],
6140 unsigned int NumberOfCursors[],
6141 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6142 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6143 unsigned int BlockWidth256BytesY[],
6144 unsigned int BlockHeight256BytesY[],
6145 unsigned int BlockWidth256BytesC[],
6146 unsigned int BlockHeight256BytesC[],
6147 double DisplayPipeLineDeliveryTimeLuma[],
6148 double DisplayPipeLineDeliveryTimeChroma[],
6149 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6150 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6151 double DisplayPipeRequestDeliveryTimeLuma[],
6152 double DisplayPipeRequestDeliveryTimeChroma[],
6153 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6154 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6155 double CursorRequestDeliveryTime[],
6156 double CursorRequestDeliveryTimePrefetch[])
6158 double req_per_swath_ub;
/*
 * Pass 1: line delivery times. For each of the four cases (luma/chroma,
 * normal/prefetch) a vertical ratio <= 1 selects the pixel-clock based
 * formula; the alternative assignment uses scaler throughput and DPPCLK.
 * Chroma times are 0 when the plane has no chroma (BytePerPixelC == 0).
 */
6161 for (k = 0; k < NumberOfActivePlanes; ++k) {
6162 if (VRatio[k] <= 1) {
6163 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6165 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6168 if (BytePerPixelC[k] == 0) {
6169 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6171 if (VRatioChroma[k] <= 1) {
6172 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6174 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6178 if (VRatioPrefetchY[k] <= 1) {
6179 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6181 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6184 if (BytePerPixelC[k] == 0) {
6185 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6187 if (VRatioPrefetchC[k] <= 1) {
6188 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6190 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/*
 * Pass 2: request delivery time = line delivery time / requests per swath.
 * The 256-byte block width is the divisor unless the scan is dm_vert, in
 * which case the block height is used. Both operands of the division that
 * yields req_per_swath_ub are unsigned int, so the quotient is truncated
 * before it is stored in the double.
 * NOTE(review): if a swath width is smaller than the block dimension the
 * quotient is 0 and the divisions below divide by zero - confirm callers
 * guarantee this cannot happen.
 */
6195 for (k = 0; k < NumberOfActivePlanes; ++k) {
6196 if (SourceScan[k] != dm_vert) {
6197 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6199 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6201 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6202 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6203 if (BytePerPixelC[k] == 0) {
6204 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6205 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6207 if (SourceScan[k] != dm_vert) {
6208 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6210 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6212 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6213 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6215 #ifdef __DML_VBA_DEBUG__
6216 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6217 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6218 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6219 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6220 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6221 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6222 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6223 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6224 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6225 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6226 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6227 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
/*
 * Pass 3: cursor request delivery times. Only cursor index 0 of each plane
 * is considered.
 */
6231 for (k = 0; k < NumberOfActivePlanes; ++k) {
6232 int cursor_req_per_width;
/*
 * NOTE(review): CursorWidth and CursorBPP are unsigned int, so
 * "width * bpp / 256 / 8" is evaluated with integer division and is already
 * truncated when dml_ceil() sees it. For width * bpp < 2048 the result is 0,
 * and cursor_req_per_width is then used as a divisor below. Confirm whether
 * a floating point division (so that the ceiling rounds up) was intended.
 */
6233 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6234 if (NumberOfCursors[k] > 0) {
6235 if (VRatio[k] <= 1) {
6236 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6238 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6240 if (VRatioPrefetchY[k] <= 1) {
6241 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6243 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
/* Zero delivery times; presumably the branch for planes without a cursor (its opening line is not shown here). */
6246 CursorRequestDeliveryTime[k] = 0;
6247 CursorRequestDeliveryTimePrefetch[k] = 0;
6249 #ifdef __DML_VBA_DEBUG__
6250 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6251 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6252 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
/*
 * CalculateMetaAndPTETimes - compute, for every active plane, the number of
 * destination lines per PTE row and per meta row, the time per meta chunk and
 * the time per PTE group, each for nominal operation, vblank and immediate
 * flip, for luma and chroma.
 *
 * All results are written into the caller-provided output arrays
 * DST_Y_PER_*[], TimePer*MetaChunk*[] and time_per_pte_group_*[].
 *
 * NOTE(review): GPUVMEnable, MetaChunkSize, DCCEnable, VRatio, HTotal and k
 * are used below but have no declaration in the lines visible in this
 * listing; presumably they are declared on lines not shown here - confirm.
 */
6257 static void CalculateMetaAndPTETimes(
6258 int NumberOfActivePlanes,
6261 int MinMetaChunkSizeBytes,
6264 double VRatioChroma[],
6265 double DestinationLinesToRequestRowInVBlank[],
6266 double DestinationLinesToRequestRowInImmediateFlip[],
6268 double PixelClock[],
6269 int BytePerPixelY[],
6270 int BytePerPixelC[],
6271 enum scan_direction_class SourceScan[],
6272 int dpte_row_height[],
6273 int dpte_row_height_chroma[],
6274 int meta_row_width[],
6275 int meta_row_width_chroma[],
6276 int meta_row_height[],
6277 int meta_row_height_chroma[],
6278 int meta_req_width[],
6279 int meta_req_width_chroma[],
6280 int meta_req_height[],
6281 int meta_req_height_chroma[],
6282 int dpte_group_bytes[],
6283 int PTERequestSizeY[],
6284 int PTERequestSizeC[],
6285 int PixelPTEReqWidthY[],
6286 int PixelPTEReqHeightY[],
6287 int PixelPTEReqWidthC[],
6288 int PixelPTEReqHeightC[],
6289 int dpte_row_width_luma_ub[],
6290 int dpte_row_width_chroma_ub[],
6291 double DST_Y_PER_PTE_ROW_NOM_L[],
6292 double DST_Y_PER_PTE_ROW_NOM_C[],
6293 double DST_Y_PER_META_ROW_NOM_L[],
6294 double DST_Y_PER_META_ROW_NOM_C[],
6295 double TimePerMetaChunkNominal[],
6296 double TimePerChromaMetaChunkNominal[],
6297 double TimePerMetaChunkVBlank[],
6298 double TimePerChromaMetaChunkVBlank[],
6299 double TimePerMetaChunkFlip[],
6300 double TimePerChromaMetaChunkFlip[],
6301 double time_per_pte_group_nom_luma[],
6302 double time_per_pte_group_vblank_luma[],
6303 double time_per_pte_group_flip_luma[],
6304 double time_per_pte_group_nom_chroma[],
6305 double time_per_pte_group_vblank_chroma[],
6306 double time_per_pte_group_flip_chroma[])
6308 unsigned int meta_chunk_width;
6309 unsigned int min_meta_chunk_width;
6310 unsigned int meta_chunk_per_row_int;
6311 unsigned int meta_row_remainder;
6312 unsigned int meta_chunk_threshold;
6313 unsigned int meta_chunks_per_row_ub;
6314 unsigned int meta_chunk_width_chroma;
6315 unsigned int min_meta_chunk_width_chroma;
6316 unsigned int meta_chunk_per_row_int_chroma;
6317 unsigned int meta_row_remainder_chroma;
6318 unsigned int meta_chunk_threshold_chroma;
6319 unsigned int meta_chunks_per_row_ub_chroma;
6320 unsigned int dpte_group_width_luma;
6321 unsigned int dpte_groups_per_row_luma_ub;
6322 unsigned int dpte_group_width_chroma;
6323 unsigned int dpte_groups_per_row_chroma_ub;
/*
 * Pass 1: destination lines per PTE row and per meta row are the row height
 * divided by the vertical ratio; the chroma values are 0 when the plane has
 * no chroma (BytePerPixelC == 0).
 */
6326 for (k = 0; k < NumberOfActivePlanes; ++k) {
6327 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6328 if (BytePerPixelC[k] == 0) {
6329 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6331 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6333 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6334 if (BytePerPixelC[k] == 0) {
6335 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6337 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
/* Pass 2: time per meta chunk, only computed for planes with DCC enabled. */
6341 for (k = 0; k < NumberOfActivePlanes; ++k) {
6342 if (DCCEnable[k] == true) {
/* Chunk widths, whole chunks per row and the leftover width of the row. */
6343 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6344 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6345 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6346 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
/*
 * Threshold = 2 * minimum chunk width - meta request width (the request
 * height is used by the alternative assignment, presumably for dm_vert).
 * The target is unsigned int, so a negative difference would wrap.
 */
6347 if (SourceScan[k] != dm_vert) {
6348 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6350 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
/* Upper bound on chunks per row: one extra chunk, or two when the remainder exceeds the threshold. */
6352 if (meta_row_remainder <= meta_chunk_threshold) {
6353 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6355 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
/* Time per chunk = lines available * line time (HTotal / PixelClock) / chunks per row. */
6357 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6358 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6359 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6360 if (BytePerPixelC[k] == 0) {
6361 TimePerChromaMetaChunkNominal[k] = 0;
6362 TimePerChromaMetaChunkVBlank[k] = 0;
6363 TimePerChromaMetaChunkFlip[k] = 0;
/*
 * Chroma repeats the luma computation with the chroma inputs.
 * NOTE(review): the chunks-per-row division casts to double before being
 * stored in an unsigned int, while the luma path uses plain integer
 * division - the results should match, but confirm the cast is intentional.
 */
6365 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6366 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6367 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6368 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6369 if (SourceScan[k] != dm_vert) {
6370 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6372 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6374 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6375 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6377 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
/* The chroma vblank/flip times use the same DestinationLinesToRequestRow* inputs as luma. */
6379 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6380 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6381 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
/* All meta chunk times are zeroed; presumably the branch for planes without DCC (its opening line is not shown here). */
6384 TimePerMetaChunkNominal[k] = 0;
6385 TimePerMetaChunkVBlank[k] = 0;
6386 TimePerMetaChunkFlip[k] = 0;
6387 TimePerChromaMetaChunkNominal[k] = 0;
6388 TimePerChromaMetaChunkVBlank[k] = 0;
6389 TimePerChromaMetaChunkFlip[k] = 0;
/* Pass 3: time per PTE group, only computed when GPUVMEnable is true. */
6393 for (k = 0; k < NumberOfActivePlanes; ++k) {
6394 if (GPUVMEnable == true) {
/*
 * Width covered by one PTE group = requests per group (group bytes / request
 * size, integer division) * PTE request width; the request height is used by
 * the alternative assignment, presumably for dm_vert.
 */
6395 if (SourceScan[k] != dm_vert) {
6396 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6398 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
/* Groups per row is rounded up; time per group = lines available * line time / groups per row. */
6400 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6401 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6402 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6403 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6404 if (BytePerPixelC[k] == 0) {
6405 time_per_pte_group_nom_chroma[k] = 0;
6406 time_per_pte_group_vblank_chroma[k] = 0;
6407 time_per_pte_group_flip_chroma[k] = 0;
6409 if (SourceScan[k] != dm_vert) {
6410 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6412 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6414 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6415 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6416 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6417 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
/* All PTE group times are zeroed; presumably the branch taken when GPUVM is disabled (its opening line is not shown here). */
6420 time_per_pte_group_nom_luma[k] = 0;
6421 time_per_pte_group_vblank_luma[k] = 0;
6422 time_per_pte_group_flip_luma[k] = 0;
6423 time_per_pte_group_nom_chroma[k] = 0;
6424 time_per_pte_group_vblank_chroma[k] = 0;
6425 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes - compute, for every active plane, the time
 * available per VM group and per VM request during vblank and during an
 * immediate flip.
 *
 * Results are written into TimePerVMGroupVBlank[], TimePerVMGroupFlip[],
 * TimePerVMRequestVBlank[] and TimePerVMRequestFlip[].
 *
 * NOTE(review): GPUVMEnable, DCCEnable and k are used below but have no
 * declaration in the lines visible in this listing; presumably they are
 * declared on lines not shown here - confirm.
 */
6430 static void CalculateVMGroupAndRequestTimes(
6431 unsigned int NumberOfActivePlanes,
6433 unsigned int GPUVMMaxPageTableLevels,
6434 unsigned int HTotal[],
6435 int BytePerPixelC[],
6436 double DestinationLinesToRequestVMInVBlank[],
6437 double DestinationLinesToRequestVMInImmediateFlip[],
6439 double PixelClock[],
6440 int dpte_row_width_luma_ub[],
6441 int dpte_row_width_chroma_ub[],
6442 int vm_group_bytes[],
6443 unsigned int dpde0_bytes_per_frame_ub_l[],
6444 unsigned int dpde0_bytes_per_frame_ub_c[],
6445 int meta_pte_bytes_per_frame_ub_l[],
6446 int meta_pte_bytes_per_frame_ub_c[],
6447 double TimePerVMGroupVBlank[],
6448 double TimePerVMGroupFlip[],
6449 double TimePerVMRequestVBlank[],
6450 double TimePerVMRequestFlip[])
6452 int num_group_per_lower_vm_stage;
6453 int num_req_per_lower_vm_stage;
6456 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Times are only computed when GPUVM is enabled and the plane uses DCC or more than one page table level. */
6457 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
/*
 * Number of VM groups: each contribution is a byte count divided by
 * vm_group_bytes[k], rounded up. Without DCC only the dpde0 bytes count;
 * with DCC and a single page table level only the meta PTE bytes count; the
 * remaining case sums both and adds a constant (2 with chroma, 1 without).
 * Chroma contributions are included only when BytePerPixelC[k] > 0.
 */
6458 if (DCCEnable[k] == false) {
6459 if (BytePerPixelC[k] > 0) {
6460 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6461 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6463 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6466 if (GPUVMMaxPageTableLevels == 1) {
6467 if (BytePerPixelC[k] > 0) {
6468 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6469 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6471 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6474 if (BytePerPixelC[k] > 0) {
6475 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6476 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6477 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6478 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6480 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6481 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/*
 * Number of VM requests: the same case split as above, but each byte count
 * is divided by 64 with integer (truncating) division.
 * NOTE(review): if every contributing byte count is below 64 the request
 * count is 0 and is then used as a divisor further down - confirm callers
 * guarantee this cannot happen.
 */
6486 if (DCCEnable[k] == false) {
6487 if (BytePerPixelC[k] > 0) {
6488 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6490 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6493 if (GPUVMMaxPageTableLevels == 1) {
6494 if (BytePerPixelC[k] > 0) {
6495 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6497 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6500 if (BytePerPixelC[k] > 0) {
6501 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
6502 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6504 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
/* Time per group/request = lines available * line time (HTotal / PixelClock) / number of groups or requests. */
6509 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6510 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6511 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6512 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
/* With more than two page table levels all four times are halved. */
6514 if (GPUVMMaxPageTableLevels > 2) {
6515 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6516 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6517 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6518 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
/* All times are zeroed; presumably the branch taken when the enabling condition above is false (its opening line is not shown here). */
6522 TimePerVMGroupVBlank[k] = 0;
6523 TimePerVMGroupFlip[k] = 0;
6524 TimePerVMRequestVBlank[k] = 0;
6525 TimePerVMRequestFlip[k] = 0;
6530 static void CalculateStutterEfficiency(
6531 struct display_mode_lib *mode_lib,
6532 int CompressedBufferSizeInkByte,
6533 bool UnboundedRequestEnabled,
6534 int ConfigReturnBufferSizeInKByte,
6535 int MetaFIFOSizeInKEntries,
6536 int ZeroSizeBufferEntries,
6537 int NumberOfActivePlanes,
6538 int ROBBufferSizeInKByte,
6539 double TotalDataReadBandwidth,
6542 double COMPBUF_RESERVED_SPACE_64B,
6543 double COMPBUF_RESERVED_SPACE_ZS,
6545 double SRExitZ8Time,
6546 bool SynchronizedVBlank,
6547 double Z8StutterEnterPlusExitWatermark,
6548 double StutterEnterPlusExitWatermark,
6549 bool ProgressiveToInterlaceUnitInOPP,
6551 double MinTTUVBlank[],
6553 unsigned int DETBufferSizeY[],
6554 int BytePerPixelY[],
6555 double BytePerPixelDETY[],
6556 double SwathWidthY[],
6559 double NetDCCRateLuma[],
6560 double NetDCCRateChroma[],
6561 double DCCFractionOfZeroSizeRequestsLuma[],
6562 double DCCFractionOfZeroSizeRequestsChroma[],
6565 double PixelClock[],
6567 enum scan_direction_class SourceScan[],
6568 int BlockHeight256BytesY[],
6569 int BlockWidth256BytesY[],
6570 int BlockHeight256BytesC[],
6571 int BlockWidth256BytesC[],
6572 int DCCYMaxUncompressedBlock[],
6573 int DCCCMaxUncompressedBlock[],
6576 bool WritebackEnable[],
6577 double ReadBandwidthPlaneLuma[],
6578 double ReadBandwidthPlaneChroma[],
6579 double meta_row_bw[],
6580 double dpte_row_bw[],
6581 double *StutterEfficiencyNotIncludingVBlank,
6582 double *StutterEfficiency,
6583 int *NumberOfStutterBurstsPerFrame,
6584 double *Z8StutterEfficiencyNotIncludingVBlank,
6585 double *Z8StutterEfficiency,
6586 int *Z8NumberOfStutterBurstsPerFrame,
6587 double *StutterPeriod)
6589 struct vba_vars_st *v = &mode_lib->vba;
6591 double DETBufferingTimeY;
6592 double SwathWidthYCriticalPlane = 0;
6593 double VActiveTimeCriticalPlane = 0;
6594 double FrameTimeCriticalPlane = 0;
6595 int BytePerPixelYCriticalPlane = 0;
6596 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6597 double MinTTUVBlankCriticalPlane = 0;
6598 double TotalCompressedReadBandwidth;
6599 double TotalRowReadBandwidth;
6600 double AverageDCCCompressionRate;
6601 double EffectiveCompressedBufferSize;
6602 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6603 double StutterBurstTime;
6604 int TotalActiveWriteback;
6606 double LinesInDETYRoundedDownToSwath;
6607 double MaximumEffectiveCompressionLuma;
6608 double MaximumEffectiveCompressionChroma;
6609 double TotalZeroSizeRequestReadBandwidth;
6610 double TotalZeroSizeCompressedReadBandwidth;
6611 double AverageDCCZeroSizeFraction;
6612 double AverageZeroSizeCompressionRate;
6613 int TotalNumberOfActiveOTG = 0;
6614 double LastStutterPeriod = 0.0;
6615 double LastZ8StutterPeriod = 0.0;
6618 TotalZeroSizeRequestReadBandwidth = 0;
6619 TotalZeroSizeCompressedReadBandwidth = 0;
6620 TotalRowReadBandwidth = 0;
6621 TotalCompressedReadBandwidth = 0;
6623 for (k = 0; k < NumberOfActivePlanes; ++k) {
6624 if (DCCEnable[k] == true) {
6625 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6626 || DCCYMaxUncompressedBlock[k] < 256) {
6627 MaximumEffectiveCompressionLuma = 2;
6629 MaximumEffectiveCompressionLuma = 4;
6631 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6632 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6633 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6634 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6635 if (ReadBandwidthPlaneChroma[k] > 0) {
6636 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6637 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6638 MaximumEffectiveCompressionChroma = 2;
6640 MaximumEffectiveCompressionChroma = 4;
6642 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6643 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6644 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6645 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6646 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6649 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6651 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6654 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6655 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6657 #ifdef __DML_VBA_DEBUG__
6658 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6659 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6660 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6661 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6662 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6663 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6664 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6665 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6668 if (AverageDCCZeroSizeFraction == 1) {
6669 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6670 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6671 } else if (AverageDCCZeroSizeFraction > 0) {
6672 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6673 EffectiveCompressedBufferSize = dml_min(
6674 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6675 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6676 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6677 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6678 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6680 "DML::%s: min 2 = %f\n",
6682 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6683 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6684 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6686 EffectiveCompressedBufferSize = dml_min(
6687 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6688 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6689 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6690 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6693 #ifdef __DML_VBA_DEBUG__
6694 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6695 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6696 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6700 for (k = 0; k < NumberOfActivePlanes; ++k) {
6701 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6702 / BytePerPixelDETY[k] / SwathWidthY[k];
6703 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6704 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6705 #ifdef __DML_VBA_DEBUG__
6706 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6707 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6708 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6709 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6710 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6711 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6712 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6713 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6714 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6715 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6716 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6717 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6720 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6721 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6723 *StutterPeriod = DETBufferingTimeY;
6724 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6725 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6726 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6727 SwathWidthYCriticalPlane = SwathWidthY[k];
6728 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6729 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6731 #ifdef __DML_VBA_DEBUG__
6732 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6733 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6734 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6735 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6736 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6737 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6738 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6743 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6744 #ifdef __DML_VBA_DEBUG__
6745 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6746 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6747 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6748 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6749 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6750 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6751 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6752 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6753 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6754 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6757 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6758 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6759 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6760 #ifdef __DML_VBA_DEBUG__
6761 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6762 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6763 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6764 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6765 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6767 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6770 "DML::%s: Time to finish residue swath=%f\n",
6772 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6774 TotalActiveWriteback = 0;
6775 for (k = 0; k < NumberOfActivePlanes; ++k) {
6776 if (WritebackEnable[k]) {
6777 TotalActiveWriteback = TotalActiveWriteback + 1;
6781 if (TotalActiveWriteback == 0) {
6782 #ifdef __DML_VBA_DEBUG__
6783 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6784 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6785 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6786 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6788 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6789 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6790 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6791 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6793 *StutterEfficiencyNotIncludingVBlank = 0.;
6794 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6795 *NumberOfStutterBurstsPerFrame = 0;
6796 *Z8NumberOfStutterBurstsPerFrame = 0;
6798 #ifdef __DML_VBA_DEBUG__
6799 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6800 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6801 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6802 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6803 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6806 for (k = 0; k < NumberOfActivePlanes; ++k) {
6807 if (v->BlendingAndTiming[k] == k) {
6808 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6812 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6813 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6815 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6816 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6817 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6819 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6822 *StutterEfficiency = 0;
6825 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6826 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6827 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6828 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6829 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6831 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6834 *Z8StutterEfficiency = 0.;
6837 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6838 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6839 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6840 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6841 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6842 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6843 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6844 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6847 static void CalculateSwathAndDETConfiguration(
6848 bool ForceSingleDPP,
6849 int NumberOfActivePlanes,
6850 unsigned int DETBufferSizeInKByte,
6851 double MaximumSwathWidthLuma[],
6852 double MaximumSwathWidthChroma[],
6853 enum scan_direction_class SourceScan[],
6854 enum source_format_class SourcePixelFormat[],
6855 enum dm_swizzle_mode SurfaceTiling[],
6856 int ViewportWidth[],
6857 int ViewportHeight[],
6858 int SurfaceWidthY[],
6859 int SurfaceWidthC[],
6860 int SurfaceHeightY[],
6861 int SurfaceHeightC[],
6862 int Read256BytesBlockHeightY[],
6863 int Read256BytesBlockHeightC[],
6864 int Read256BytesBlockWidthY[],
6865 int Read256BytesBlockWidthC[],
6866 enum odm_combine_mode ODMCombineEnabled[],
6867 int BlendingAndTiming[],
6870 double BytePerPixDETY[],
6871 double BytePerPixDETC[],
6874 double HRatioChroma[],
6876 int swath_width_luma_ub[],
6877 int swath_width_chroma_ub[],
6878 double SwathWidth[],
6879 double SwathWidthChroma[],
6882 unsigned int DETBufferSizeY[],
6883 unsigned int DETBufferSizeC[],
6884 bool ViewportSizeSupportPerPlane[],
6885 bool *ViewportSizeSupport)
6887 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6888 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6889 int MinimumSwathHeightY;
6890 int MinimumSwathHeightC;
6891 int RoundedUpMaxSwathSizeBytesY;
6892 int RoundedUpMaxSwathSizeBytesC;
6893 int RoundedUpMinSwathSizeBytesY;
6894 int RoundedUpMinSwathSizeBytesC;
6895 int RoundedUpSwathSizeBytesY;
6896 int RoundedUpSwathSizeBytesC;
6897 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6898 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6901 CalculateSwathWidth(
6903 NumberOfActivePlanes,
6915 Read256BytesBlockHeightY,
6916 Read256BytesBlockHeightC,
6917 Read256BytesBlockWidthY,
6918 Read256BytesBlockWidthC,
6923 SwathWidthSingleDPP,
6924 SwathWidthSingleDPPChroma,
6927 MaximumSwathHeightY,
6928 MaximumSwathHeightC,
6929 swath_width_luma_ub,
6930 swath_width_chroma_ub);
6932 *ViewportSizeSupport = true;
6933 for (k = 0; k < NumberOfActivePlanes; ++k) {
6934 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6935 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6936 if (SurfaceTiling[k] == dm_sw_linear
6937 || (SourcePixelFormat[k] == dm_444_64
6938 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6939 && SourceScan[k] != dm_vert)) {
6940 MinimumSwathHeightY = MaximumSwathHeightY[k];
6941 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6942 MinimumSwathHeightY = MaximumSwathHeightY[k];
6944 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6946 MinimumSwathHeightC = MaximumSwathHeightC[k];
6948 if (SurfaceTiling[k] == dm_sw_linear) {
6949 MinimumSwathHeightY = MaximumSwathHeightY[k];
6950 MinimumSwathHeightC = MaximumSwathHeightC[k];
6951 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6952 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6953 MinimumSwathHeightC = MaximumSwathHeightC[k];
6954 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6955 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6956 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6957 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6958 MinimumSwathHeightY = MaximumSwathHeightY[k];
6959 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6961 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6962 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6966 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6967 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6968 if (SourcePixelFormat[k] == dm_420_10) {
6969 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6970 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6972 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6973 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6974 if (SourcePixelFormat[k] == dm_420_10) {
6975 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6976 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6979 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6980 SwathHeightY[k] = MaximumSwathHeightY[k];
6981 SwathHeightC[k] = MaximumSwathHeightC[k];
6982 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6983 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6984 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6985 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6986 SwathHeightY[k] = MinimumSwathHeightY;
6987 SwathHeightC[k] = MaximumSwathHeightC[k];
6988 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6989 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6990 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6991 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6992 SwathHeightY[k] = MaximumSwathHeightY[k];
6993 SwathHeightC[k] = MinimumSwathHeightC;
6994 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6995 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6997 SwathHeightY[k] = MinimumSwathHeightY;
6998 SwathHeightC[k] = MinimumSwathHeightC;
6999 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
7000 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
7003 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7004 if (SwathHeightC[k] == 0) {
7005 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
7006 DETBufferSizeC[k] = 0;
7007 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
7008 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
7009 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
7011 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
7012 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
7015 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
7016 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
7017 *ViewportSizeSupport = false;
7018 ViewportSizeSupportPerPlane[k] = false;
7020 ViewportSizeSupportPerPlane[k] = true;
7026 static void CalculateSwathWidth(
7027 bool ForceSingleDPP,
7028 int NumberOfActivePlanes,
7029 enum source_format_class SourcePixelFormat[],
7030 enum scan_direction_class SourceScan[],
7031 int ViewportWidth[],
7032 int ViewportHeight[],
7033 int SurfaceWidthY[],
7034 int SurfaceWidthC[],
7035 int SurfaceHeightY[],
7036 int SurfaceHeightC[],
7037 enum odm_combine_mode ODMCombineEnabled[],
7040 int Read256BytesBlockHeightY[],
7041 int Read256BytesBlockHeightC[],
7042 int Read256BytesBlockWidthY[],
7043 int Read256BytesBlockWidthC[],
7044 int BlendingAndTiming[],
7048 double SwathWidthSingleDPPY[],
7049 double SwathWidthSingleDPPC[],
7050 double SwathWidthY[],
7051 double SwathWidthC[],
7052 int MaximumSwathHeightY[],
7053 int MaximumSwathHeightC[],
7054 int swath_width_luma_ub[],
7055 int swath_width_chroma_ub[])
7057 enum odm_combine_mode MainPlaneODMCombine;
7060 #ifdef __DML_VBA_DEBUG__
7061 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
7064 for (k = 0; k < NumberOfActivePlanes; ++k) {
7065 if (SourceScan[k] != dm_vert) {
7066 SwathWidthSingleDPPY[k] = ViewportWidth[k];
7068 SwathWidthSingleDPPY[k] = ViewportHeight[k];
7071 #ifdef __DML_VBA_DEBUG__
7072 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
7073 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
7076 MainPlaneODMCombine = ODMCombineEnabled[k];
7077 for (j = 0; j < NumberOfActivePlanes; ++j) {
7078 if (BlendingAndTiming[k] == j) {
7079 MainPlaneODMCombine = ODMCombineEnabled[j];
7083 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
7084 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
7085 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
7086 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
7087 } else if (DPPPerPlane[k] == 2) {
7088 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
7090 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7093 #ifdef __DML_VBA_DEBUG__
7094 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
7095 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
7098 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
7099 SwathWidthC[k] = SwathWidthY[k] / 2;
7100 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
7102 SwathWidthC[k] = SwathWidthY[k];
7103 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
7106 if (ForceSingleDPP == true) {
7107 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7108 SwathWidthC[k] = SwathWidthSingleDPPC[k];
7111 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
7112 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
7113 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
7114 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
7116 #ifdef __DML_VBA_DEBUG__
7117 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
7120 if (SourceScan[k] != dm_vert) {
7121 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
7122 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
7123 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
7124 if (BytePerPixC[k] > 0) {
7125 swath_width_chroma_ub[k] = dml_min(
7127 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
7129 swath_width_chroma_ub[k] = 0;
7132 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
7133 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
7134 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
7135 if (BytePerPixC[k] > 0) {
7136 swath_width_chroma_ub[k] = dml_min(
7137 surface_height_ub_c,
7138 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7140 swath_width_chroma_ub[k] = 0;
7147 static double CalculateExtraLatency(
7148 int RoundTripPingLatencyCycles,
7149 int ReorderingBytes,
7151 int TotalNumberOfActiveDPP,
7152 int PixelChunkSizeInKByte,
7153 int TotalNumberOfDCCActiveDPP,
7158 int NumberOfActivePlanes,
7160 int dpte_group_bytes[],
7161 double HostVMInefficiencyFactor,
7162 double HostVMMinPageSize,
7163 int HostVMMaxNonCachedPageTableLevels)
7165 double ExtraLatencyBytes;
7166 double ExtraLatency;
7168 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7170 TotalNumberOfActiveDPP,
7171 PixelChunkSizeInKByte,
7172 TotalNumberOfDCCActiveDPP,
7176 NumberOfActivePlanes,
7179 HostVMInefficiencyFactor,
7181 HostVMMaxNonCachedPageTableLevels);
7183 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7185 #ifdef __DML_VBA_DEBUG__
7186 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7187 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7188 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7189 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7190 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7193 return ExtraLatency;
/*
 * CalculateExtraLatencyBytes - number of bytes contributing to extra latency.
 *
 * The base amount is ReorderingBytes plus 1024 bytes per KByte of pixel chunk
 * for every active DPP and of meta chunk for every DCC-active DPP.
 *
 * When GPUVMEnable is set, each active plane additionally contributes
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) * HostVMInefficiencyFactor
 * where "levels" is 0 unless HostVMEnable is also set, in which case it is
 * HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (floored at 0 when
 * reduced) as HostVMMinPageSize crosses 2048 and 1048576.
 *
 * Returns the total as a double.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	double TotalBytes;
	int DynamicLevels = 0;
	int PerGroupMultiplier;
	int plane;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048) {
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		} else if (HostVMMinPageSize < 1048576) {
			/* Reaching here already implies HostVMMinPageSize >= 2048. */
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		} else {
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
		}
	}

	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable != true) {
		return TotalBytes;
	}

	/* Integer factor, kept separate so the multiplication order is unchanged. */
	PerGroupMultiplier = 1 + 8 * DynamicLevels;
	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
		TotalBytes = TotalBytes + NumberOfDPP[plane] * dpte_group_bytes[plane] * PerGroupMultiplier * HostVMInefficiencyFactor;
	}

	return TotalBytes;
}

/*
 * CalculateUrgentLatency - take the largest of the three supplied urgent
 * latency values and, when DoUrgentLatencyAdjustment is set, add a term that
 * depends on FabricClock.
 *
 * NOTE(review): ret and FabricClock are used below without a visible
 * declaration in this block, and no return statement is visible - presumably
 * ret is the value returned; confirm.
 */
7236 static double CalculateUrgentLatency(
7237 double UrgentLatencyPixelDataOnly,
7238 double UrgentLatencyPixelMixedWithVMData,
7239 double UrgentLatencyVMDataOnly,
7240 bool DoUrgentLatencyAdjustment,
7241 double UrgentLatencyAdjustmentFabricClockComponent,
7242 double UrgentLatencyAdjustmentFabricClockReference,
// Start from the maximum of the three input latencies.
7247 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
// Optional adjustment: Component * (Reference / FabricClock - 1); the factor
// is zero when FabricClock equals the reference value.
7248 if (DoUrgentLatencyAdjustment == true) {
7249 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
/*
 * UseMinimumDCFCLK - for every state i below v->soc.num_states and for
 * j = 0 and j = 1, derive a DCFCLK value and store it in DCFCLKState[i][j].
 *
 * For each (i, j) the function computes
 *   - DCFCLKRequiredForAverageBandwidth, the largest of the projected deep
 *     sleep DCFCLK and two bandwidth-derived values, and
 *   - DCFCLKRequiredForPeakBandwidth, the sum of per-plane values that is then
 *     adjusted in a second per-plane pass,
 * and finally stores
 *   min(DCFCLKPerState[i], 1.05 * max(average requirement, peak requirement)).
 *
 * NOTE(review): HostVMEnable, GPUVMEnable, ReturnBusWidth, HTotal, VTotal,
 * VActive, TSetupPipe, TdmbfPipe, TdmecPipe and TdmsksPipe are used below
 * without a visible declaration in this block - confirm they are in scope.
 */
7254 static void UseMinimumDCFCLK(
7255 struct display_mode_lib *mode_lib,
7256 int MaxInterDCNTileRepeaters,
7257 int MaxPrefetchMode,
7258 double FinalDRAMClockChangeLatency,
7259 double SREnterPlusExitTime,
7261 int RoundTripPingLatencyCycles,
7262 int ReorderingBytes,
7263 int PixelChunkSizeInKByte,
7266 int GPUVMMaxPageTableLevels,
7268 int NumberOfActivePlanes,
7269 double HostVMMinPageSize,
7270 int HostVMMaxNonCachedPageTableLevels,
7271 bool DynamicMetadataVMEnabled,
7272 enum immediate_flip_requirement ImmediateFlipRequirement,
7273 bool ProgressiveToInterlaceUnitInOPP,
7274 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
7275 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
7278 int DynamicMetadataTransmittedBytes[],
7279 int DynamicMetadataLinesBeforeActiveRequired[],
7281 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
7282 double RequiredDISPCLK[][2],
7283 double UrgLatency[],
7284 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
7285 double ProjectedDCFCLKDeepSleep[][2],
7286 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
7287 double TotalVActivePixelBandwidth[][2],
7288 double TotalVActiveCursorBandwidth[][2],
7289 double TotalMetaRowBandwidth[][2],
7290 double TotalDPTERowBandwidth[][2],
7291 unsigned int TotalNumberOfActiveDPP[][2],
7292 unsigned int TotalNumberOfDCCActiveDPP[][2],
7293 int dpte_group_bytes[],
7294 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
7295 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
7296 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
7297 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
7298 int BytePerPixelY[],
7299 int BytePerPixelC[],
7301 double PixelClock[],
7302 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
7303 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
7304 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
7305 bool DynamicMetadataEnable[],
7306 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
7307 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
7308 double ReadBandwidthLuma[],
7309 double ReadBandwidthChroma[],
7310 double DCFCLKPerState[],
7311 double DCFCLKState[][2])
7313 struct vba_vars_st *v = &mode_lib->vba;
7314 int dummy1, i, j, k;
7315 double NormalEfficiency, dummy2, dummy3;
7316 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
// The efficiency input is a percentage; keep it as a fraction for the divisions below.
7318 NormalEfficiency = PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7319 for (i = 0; i < v->soc.num_states; ++i) {
7320 for (j = 0; j <= 1; ++j) {
7321 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7322 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7323 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7324 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7325 double MinimumTWait;
7326 double NonDPTEBandwidth;
7327 double DPTEBandwidth;
7328 double DCFCLKRequiredForAverageBandwidth;
7329 double ExtraLatencyBytes;
7330 double ExtraLatencyCycles;
7331 double DCFCLKRequiredForPeakBandwidth;
7332 int NoOfDPPState[DC__NUM_DPP__MAX];
7333 double MinimumTvmPlus2Tr0;
// Sum over planes of NoOfDPP * DPTEBytesPerRow / (15.75 * HTotal / PixelClock).
7335 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7336 for (k = 0; k < NumberOfActivePlanes; ++k) {
7337 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7338 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
// Local per-plane copy of the DPP count for this (i, j).
7341 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
7342 NoOfDPPState[k] = NoOfDPP[i][j][k];
7345 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
7346 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
// Use the sum computed above when HostVM is enabled or immediate flip is
// required; otherwise use the caller-supplied TotalDPTERowBandwidth.
7347 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
7348 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
// Average requirement: the largest of the three candidates.
7349 DCFCLKRequiredForAverageBandwidth = dml_max3(
7350 ProjectedDCFCLKDeepSleep[i][j],
7351 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth
7352 / (MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7353 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / ReturnBusWidth);
7355 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7357 TotalNumberOfActiveDPP[i][j],
7358 PixelChunkSizeInKByte,
7359 TotalNumberOfDCCActiveDPP[i][j],
7363 NumberOfActivePlanes,
7368 HostVMMaxNonCachedPageTableLevels);
// Fixed cycle counts plus the byte count divided by efficiency and ReturnBusWidth.
7369 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
// First per-plane pass: fill DCFCLKRequiredForPeakBandwidthPerPlane[k].
7370 for (k = 0; k < NumberOfActivePlanes; ++k) {
7371 double DCFCLKCyclesRequiredInPrefetch;
7372 double ExpectedPrefetchBWAcceleration;
7373 double PrefetchTime;
7375 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
7376 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
// The PDE/meta-PTE term only counts when GPUVMMaxPageTableLevels > 2.
// NOTE(review): NoOfDPPState[k] is a divisor here - presumably non-zero for active planes; confirm.
7377 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7378 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7379 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth
7380 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7381 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
// NOTE(review): the divisor is ReadBandwidthLuma[k] + ReadBandwidthChroma[k] - presumably non-zero; confirm.
7382 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k])
7383 / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
// Non-zero only when GPUVM, per-plane dynamic metadata and DynamicMetadataVMEnabled are all set.
7384 DynamicMetadataVMExtraLatency[k] =
7385 (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
7386 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7387 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait
7389 * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : GPUVMMaxPageTableLevels - 2)
7390 * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7391 - DynamicMetadataVMExtraLatency[k];
// A requirement is derived only for a positive PrefetchTime.
// NOTE(review): the plain "= DCFCLKPerState[i]" assignment further down is
// presumably the else branch of this check - confirm.
7393 if (PrefetchTime > 0) {
7394 double ExpectedVRatioPrefetch;
7395 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7396 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7397 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7398 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7399 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
7400 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7401 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / ReturnBusWidth;
7404 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
// Planes with dynamic metadata additionally bound the requirement by
// ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency when that time is positive.
7406 if (DynamicMetadataEnable[k] == true) {
7411 double AllowedTimeForUrgentExtraLatency;
7413 CalculateVupdateAndDynamicMetadataParameters(
7414 MaxInterDCNTileRepeaters,
7415 RequiredDPPCLK[i][j][k],
7416 RequiredDISPCLK[i][j],
7417 ProjectedDCFCLKDeepSleep[i][j],
7420 VTotal[k] - VActive[k],
7421 DynamicMetadataTransmittedBytes[k],
7422 DynamicMetadataLinesBeforeActiveRequired[k],
7424 ProgressiveToInterlaceUnitInOPP,
7432 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7433 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7434 if (AllowedTimeForUrgentExtraLatency > 0) {
7435 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7436 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7437 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7439 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
// Peak requirement starts as the sum of the per-plane values.
7443 DCFCLKRequiredForPeakBandwidth = 0;
7444 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
7445 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7447 MinimumTvmPlus2Tr0 = UrgLatency[i]
7448 * (GPUVMEnable == true ?
7449 (HostVMEnable == true ?
7450 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) :
// Second per-plane pass: a plane whose available time does not exceed
// MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4 sets the peak requirement
// to DCFCLKPerState[i]; the dml_max3 update is presumably the else branch - confirm.
7452 for (k = 0; k < NumberOfActivePlanes; ++k) {
7453 double MaximumTvmPlus2Tr0PlusTsw;
7454 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7455 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7456 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
7458 DCFCLKRequiredForPeakBandwidth = dml_max3(
7459 DCFCLKRequiredForPeakBandwidth,
7460 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7461 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
// Result: the larger requirement scaled by 1.05, capped at DCFCLKPerState[i].
7464 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * CalculateUnboundedRequestAndCompressedBufferSize - decide whether unbounded
 * requesting is enabled and derive the compressed buffer size.
 *
 * Outputs (written through the pointer parameters):
 *   *UnboundedRequestEnabled     - result of UnboundedRequest() evaluated with
 *                                  Output[0].
 *   *CompressedBufferSizeInkByte - ConfigReturnBufferSizeInKByte minus the DET
 *                                  size multiplied by a DPP count, then scaled
 *                                  by CompressedBufferSegmentSizeInkByteFinal / 64.
 *
 * NOTE(review): TotalActiveDPP and MaxNumDPP are used below without a visible
 * declaration in this block - confirm they are part of the full signature.
 */
7469 static void CalculateUnboundedRequestAndCompressedBufferSize(
7470 unsigned int DETBufferSizeInKByte,
7471 int ConfigReturnBufferSizeInKByte,
7472 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7474 bool NoChromaPlanes,
7476 int CompressedBufferSegmentSizeInkByteFinal,
7477 enum output_encoder_class *Output,
7478 bool *UnboundedRequestEnabled,
7479 int *CompressedBufferSizeInkByte)
// dml_ceil(x, 64) presumably rounds the DET size up to a multiple of 64 - confirm against dml_inline_defs.h.
7481 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
// Only the first entry of Output is consulted.
7483 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
// The DET size is multiplied by TotalActiveDPP when unbounded requesting is
// enabled and by MaxNumDPP otherwise; the double result is stored into an int.
7484 *CompressedBufferSizeInkByte = (
7485 *UnboundedRequestEnabled == true ?
7486 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7487 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
// Both operands are int, so this scaling is integer arithmetic and the division truncates.
7488 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
// Trace of the inputs and results, compiled only when __DML_VBA_DEBUG__ is defined.
7490 #ifdef __DML_VBA_DEBUG__
7491 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7492 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7493 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7494 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7495 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7496 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7497 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7501 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7503 bool ret_val = false;
7505 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7506 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {