2 * Copyright 2020 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
26 #ifdef CONFIG_DRM_AMD_DC_DCN
29 #include "../display_mode_lib.h"
30 #include "display_mode_vba_30.h"
31 #include "../dml_inline_defs.h"
36 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
38 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
39 * ways. Unless there is something clearly wrong with it the code should
40 * remain as-is as it provides us with a guarantee from HW that it is correct.
48 double DCFCLKDeepSleep;
49 unsigned int DPPPerPlane;
51 enum scan_direction_class SourceScan;
52 unsigned int BlockWidth256BytesY;
53 unsigned int BlockHeight256BytesY;
54 unsigned int BlockWidth256BytesC;
55 unsigned int BlockHeight256BytesC;
56 unsigned int InterlaceEnable;
57 unsigned int NumberOfCursors;
60 unsigned int DCCEnable;
61 bool ODMCombineEnabled;
/*
 * File-local constants. None of them is referenced by name in the portion of
 * the file reviewed here, so the notes below are assumptions to be confirmed.
 *
 * BPP_BLENDED_PIPE: all-ones 32-bit value; presumably a sentinel rather than a
 * real bits-per-pixel figure -- TODO confirm against its users.
 *
 * DCN30_MAX_DSC_IMAGE_WIDTH / DCN30_MAX_FMT_420_BUFFER_WIDTH: the values 5184
 * and 4096 match the sliceWidth bounds quoted in the dscceComputeDelay notes
 * ("5184/numSlices (or 4096/numSlices in 420 mode)"); presumably the same
 * limits -- verify where they are used.
 */
65 #define BPP_BLENDED_PIPE 0xffffffff
66 #define DCN30_MAX_DSC_IMAGE_WIDTH 5184
67 #define DCN30_MAX_FMT_420_BUFFER_WIDTH 4096
69 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
70 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
71 struct display_mode_lib *mode_lib);
72 static unsigned int dscceComputeDelay(
75 unsigned int sliceWidth,
76 unsigned int numSlices,
77 enum output_format_class pixelFormat,
78 enum output_encoder_class Output);
79 static unsigned int dscComputeDelay(
80 enum output_format_class pixelFormat,
81 enum output_encoder_class Output);
82 // Super monster function with some 45 arguments
83 static bool CalculatePrefetchSchedule(
84 struct display_mode_lib *mode_lib,
85 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
86 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
88 unsigned int DSCDelay,
89 double DPPCLKDelaySubtotalPlusCNVCFormater,
90 double DPPCLKDelaySCL,
91 double DPPCLKDelaySCLLBOnly,
92 double DPPCLKDelayCNVCCursor,
93 double DISPCLKDelaySubtotal,
94 unsigned int DPP_RECOUT_WIDTH,
95 enum output_format_class OutputFormat,
96 unsigned int MaxInterDCNTileRepeaters,
97 unsigned int VStartup,
98 unsigned int MaxVStartup,
99 unsigned int GPUVMPageTableLevels,
102 unsigned int HostVMMaxNonCachedPageTableLevels,
103 double HostVMMinPageSize,
104 bool DynamicMetadataEnable,
105 bool DynamicMetadataVMEnabled,
106 int DynamicMetadataLinesBeforeActiveRequired,
107 unsigned int DynamicMetadataTransmittedBytes,
108 double UrgentLatency,
109 double UrgentExtraLatency,
111 unsigned int PDEAndMetaPTEBytesFrame,
112 unsigned int MetaRowByte,
113 unsigned int PixelPTEBytesPerRow,
114 double PrefetchSourceLinesY,
115 unsigned int SwathWidthY,
117 double VInitPreFillY,
118 unsigned int MaxNumSwathY,
119 double PrefetchSourceLinesC,
120 unsigned int SwathWidthC,
122 double VInitPreFillC,
123 unsigned int MaxNumSwathC,
124 long swath_width_luma_ub,
125 long swath_width_chroma_ub,
126 unsigned int SwathHeightY,
127 unsigned int SwathHeightC,
129 bool ProgressiveToInterlaceUnitInOPP,
130 double *DSTXAfterScaler,
131 double *DSTYAfterScaler,
132 double *DestinationLinesForPrefetch,
133 double *PrefetchBandwidth,
134 double *DestinationLinesToRequestVMInVBlank,
135 double *DestinationLinesToRequestRowInVBlank,
136 double *VRatioPrefetchY,
137 double *VRatioPrefetchC,
138 double *RequiredPrefetchPixDataBWLuma,
139 double *RequiredPrefetchPixDataBWChroma,
140 bool *NotEnoughTimeForDynamicMetadata,
142 double *prefetch_vmrow_bw,
145 unsigned int *VUpdateOffsetPix,
146 double *VUpdateWidthPix,
147 double *VReadyOffsetPix);
148 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
149 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
150 static void CalculateDCCConfiguration(
152 bool DCCProgrammingAssumesScanDirectionUnknown,
153 enum source_format_class SourcePixelFormat,
154 unsigned int ViewportWidthLuma,
155 unsigned int ViewportWidthChroma,
156 unsigned int ViewportHeightLuma,
157 unsigned int ViewportHeightChroma,
158 double DETBufferSize,
159 unsigned int RequestHeight256ByteLuma,
160 unsigned int RequestHeight256ByteChroma,
161 enum dm_swizzle_mode TilingFormat,
162 unsigned int BytePerPixelY,
163 unsigned int BytePerPixelC,
164 double BytePerPixelDETY,
165 double BytePerPixelDETC,
166 enum scan_direction_class ScanOrientation,
167 unsigned int *MaxUncompressedBlockLuma,
168 unsigned int *MaxUncompressedBlockChroma,
169 unsigned int *MaxCompressedBlockLuma,
170 unsigned int *MaxCompressedBlockChroma,
171 unsigned int *IndependentBlockLuma,
172 unsigned int *IndependentBlockChroma);
173 static double CalculatePrefetchSourceLines(
174 struct display_mode_lib *mode_lib,
178 bool ProgressiveToInterlaceUnitInOPP,
179 unsigned int SwathHeight,
180 unsigned int ViewportYStart,
181 double *VInitPreFill,
182 unsigned int *MaxNumSwath);
183 static unsigned int CalculateVMAndRowBytes(
184 struct display_mode_lib *mode_lib,
186 unsigned int BlockHeight256Bytes,
187 unsigned int BlockWidth256Bytes,
188 enum source_format_class SourcePixelFormat,
189 unsigned int SurfaceTiling,
190 unsigned int BytePerPixel,
191 enum scan_direction_class ScanDirection,
192 unsigned int SwathWidth,
193 unsigned int ViewportHeight,
196 unsigned int HostVMMaxNonCachedPageTableLevels,
197 unsigned int GPUVMMinPageSize,
198 unsigned int HostVMMinPageSize,
199 unsigned int PTEBufferSizeInRequests,
201 unsigned int DCCMetaPitch,
202 unsigned int *MacroTileWidth,
203 unsigned int *MetaRowByte,
204 unsigned int *PixelPTEBytesPerRow,
205 bool *PTEBufferSizeNotExceeded,
206 unsigned int *dpte_row_width_ub,
207 unsigned int *dpte_row_height,
208 unsigned int *MetaRequestWidth,
209 unsigned int *MetaRequestHeight,
210 unsigned int *meta_row_width,
211 unsigned int *meta_row_height,
212 unsigned int *vm_group_bytes,
213 unsigned int *dpte_group_bytes,
214 unsigned int *PixelPTEReqWidth,
215 unsigned int *PixelPTEReqHeight,
216 unsigned int *PTERequestSize,
217 unsigned int *DPDE0BytesFrame,
218 unsigned int *MetaPTEBytesFrame);
219 static double CalculateTWait(
220 unsigned int PrefetchMode,
221 double DRAMClockChangeLatency,
222 double UrgentLatency,
223 double SREnterPlusExitTime);
224 static void CalculateRowBandwidth(
226 enum source_format_class SourcePixelFormat,
231 unsigned int MetaRowByteLuma,
232 unsigned int MetaRowByteChroma,
233 unsigned int meta_row_height_luma,
234 unsigned int meta_row_height_chroma,
235 unsigned int PixelPTEBytesPerRowLuma,
236 unsigned int PixelPTEBytesPerRowChroma,
237 unsigned int dpte_row_height_luma,
238 unsigned int dpte_row_height_chroma,
240 double *dpte_row_bw);
241 static void CalculateFlipSchedule(
242 struct display_mode_lib *mode_lib,
243 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
244 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
245 double UrgentExtraLatency,
246 double UrgentLatency,
247 unsigned int GPUVMMaxPageTableLevels,
249 unsigned int HostVMMaxNonCachedPageTableLevels,
251 double HostVMMinPageSize,
252 double PDEAndMetaPTEBytesPerFrame,
254 double DPTEBytesPerRow,
255 double BandwidthAvailableForImmediateFlip,
256 unsigned int TotImmediateFlipBytes,
257 enum source_format_class SourcePixelFormat,
263 unsigned int dpte_row_height,
264 unsigned int meta_row_height,
265 unsigned int dpte_row_height_chroma,
266 unsigned int meta_row_height_chroma,
267 double *DestinationLinesToRequestVMInImmediateFlip,
268 double *DestinationLinesToRequestRowInImmediateFlip,
269 double *final_flip_bw,
270 bool *ImmediateFlipSupportedForPipe);
271 static double CalculateWriteBackDelay(
272 enum source_format_class WritebackPixelFormat,
273 double WritebackHRatio,
274 double WritebackVRatio,
275 unsigned int WritebackVTaps,
276 long WritebackDestinationWidth,
277 long WritebackDestinationHeight,
278 long WritebackSourceHeight,
279 unsigned int HTotal);
280 static void CalculateDynamicMetadataParameters(
281 int MaxInterDCNTileRepeaters,
284 double DCFClkDeepSleep,
288 long DynamicMetadataTransmittedBytes,
289 long DynamicMetadataLinesBeforeActiveRequired,
291 bool ProgressiveToInterlaceUnitInOPP,
296 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
297 struct display_mode_lib *mode_lib,
298 unsigned int PrefetchMode,
299 unsigned int NumberOfActivePlanes,
300 unsigned int MaxLineBufferLines,
301 unsigned int LineBufferSize,
302 unsigned int DPPOutputBufferPixels,
303 unsigned int DETBufferSizeInKByte,
304 unsigned int WritebackInterfaceBufferSize,
308 unsigned int dpte_group_bytes[],
309 unsigned int MetaChunkSize,
310 double UrgentLatency,
312 double WritebackLatency,
313 double WritebackChunkSize,
315 double DRAMClockChangeLatency,
317 double SREnterPlusExitTime,
318 double DCFCLKDeepSleep,
319 unsigned int DPPPerPlane[],
322 unsigned int DETBufferSizeY[],
323 unsigned int DETBufferSizeC[],
324 unsigned int SwathHeightY[],
325 unsigned int SwathHeightC[],
326 unsigned int LBBitPerPixel[],
327 double SwathWidthY[],
328 double SwathWidthC[],
330 double HRatioChroma[],
331 unsigned int vtaps[],
332 unsigned int VTAPsChroma[],
334 double VRatioChroma[],
335 unsigned int HTotal[],
337 unsigned int BlendingAndTiming[],
338 double BytePerPixelDETY[],
339 double BytePerPixelDETC[],
340 double DSTXAfterScaler[],
341 double DSTYAfterScaler[],
342 bool WritebackEnable[],
343 enum source_format_class WritebackPixelFormat[],
344 double WritebackDestinationWidth[],
345 double WritebackDestinationHeight[],
346 double WritebackSourceHeight[],
347 enum clock_change_support *DRAMClockChangeSupport,
348 double *UrgentWatermark,
349 double *WritebackUrgentWatermark,
350 double *DRAMClockChangeWatermark,
351 double *WritebackDRAMClockChangeWatermark,
352 double *StutterExitWatermark,
353 double *StutterEnterPlusExitWatermark,
354 double *MinActiveDRAMClockChangeLatencySupported);
355 static void CalculateDCFCLKDeepSleep(
356 struct display_mode_lib *mode_lib,
357 unsigned int NumberOfActivePlanes,
361 double VRatioChroma[],
362 double SwathWidthY[],
363 double SwathWidthC[],
364 unsigned int DPPPerPlane[],
366 double HRatioChroma[],
368 double PSCL_THROUGHPUT[],
369 double PSCL_THROUGHPUT_CHROMA[],
371 double ReadBandwidthLuma[],
372 double ReadBandwidthChroma[],
374 double *DCFCLKDeepSleep);
375 static void CalculateUrgentBurstFactor(
376 long swath_width_luma_ub,
377 long swath_width_chroma_ub,
378 unsigned int DETBufferSizeInKByte,
379 unsigned int SwathHeightY,
380 unsigned int SwathHeightC,
382 double UrgentLatency,
383 double CursorBufferSize,
384 unsigned int CursorWidth,
385 unsigned int CursorBPP,
388 double BytePerPixelInDETY,
389 double BytePerPixelInDETC,
390 double DETBufferSizeY,
391 double DETBufferSizeC,
392 double *UrgentBurstFactorCursor,
393 double *UrgentBurstFactorLuma,
394 double *UrgentBurstFactorChroma,
395 bool *NotEnoughUrgentLatencyHiding);
397 static void UseMinimumDCFCLK(
398 struct display_mode_lib *mode_lib,
399 int MaxInterDCNTileRepeaters,
401 double FinalDRAMClockChangeLatency,
402 double SREnterPlusExitTime,
404 int RoundTripPingLatencyCycles,
406 int PixelChunkSizeInKByte,
409 int GPUVMMaxPageTableLevels,
411 int NumberOfActivePlanes,
412 double HostVMMinPageSize,
413 int HostVMMaxNonCachedPageTableLevels,
414 bool DynamicMetadataVMEnabled,
415 enum immediate_flip_requirement ImmediateFlipRequirement,
416 bool ProgressiveToInterlaceUnitInOPP,
417 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
418 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
419 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
420 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
423 int DynamicMetadataTransmittedBytes[],
424 int DynamicMetadataLinesBeforeActiveRequired[],
426 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
427 double RequiredDISPCLK[][2],
429 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
430 double ProjectedDCFCLKDeepSleep[][2],
431 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
432 double TotalVActivePixelBandwidth[][2],
433 double TotalVActiveCursorBandwidth[][2],
434 double TotalMetaRowBandwidth[][2],
435 double TotalDPTERowBandwidth[][2],
436 unsigned int TotalNumberOfActiveDPP[][2],
437 unsigned int TotalNumberOfDCCActiveDPP[][2],
438 int dpte_group_bytes[],
439 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
440 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
441 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
442 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
447 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
448 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
449 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
450 bool DynamicMetadataEnable[],
451 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
452 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
453 double ReadBandwidthLuma[],
454 double ReadBandwidthChroma[],
455 double DCFCLKPerState[],
456 double DCFCLKState[][2]);
457 static void CalculatePixelDeliveryTimes(
458 unsigned int NumberOfActivePlanes,
460 double VRatioChroma[],
461 double VRatioPrefetchY[],
462 double VRatioPrefetchC[],
463 unsigned int swath_width_luma_ub[],
464 unsigned int swath_width_chroma_ub[],
465 unsigned int DPPPerPlane[],
467 double HRatioChroma[],
469 double PSCL_THROUGHPUT[],
470 double PSCL_THROUGHPUT_CHROMA[],
473 enum scan_direction_class SourceScan[],
474 unsigned int NumberOfCursors[],
475 unsigned int CursorWidth[][2],
476 unsigned int CursorBPP[][2],
477 unsigned int BlockWidth256BytesY[],
478 unsigned int BlockHeight256BytesY[],
479 unsigned int BlockWidth256BytesC[],
480 unsigned int BlockHeight256BytesC[],
481 double DisplayPipeLineDeliveryTimeLuma[],
482 double DisplayPipeLineDeliveryTimeChroma[],
483 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
484 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
485 double DisplayPipeRequestDeliveryTimeLuma[],
486 double DisplayPipeRequestDeliveryTimeChroma[],
487 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
488 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
489 double CursorRequestDeliveryTime[],
490 double CursorRequestDeliveryTimePrefetch[]);
492 static void CalculateMetaAndPTETimes(
493 int NumberOfActivePlanes,
496 int MinMetaChunkSizeBytes,
499 double VRatioChroma[],
500 double DestinationLinesToRequestRowInVBlank[],
501 double DestinationLinesToRequestRowInImmediateFlip[],
506 enum scan_direction_class SourceScan[],
507 int dpte_row_height[],
508 int dpte_row_height_chroma[],
509 int meta_row_width[],
510 int meta_row_width_chroma[],
511 int meta_row_height[],
512 int meta_row_height_chroma[],
513 int meta_req_width[],
514 int meta_req_width_chroma[],
515 int meta_req_height[],
516 int meta_req_height_chroma[],
517 int dpte_group_bytes[],
518 int PTERequestSizeY[],
519 int PTERequestSizeC[],
520 int PixelPTEReqWidthY[],
521 int PixelPTEReqHeightY[],
522 int PixelPTEReqWidthC[],
523 int PixelPTEReqHeightC[],
524 int dpte_row_width_luma_ub[],
525 int dpte_row_width_chroma_ub[],
526 double DST_Y_PER_PTE_ROW_NOM_L[],
527 double DST_Y_PER_PTE_ROW_NOM_C[],
528 double DST_Y_PER_META_ROW_NOM_L[],
529 double DST_Y_PER_META_ROW_NOM_C[],
530 double TimePerMetaChunkNominal[],
531 double TimePerChromaMetaChunkNominal[],
532 double TimePerMetaChunkVBlank[],
533 double TimePerChromaMetaChunkVBlank[],
534 double TimePerMetaChunkFlip[],
535 double TimePerChromaMetaChunkFlip[],
536 double time_per_pte_group_nom_luma[],
537 double time_per_pte_group_vblank_luma[],
538 double time_per_pte_group_flip_luma[],
539 double time_per_pte_group_nom_chroma[],
540 double time_per_pte_group_vblank_chroma[],
541 double time_per_pte_group_flip_chroma[]);
543 static void CalculateVMGroupAndRequestTimes(
544 unsigned int NumberOfActivePlanes,
546 unsigned int GPUVMMaxPageTableLevels,
547 unsigned int HTotal[],
549 double DestinationLinesToRequestVMInVBlank[],
550 double DestinationLinesToRequestVMInImmediateFlip[],
553 int dpte_row_width_luma_ub[],
554 int dpte_row_width_chroma_ub[],
555 int vm_group_bytes[],
556 unsigned int dpde0_bytes_per_frame_ub_l[],
557 unsigned int dpde0_bytes_per_frame_ub_c[],
558 int meta_pte_bytes_per_frame_ub_l[],
559 int meta_pte_bytes_per_frame_ub_c[],
560 double TimePerVMGroupVBlank[],
561 double TimePerVMGroupFlip[],
562 double TimePerVMRequestVBlank[],
563 double TimePerVMRequestFlip[]);
565 static void CalculateStutterEfficiency(
566 int NumberOfActivePlanes,
567 long ROBBufferSizeInKByte,
568 double TotalDataReadBandwidth,
572 bool SynchronizedVBlank,
574 unsigned int DETBufferSizeY[],
576 double BytePerPixelDETY[],
577 double SwathWidthY[],
580 double DCCRateLuma[],
581 double DCCRateChroma[],
586 enum scan_direction_class SourceScan[],
587 int BlockHeight256BytesY[],
588 int BlockWidth256BytesY[],
589 int BlockHeight256BytesC[],
590 int BlockWidth256BytesC[],
591 int DCCYMaxUncompressedBlock[],
592 int DCCCMaxUncompressedBlock[],
595 bool WritebackEnable[],
596 double ReadBandwidthPlaneLuma[],
597 double ReadBandwidthPlaneChroma[],
598 double meta_row_bw[],
599 double dpte_row_bw[],
600 double *StutterEfficiencyNotIncludingVBlank,
601 double *StutterEfficiency,
602 double *StutterPeriodOut);
604 static void CalculateSwathAndDETConfiguration(
606 int NumberOfActivePlanes,
607 unsigned int DETBufferSizeInKByte,
608 double MaximumSwathWidthLuma[],
609 double MaximumSwathWidthChroma[],
610 enum scan_direction_class SourceScan[],
611 enum source_format_class SourcePixelFormat[],
612 enum dm_swizzle_mode SurfaceTiling[],
614 int ViewportHeight[],
617 int SurfaceHeightY[],
618 int SurfaceHeightC[],
619 int Read256BytesBlockHeightY[],
620 int Read256BytesBlockHeightC[],
621 int Read256BytesBlockWidthY[],
622 int Read256BytesBlockWidthC[],
623 enum odm_combine_mode ODMCombineEnabled[],
624 int BlendingAndTiming[],
627 double BytePerPixDETY[],
628 double BytePerPixDETC[],
631 double HRatioChroma[],
633 int swath_width_luma_ub[],
634 int swath_width_chroma_ub[],
636 double SwathWidthChroma[],
639 unsigned int DETBufferSizeY[],
640 unsigned int DETBufferSizeC[],
641 bool ViewportSizeSupportPerPlane[],
642 bool *ViewportSizeSupport);
643 static void CalculateSwathWidth(
645 int NumberOfActivePlanes,
646 enum source_format_class SourcePixelFormat[],
647 enum scan_direction_class SourceScan[],
648 unsigned int ViewportWidth[],
649 unsigned int ViewportHeight[],
650 unsigned int SurfaceWidthY[],
651 unsigned int SurfaceWidthC[],
652 unsigned int SurfaceHeightY[],
653 unsigned int SurfaceHeightC[],
654 enum odm_combine_mode ODMCombineEnabled[],
657 int Read256BytesBlockHeightY[],
658 int Read256BytesBlockHeightC[],
659 int Read256BytesBlockWidthY[],
660 int Read256BytesBlockWidthC[],
661 int BlendingAndTiming[],
662 unsigned int HActive[],
665 double SwathWidthSingleDPPY[],
666 double SwathWidthSingleDPPC[],
667 double SwathWidthY[],
668 double SwathWidthC[],
669 int MaximumSwathHeightY[],
670 int MaximumSwathHeightC[],
671 unsigned int swath_width_luma_ub[],
672 unsigned int swath_width_chroma_ub[]);
673 static double CalculateExtraLatency(
674 long RoundTripPingLatencyCycles,
675 long ReorderingBytes,
677 int TotalNumberOfActiveDPP,
678 int PixelChunkSizeInKByte,
679 int TotalNumberOfDCCActiveDPP,
684 int NumberOfActivePlanes,
686 int dpte_group_bytes[],
687 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
688 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
689 double HostVMMinPageSize,
690 int HostVMMaxNonCachedPageTableLevels);
691 static double CalculateExtraLatencyBytes(
692 long ReorderingBytes,
693 int TotalNumberOfActiveDPP,
694 int PixelChunkSizeInKByte,
695 int TotalNumberOfDCCActiveDPP,
699 int NumberOfActivePlanes,
701 int dpte_group_bytes[],
702 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
703 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
704 double HostVMMinPageSize,
705 int HostVMMaxNonCachedPageTableLevels);
706 static double CalculateUrgentLatency(
707 double UrgentLatencyPixelDataOnly,
708 double UrgentLatencyPixelMixedWithVMData,
709 double UrgentLatencyVMDataOnly,
710 bool DoUrgentLatencyAdjustment,
711 double UrgentLatencyAdjustmentFabricClockComponent,
712 double UrgentLatencyAdjustmentFabricClockReference,
713 double FabricClockSingle);
715 static bool CalculateBytePerPixelAnd256BBlockSizes(
716 enum source_format_class SourcePixelFormat,
717 enum dm_swizzle_mode SurfaceTiling,
718 unsigned int *BytePerPixelY,
719 unsigned int *BytePerPixelC,
720 double *BytePerPixelDETY,
721 double *BytePerPixelDETC,
722 unsigned int *BlockHeight256BytesY,
723 unsigned int *BlockHeight256BytesC,
724 unsigned int *BlockWidth256BytesY,
725 unsigned int *BlockWidth256BytesC);
/*
 * dml30_recalculate - non-static entry point that re-runs the mode
 * calculations on @mode_lib.
 *
 * The same mode_lib pointer is handed, in this order, to:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Steps 3 and 4 are static functions forward-declared near the top of this
 * file. Steps 1 and 2 are not declared in the part of the file reviewed here;
 * presumably they come from the included display_mode_lib headers -- TODO confirm.
 *
 * Nothing is returned; any results are presumably written through mode_lib by
 * the callees (not visible from this function alone).
 *
 * NOTE(review): the embedded original line numbers jump 727 -> 729 and
 * 732 -> 735, i.e. the function's brace lines are missing from this listing.
 */
727 void dml30_recalculate(struct display_mode_lib *mode_lib)
729 ModeSupportAndSystemConfiguration(mode_lib);
730 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
731 DisplayPipeConfiguration(mode_lib);
732 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
/*
 * dscceComputeDelay - compute the dscce delay; the last visible statement
 * stores it, scaled to pixels, in the local "pixels".
 *
 * NOTE(review): this listing is lossy. The embedded original line numbers skip
 * 736-737, 742, 750-751, 756-757, 759, 762, 764-767, 770-778, 784-787,
 * 790-793, 797-799 and 804 onward. As a result the following are NOT visible
 * here: the leading parameters (BPP is used below but its declaration is
 * missing), the declarations of Delay and pixels, the assignments to
 * pixelsPerClock, D, s, wx, P, l0, a and lstall, the bodies of every if/else,
 * and the return statement. The notes describe only what the visible lines show.
 *
 * @sliceWidth:  divided by pixelsPerClock to obtain w (see the existing
 *               "valid sliceWidth" note for its documented range).
 * @numSlices:   enters the Delay expression as (numSlices - 1).
 * @pixelFormat: compared against dm_420, dm_444 and dm_n422.
 * @Output:      not referenced by any visible line.
 *
 * Result: pixels = Delay * 3 * pixelsPerClock; presumably this is what the
 * function returns -- TODO confirm against the full file.
 */
735 static unsigned int dscceComputeDelay(
738 unsigned int sliceWidth,
739 unsigned int numSlices,
740 enum output_format_class pixelFormat,
741 enum output_encoder_class Output)
743 // valid bpc = source bits per component in the set of {8, 10, 12}
744 // valid bpp = increments of 1/16 of a bit
745 // min = 6/7/8 in N420/N422/444, respectively
746 // max = such that compression is 1:1
747 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
748 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
749 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
/* Fixed model size; only used below to derive the initial transmit delay. */
752 unsigned int rcModelSize = 8192;
754 // N422/N420 operate at 2 pixels per clock
755 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L,
/* Per-format selection; the value assigned in each branch is not visible in this listing. */
758 if (pixelFormat == dm_420)
760 // #all other modes operate at 1 pixel per clock
761 else if (pixelFormat == dm_444)
763 else if (pixelFormat == dm_n422)
768 //initial transmit delay as per PPS
/* The 2.0 literal makes this a floating-point division chain; dml_round() result is stored in an unsigned int. */
769 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
779 //divide by pixel per cycle to compute slice width as seen by DSC
/* Both operands are unsigned int, so this is a truncating integer division. */
780 w = sliceWidth / pixelsPerClock;
782 //422 mode has an additional cycle of delay
/* The statements selected by this test (and its else) are not visible here. */
783 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
788 //main calculation for the dscce
789 ix = initalXmitDelay + 45;
/* Integer arithmetic: (a + 2) / 3 rounds a / 3 up. */
794 ax = (a + 2) / 3 + D + 6 + 1;
/* Integer ceiling of ax / wx. */
795 L = (ax + wx - 1) / wx;
/*
 * NOTE(review): "ix % w" divides by w, and w is 0 whenever
 * sliceWidth < pixelsPerClock. Presumably callers never pass such a
 * sliceWidth -- confirm.
 */
796 if ((ix % w) == 0 && P != 0)
800 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
802 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
803 pixels = Delay * 3 * pixelsPerClock;
/*
 * dscComputeDelay - build up a delay figure in the local "Delay", which
 * starts at 0, with a separate list of dscc stage contributions per pixel
 * format.
 *
 * Three groups of stage comments are visible: one for dm_420, one for
 * dm_n422, and a third group whose introducing line is not visible
 * (presumably the final "else" covering every other format -- TODO confirm).
 *
 * NOTE(review): this listing is lossy. Only the branch skeleton and the
 * per-stage comments survive; the statements that actually add to Delay and
 * the return statement are missing, so the numeric contribution of each stage
 * cannot be stated from here. The function presumably returns Delay.
 *
 * @pixelFormat: selects which group of stage delays applies.
 * @Output:      not referenced by any visible line.
 */
807 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
809 unsigned int Delay = 0;
811 if (pixelFormat == dm_420) {
816 // dscc - input deserializer
818 // dscc gets pixels every other cycle
820 // dscc - input cdc fifo
822 // dscc gets pixels every other cycle
824 // dscc - cdc uncertainty
826 // dscc - output cdc fifo
828 // dscc gets pixels every other cycle
830 // dscc - cdc uncertainty
832 // dscc - output serializer
836 } else if (pixelFormat == dm_n422) {
841 // dscc - input deserializer
843 // dscc - input cdc fifo
845 // dscc - cdc uncertainty
847 // dscc - output cdc fifo
849 // dscc - cdc uncertainty
851 // dscc - output serializer
/* Third group of stages; the line introducing this branch is not visible in this listing. */
861 // dscc - input deserializer
863 // dscc - input cdc fifo
865 // dscc - cdc uncertainty
867 // dscc - output cdc fifo
869 // dscc - output serializer
871 // dscc - cdc uncertainty
880 static bool CalculatePrefetchSchedule(
881 struct display_mode_lib *mode_lib,
882 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
883 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
885 unsigned int DSCDelay,
886 double DPPCLKDelaySubtotalPlusCNVCFormater,
887 double DPPCLKDelaySCL,
888 double DPPCLKDelaySCLLBOnly,
889 double DPPCLKDelayCNVCCursor,
890 double DISPCLKDelaySubtotal,
891 unsigned int DPP_RECOUT_WIDTH,
892 enum output_format_class OutputFormat,
893 unsigned int MaxInterDCNTileRepeaters,
894 unsigned int VStartup,
895 unsigned int MaxVStartup,
896 unsigned int GPUVMPageTableLevels,
899 unsigned int HostVMMaxNonCachedPageTableLevels,
900 double HostVMMinPageSize,
901 bool DynamicMetadataEnable,
902 bool DynamicMetadataVMEnabled,
903 int DynamicMetadataLinesBeforeActiveRequired,
904 unsigned int DynamicMetadataTransmittedBytes,
905 double UrgentLatency,
906 double UrgentExtraLatency,
908 unsigned int PDEAndMetaPTEBytesFrame,
909 unsigned int MetaRowByte,
910 unsigned int PixelPTEBytesPerRow,
911 double PrefetchSourceLinesY,
912 unsigned int SwathWidthY,
914 double VInitPreFillY,
915 unsigned int MaxNumSwathY,
916 double PrefetchSourceLinesC,
917 unsigned int SwathWidthC,
919 double VInitPreFillC,
920 unsigned int MaxNumSwathC,
921 long swath_width_luma_ub,
922 long swath_width_chroma_ub,
923 unsigned int SwathHeightY,
924 unsigned int SwathHeightC,
926 bool ProgressiveToInterlaceUnitInOPP,
927 double *DSTXAfterScaler,
928 double *DSTYAfterScaler,
929 double *DestinationLinesForPrefetch,
930 double *PrefetchBandwidth,
931 double *DestinationLinesToRequestVMInVBlank,
932 double *DestinationLinesToRequestRowInVBlank,
933 double *VRatioPrefetchY,
934 double *VRatioPrefetchC,
935 double *RequiredPrefetchPixDataBWLuma,
936 double *RequiredPrefetchPixDataBWChroma,
937 bool *NotEnoughTimeForDynamicMetadata,
939 double *prefetch_vmrow_bw,
942 unsigned int *VUpdateOffsetPix,
943 double *VUpdateWidthPix,
944 double *VReadyOffsetPix)
946 bool MyError = false;
947 unsigned int DPPCycles = 0, DISPCLKCycles = 0;
948 double DSTTotalPixelsAfterScaler = 0;
949 double LineTime = 0, Tsetup = 0;
950 double dst_y_prefetch_equ = 0;
952 double prefetch_bw_oto = 0;
955 double Tvm_oto_lines = 0;
956 double Tr0_oto_lines = 0;
957 double dst_y_prefetch_oto = 0;
958 double TimeForFetchingMetaPTE = 0;
959 double TimeForFetchingRowInVBlank = 0;
960 double LinesToRequestPrefetchPixelData = 0;
961 double HostVMInefficiencyFactor = 0;
962 unsigned int HostVMDynamicLevelsTrips = 0;
963 double trip_to_mem = 0;
964 double Tvm_trips = 0;
965 double Tr0_trips = 0;
966 double Tvm_trips_rounded = 0;
967 double Tr0_trips_rounded = 0;
969 double Tpre_rounded = 0;
970 double prefetch_bw_equ = 0;
977 if (GPUVMEnable == true && HostVMEnable == true) {
978 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
979 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
981 HostVMInefficiencyFactor = 1;
982 HostVMDynamicLevelsTrips = 0;
985 CalculateDynamicMetadataParameters(
986 MaxInterDCNTileRepeaters,
989 myPipe->DCFCLKDeepSleep,
993 DynamicMetadataTransmittedBytes,
994 DynamicMetadataLinesBeforeActiveRequired,
995 myPipe->InterlaceEnable,
996 ProgressiveToInterlaceUnitInOPP,
1002 LineTime = myPipe->HTotal / myPipe->PixelClock;
1003 trip_to_mem = UrgentLatency;
1004 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
1006 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
1007 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
1009 *Tdmdl = TWait + UrgentExtraLatency;
1012 if (DynamicMetadataEnable == true) {
1013 if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
1014 *NotEnoughTimeForDynamicMetadata = true;
1016 *NotEnoughTimeForDynamicMetadata = false;
1017 dml_print("DML: Not Enough Time for Dynamic Meta!\n");
1018 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1019 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1020 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1021 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1024 *NotEnoughTimeForDynamicMetadata = false;
1027 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1029 if (myPipe->ScalerEnabled)
1030 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1032 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1034 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1036 DISPCLKCycles = DISPCLKDelaySubtotal;
1038 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1041 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK
1044 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1046 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
1047 *DSTYAfterScaler = 1;
1049 *DSTYAfterScaler = 0;
1051 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1052 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1053 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1058 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1059 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1060 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1063 if (GPUVMPageTableLevels >= 3) {
1064 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1067 } else if (!myPipe->DCCEnable)
1070 *Tno_bw = LineTime / 4;
1072 dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
1073 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1075 Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
1076 Tsw_oto = Lsw_oto * LineTime;
1078 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto;
1080 if (GPUVMEnable == true) {
1081 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
1085 Tvm_oto = LineTime / 4.0;
1087 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1089 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
1090 LineTime - Tvm_oto, LineTime / 4);
1092 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1094 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1095 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1096 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1098 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1099 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1101 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1102 dml_print("DML: dst_y_prefetch_equ: %f\n", dst_y_prefetch_equ);
1104 dml_print("DML: LineTime: %f\n", LineTime);
1105 dml_print("DML: VStartup: %d\n", VStartup);
1106 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1107 dml_print("DML: Tsetup: %fus - time from vstartup to vready\n", Tsetup);
1108 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1109 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1110 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1111 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1112 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1113 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1114 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1115 dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1116 dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler);
1118 *PrefetchBandwidth = 0;
1119 *DestinationLinesToRequestVMInVBlank = 0;
1120 *DestinationLinesToRequestRowInVBlank = 0;
1121 *VRatioPrefetchY = 0;
1122 *VRatioPrefetchC = 0;
1123 *RequiredPrefetchPixDataBWLuma = 0;
1124 if (dst_y_prefetch_equ > 1) {
1125 double PrefetchBandwidth1 = 0;
1126 double PrefetchBandwidth2 = 0;
1127 double PrefetchBandwidth3 = 0;
1128 double PrefetchBandwidth4 = 0;
1130 if (Tpre_rounded - *Tno_bw > 0)
1131 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
1132 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1133 + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY
1134 + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1135 / (Tpre_rounded - *Tno_bw);
1137 PrefetchBandwidth1 = 0;
1139 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) {
1140 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw);
1143 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1144 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
1145 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1146 swath_width_luma_ub * BytePerPixelY +
1147 PrefetchSourceLinesC * swath_width_chroma_ub *
1149 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1151 PrefetchBandwidth2 = 0;
1153 if (Tpre_rounded - Tvm_trips_rounded > 0)
1154 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow *
1155 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1156 swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC *
1157 swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded -
1160 PrefetchBandwidth3 = 0;
1162 if (VStartup == MaxVStartup && (PrefetchBandwidth3 > 4 * prefetch_bw_oto) && Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1163 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded);
1166 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1167 PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1168 / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1170 PrefetchBandwidth4 = 0;
1177 if (PrefetchBandwidth1 > 0) {
1178 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
1179 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1188 if (PrefetchBandwidth2 > 0) {
1189 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
1190 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1199 if (PrefetchBandwidth3 > 0) {
1200 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
1201 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1211 prefetch_bw_equ = PrefetchBandwidth1;
1212 } else if (Case2OK) {
1213 prefetch_bw_equ = PrefetchBandwidth2;
1214 } else if (Case3OK) {
1215 prefetch_bw_equ = PrefetchBandwidth3;
1217 prefetch_bw_equ = PrefetchBandwidth4;
1220 dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ);
1222 if (prefetch_bw_equ > 0) {
1224 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1226 Tvm_equ = LineTime / 4;
1229 if ((GPUVMEnable || myPipe->DCCEnable)) {
1231 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1233 (LineTime - Tvm_equ) / 2,
1236 Tr0_equ = (LineTime - Tvm_equ) / 2;
1241 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1245 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1246 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1247 TimeForFetchingMetaPTE = Tvm_oto;
1248 TimeForFetchingRowInVBlank = Tr0_oto;
1249 *PrefetchBandwidth = prefetch_bw_oto;
1251 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1252 TimeForFetchingMetaPTE = Tvm_equ;
1253 TimeForFetchingRowInVBlank = Tr0_equ;
1254 *PrefetchBandwidth = prefetch_bw_equ;
1257 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1259 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1262 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank
1263 - 2 * *DestinationLinesToRequestRowInVBlank;
1265 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1267 *VRatioPrefetchY = (double) PrefetchSourceLinesY
1268 / LinesToRequestPrefetchPixelData;
1269 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1270 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1271 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1272 *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1273 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1274 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1277 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1278 *VRatioPrefetchY = 0;
1282 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1283 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1285 if ((SwathHeightC > 4)) {
1286 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1287 *VRatioPrefetchC = dml_max(*VRatioPrefetchC,
1288 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1289 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1292 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1293 *VRatioPrefetchC = 0;
1297 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime;
1298 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime;
1301 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1302 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1303 *VRatioPrefetchY = 0;
1304 *VRatioPrefetchC = 0;
1305 *RequiredPrefetchPixDataBWLuma = 0;
1306 *RequiredPrefetchPixDataBWChroma = 0;
1309 dml_print("DML: Tpre: %fus - sum of tim to request meta pte, 2 x data pte + meta data, swaths\n", (double)LinesToRequestPrefetchPixelData * LineTime + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1310 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1311 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1312 dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1313 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime);
1314 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1315 dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n");
1316 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup);
1317 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1321 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1325 double prefetch_vm_bw = 0;
1326 double prefetch_row_bw = 0;
1328 if (PDEAndMetaPTEBytesFrame == 0) {
1330 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1331 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1335 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1337 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1338 prefetch_row_bw = 0;
1339 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1340 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1342 prefetch_row_bw = 0;
1344 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1347 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1351 *PrefetchBandwidth = 0;
1352 TimeForFetchingMetaPTE = 0;
1353 TimeForFetchingRowInVBlank = 0;
1354 *DestinationLinesToRequestVMInVBlank = 0;
1355 *DestinationLinesToRequestRowInVBlank = 0;
1356 *DestinationLinesForPrefetch = 0;
1357 LinesToRequestPrefetchPixelData = 0;
1358 *VRatioPrefetchY = 0;
1359 *VRatioPrefetchC = 0;
1360 *RequiredPrefetchPixDataBWLuma = 0;
1361 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * RoundToDFSGranularityUp - snap Clock to a value of the form
 * (4 * VCOSpeed) / N, where N is (4 * VCOSpeed) / Clock rounded down.
 *
 * Assuming dml_floor(x, 1) rounds x down to a whole number (the helper is
 * defined outside this block), N <= (4 * VCOSpeed) / Clock, so for positive
 * inputs with N >= 1 the returned clock is >= Clock.
 *
 * NOTE(review): if Clock > 4 * VCOSpeed the divisor becomes 0; presumably
 * callers never pass such a Clock - confirm.
 */
1367 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
1369 return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1);
/*
 * RoundToDFSGranularityDown - snap Clock to a value of the form
 * (4 * VCOSpeed) / N, where N is (4 * VCOSpeed) / Clock rounded up.
 *
 * Assuming dml_ceil(x, 1) rounds x up to a whole number (the helper is
 * defined outside this block), N >= (4 * VCOSpeed) / Clock, so for positive
 * inputs the returned clock is <= Clock.
 */
1372 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
1374 return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1);
/*
 * CalculateDCCConfiguration - pick a request type for the luma and chroma
 * planes and report the corresponding block sizes through the six output
 * pointers.
 *
 * Flow of the visible code:
 *  1. Derive viewport limits from DETBufferSize and two fixed caps, and clamp
 *     the surface dimensions to them.
 *  2. Compute the bytes of a full swath for horizontal and vertical scan,
 *     per plane.
 *  3. Compare those byte counts with DETBufferSize to set the req128_*
 *     flags (0 = 256-byte requests fit, 1 = fall back to 128-byte requests).
 *  4. Use BytePerPixelY/C and TilingFormat to decide whether 128-byte
 *     segments are treated as contiguous in each direction.
 *  5. Combine 3 and 4 according to what is known about the scan direction
 *     and translate the resulting request type into the outputs.
 *
 * Outputs per plane:
 *   REQ_256Bytes            -> MaxUncompressed 256, MaxCompressed 256, Independent 0
 *   REQ_128BytesContiguous  -> MaxUncompressed 256, MaxCompressed 128, Independent 128
 *   otherwise               -> MaxUncompressed 256, MaxCompressed 64,  Independent 64
 * All chroma outputs are forced to 0 when DCCEnabled is not true or
 * BytePerPixelC == 0; all luma outputs are forced to 0 when DCCEnabled is
 * not true.
 */
1377 static void CalculateDCCConfiguration(
1379 bool DCCProgrammingAssumesScanDirectionUnknown,
1380 enum source_format_class SourcePixelFormat,
1381 unsigned int SurfaceWidthLuma,
1382 unsigned int SurfaceWidthChroma,
1383 unsigned int SurfaceHeightLuma,
1384 unsigned int SurfaceHeightChroma,
1385 double DETBufferSize,
1386 unsigned int RequestHeight256ByteLuma,
1387 unsigned int RequestHeight256ByteChroma,
1388 enum dm_swizzle_mode TilingFormat,
1389 unsigned int BytePerPixelY,
1390 unsigned int BytePerPixelC,
1391 double BytePerPixelDETY,
1392 double BytePerPixelDETC,
1393 enum scan_direction_class ScanOrientation,
1394 unsigned int *MaxUncompressedBlockLuma,
1395 unsigned int *MaxUncompressedBlockChroma,
1396 unsigned int *MaxCompressedBlockLuma,
1397 unsigned int *MaxCompressedBlockChroma,
1398 unsigned int *IndependentBlockLuma,
1399 unsigned int *IndependentBlockChroma)
/* req128_<dir>_wc_<plane>: 1 when 128-byte requests are needed for that scan direction/plane. */
1407 int req128_horz_wc_l = 0;
1408 int req128_horz_wc_c = 0;
1409 int req128_vert_wc_l = 0;
1410 int req128_vert_wc_c = 0;
/* segment_order_<dir>_contiguous_<plane>: 1 when 128-byte segments count as contiguous. */
1411 int segment_order_horz_contiguous_luma = 0;
1412 int segment_order_horz_contiguous_chroma = 0;
1413 int segment_order_vert_contiguous_luma = 0;
1414 int segment_order_vert_contiguous_chroma = 0;
1416 long full_swath_bytes_horz_wc_l = 0;
1417 long full_swath_bytes_horz_wc_c = 0;
1418 long full_swath_bytes_vert_wc_l = 0;
1419 long full_swath_bytes_vert_wc_c = 0;
1421 long swath_buf_size = 0;
1422 double detile_buf_vp_horz_limit = 0;
1423 double detile_buf_vp_vert_limit = 0;
1425 long MAS_vp_horz_limit = 0;
1426 long MAS_vp_vert_limit = 0;
1427 long max_vp_horz_width = 0;
1428 long max_vp_vert_height = 0;
1429 long eff_surf_width_l = 0;
1430 long eff_surf_width_c = 0;
1431 long eff_surf_height_l = 0;
1432 long eff_surf_height_c = 0;
1436 REQ_128BytesNonContiguous,
1437 REQ_128BytesContiguous,
1441 RequestType RequestLuma;
1442 RequestType RequestChroma;
/* yuv420 is 1 for dm_420_8, dm_420_10 and dm_420_12; it is used below as the divisor (1 + yuv420). */
1444 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
/* Conditions feeding the horz_div_l/c and vert_div_l/c divisors used in the limit formulas below. */
1450 if (BytePerPixelY == 1)
1452 if (BytePerPixelC == 1)
1454 if (BytePerPixelY == 8
1455 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1456 || TilingFormat == dm_sw_64kb_s_x))
1458 if (BytePerPixelC == 8
1459 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1460 || TilingFormat == dm_sw_64kb_s_x))
/*
 * Viewport limits derived from the DET buffer. With no chroma plane
 * (BytePerPixelC == 0) the budget is half the buffer minus 2 * 256 bytes;
 * with a chroma plane it is half the buffer minus 2 * 2 * 256 bytes and the
 * per-line cost adds the chroma term.
 */
1463 if (BytePerPixelC == 0) {
1464 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1465 detile_buf_vp_horz_limit = (double) swath_buf_size
1466 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1467 / (1 + horz_div_l));
1468 detile_buf_vp_vert_limit = (double) swath_buf_size
1469 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1471 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1472 detile_buf_vp_horz_limit = (double) swath_buf_size
1473 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1475 + (double) RequestHeight256ByteChroma
1476 * BytePerPixelC / (1 + horz_div_c)
1478 detile_buf_vp_vert_limit = (double) swath_buf_size
1479 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)
1480 + 256.0 / RequestHeight256ByteChroma
1481 / (1 + vert_div_c) / (1 + yuv420));
/* dm_420_10 scales both limits by 1.5. */
1484 if (SourcePixelFormat == dm_420_10) {
1485 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1486 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
/* Subtract 1, then round down with granularity 16 (assuming dml_floor(x, 16) floors to a multiple of 16 - helper defined elsewhere). */
1489 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1490 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
/*
 * Fixed caps: 5760 horizontally; vertically 2880 when a chroma plane is
 * present, 5760 otherwise. The effective surface size is the surface size
 * clamped to the smaller of the fixed cap and the DET-derived limit; chroma
 * dimensions are the luma ones divided by (1 + yuv420).
 */
1492 MAS_vp_horz_limit = 5760;
1493 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1494 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1495 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1497 (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1498 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1499 eff_surf_height_l = (
1500 SurfaceHeightLuma > max_vp_vert_height ?
1501 max_vp_vert_height : SurfaceHeightLuma);
1502 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
/* Bytes of one full swath for horizontal (width based) and vertical (height based) scan. */
1504 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1505 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1506 if (BytePerPixelC > 0) {
1507 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma
1509 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1511 full_swath_bytes_horz_wc_c = 0;
1512 full_swath_bytes_vert_wc_c = 0;
/*
 * dm_420_10: scale each byte count by 2 / 3 and round up with granularity
 * 256. The operands are long, so "x * 2 / 3" is evaluated in integer
 * arithmetic before dml_ceil sees it.
 */
1515 if (SourcePixelFormat == dm_420_10) {
1516 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1517 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1518 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1519 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
/*
 * Horizontal scan: if two full swaths of both planes fit in DETBufferSize,
 * neither plane needs 128-byte requests. Otherwise the 1.5x luma/chroma
 * comparison selects which single plane is tried with 128-byte requests;
 * the last visible assignments set both flags.
 */
1522 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1523 req128_horz_wc_l = 0;
1524 req128_horz_wc_c = 0;
1525 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c
1526 && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c
1528 req128_horz_wc_l = 0;
1529 req128_horz_wc_c = 1;
1530 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c
1531 && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c
1533 req128_horz_wc_l = 1;
1534 req128_horz_wc_c = 0;
1536 req128_horz_wc_l = 1;
1537 req128_horz_wc_c = 1;
/* Vertical scan: same decision ladder using the vertical byte counts. */
1540 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1541 req128_vert_wc_l = 0;
1542 req128_vert_wc_c = 0;
1543 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c
1544 && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c
1546 req128_vert_wc_l = 0;
1547 req128_vert_wc_c = 1;
1548 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c
1549 && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c
1551 req128_vert_wc_l = 1;
1552 req128_vert_wc_c = 0;
1554 req128_vert_wc_l = 1;
1555 req128_vert_wc_c = 1;
/*
 * Segment contiguity, luma: horizontally non-contiguous for 2 bytes/pixel,
 * or 4 bytes/pixel unless the tiling is dm_sw_64kb_r_x; vertically
 * non-contiguous for 8 bytes/pixel with the listed d/d_x/d_t/r_x tilings,
 * or 4 bytes/pixel with dm_sw_64kb_r_x.
 */
1558 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1559 segment_order_horz_contiguous_luma = 0;
1561 segment_order_horz_contiguous_luma = 1;
1563 if ((BytePerPixelY == 8
1564 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1565 || TilingFormat == dm_sw_64kb_d_t
1566 || TilingFormat == dm_sw_64kb_r_x))
1567 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1568 segment_order_vert_contiguous_luma = 0;
1570 segment_order_vert_contiguous_luma = 1;
/* Segment contiguity, chroma: same rules applied to BytePerPixelC. */
1572 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1573 segment_order_horz_contiguous_chroma = 0;
1575 segment_order_horz_contiguous_chroma = 1;
1577 if ((BytePerPixelC == 8
1578 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1579 || TilingFormat == dm_sw_64kb_d_t
1580 || TilingFormat == dm_sw_64kb_r_x))
1581 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1582 segment_order_vert_contiguous_chroma = 0;
1584 segment_order_vert_contiguous_chroma = 1;
/*
 * Request type selection.
 * Scan direction unknown: 256-byte requests only if they fit in both
 * directions; non-contiguous if any direction that needs 128-byte requests
 * is non-contiguous.
 */
1587 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1588 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1589 RequestLuma = REQ_256Bytes;
1590 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0)
1591 || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1592 RequestLuma = REQ_128BytesNonContiguous;
1594 RequestLuma = REQ_128BytesContiguous;
1596 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1597 RequestChroma = REQ_256Bytes;
1598 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0)
1599 || (req128_vert_wc_c == 1
1600 && segment_order_vert_contiguous_chroma == 0)) {
1601 RequestChroma = REQ_128BytesNonContiguous;
1603 RequestChroma = REQ_128BytesContiguous;
/* Scan direction known and not dm_vert: only the horizontal flags matter. */
1605 } else if (ScanOrientation != dm_vert) {
1606 if (req128_horz_wc_l == 0) {
1607 RequestLuma = REQ_256Bytes;
1608 } else if (segment_order_horz_contiguous_luma == 0) {
1609 RequestLuma = REQ_128BytesNonContiguous;
1611 RequestLuma = REQ_128BytesContiguous;
1613 if (req128_horz_wc_c == 0) {
1614 RequestChroma = REQ_256Bytes;
1615 } else if (segment_order_horz_contiguous_chroma == 0) {
1616 RequestChroma = REQ_128BytesNonContiguous;
1618 RequestChroma = REQ_128BytesContiguous;
/* Remaining case: only the vertical flags matter. */
1621 if (req128_vert_wc_l == 0) {
1622 RequestLuma = REQ_256Bytes;
1623 } else if (segment_order_vert_contiguous_luma == 0) {
1624 RequestLuma = REQ_128BytesNonContiguous;
1626 RequestLuma = REQ_128BytesContiguous;
1628 if (req128_vert_wc_c == 0) {
1629 RequestChroma = REQ_256Bytes;
1630 } else if (segment_order_vert_contiguous_chroma == 0) {
1631 RequestChroma = REQ_128BytesNonContiguous;
1633 RequestChroma = REQ_128BytesContiguous;
/* Translate the luma request type into the output block sizes. */
1637 if (RequestLuma == REQ_256Bytes) {
1638 *MaxUncompressedBlockLuma = 256;
1639 *MaxCompressedBlockLuma = 256;
1640 *IndependentBlockLuma = 0;
1641 } else if (RequestLuma == REQ_128BytesContiguous) {
1642 *MaxUncompressedBlockLuma = 256;
1643 *MaxCompressedBlockLuma = 128;
1644 *IndependentBlockLuma = 128;
1646 *MaxUncompressedBlockLuma = 256;
1647 *MaxCompressedBlockLuma = 64;
1648 *IndependentBlockLuma = 64;
/* Same translation for chroma. */
1651 if (RequestChroma == REQ_256Bytes) {
1652 *MaxUncompressedBlockChroma = 256;
1653 *MaxCompressedBlockChroma = 256;
1654 *IndependentBlockChroma = 0;
1655 } else if (RequestChroma == REQ_128BytesContiguous) {
1656 *MaxUncompressedBlockChroma = 256;
1657 *MaxCompressedBlockChroma = 128;
1658 *IndependentBlockChroma = 128;
1660 *MaxUncompressedBlockChroma = 256;
1661 *MaxCompressedBlockChroma = 64;
1662 *IndependentBlockChroma = 64;
/* Without DCC, or without a chroma plane, the chroma outputs are reported as 0. */
1665 if (DCCEnabled != true || BytePerPixelC == 0) {
1666 *MaxUncompressedBlockChroma = 0;
1667 *MaxCompressedBlockChroma = 0;
1668 *IndependentBlockChroma = 0;
/* Without DCC the luma outputs are reported as 0 as well. */
1671 if (DCCEnabled != true) {
1672 *MaxUncompressedBlockLuma = 0;
1673 *MaxCompressedBlockLuma = 0;
1674 *IndependentBlockLuma = 0;
/*
 * CalculatePrefetchSourceLines - compute *VInitPreFill and *MaxNumSwath and
 * return *MaxNumSwath * SwathHeight + MaxPartialSwath.
 *
 * *VInitPreFill is derived from VRatio and vtaps (plus an Interlace term
 * when ProgressiveToInterlaceUnitInOPP is false). The swath count and the
 * partial-swath remainder are then computed in one of two ways depending on
 * mode_lib->vba.IgnoreViewportPositioning.
 */
1679 static double CalculatePrefetchSourceLines(
1680 struct display_mode_lib *mode_lib,
1684 bool ProgressiveToInterlaceUnitInOPP,
1685 unsigned int SwathHeight,
1686 unsigned int ViewportYStart,
1687 double *VInitPreFill,
1688 unsigned int *MaxNumSwath)
1690 unsigned int MaxPartialSwath = 0;
/* The Interlace * 0.5 * VRatio term is only added when ProgressiveToInterlaceUnitInOPP is false. */
1692 if (ProgressiveToInterlaceUnitInOPP)
1693 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1695 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
/*
 * Viewport positioning honoured: one extra swath is added to the count, the
 * remainder is based on *VInitPreFill - 2, and MaxPartialSwath is kept at
 * 1 or more.
 */
1697 if (!mode_lib->vba.IgnoreViewportPositioning) {
1699 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1701 if (*VInitPreFill > 1.0)
1702 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1704 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
1706 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
/*
 * Viewport positioning ignored: a non-zero ViewportYStart only triggers the
 * warning below; the swath count has no extra swath and the remainder is
 * based on *VInitPreFill - 1.
 */
1710 if (ViewportYStart != 0)
1712 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1714 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1716 if (*VInitPreFill > 1.0)
1717 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1719 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
/* Whole swaths plus the partial one. */
1723 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
/*
 * CalculateVMAndRowBytes - compute the page-table and DCC-meta byte counts
 * for one surface and the request geometry that goes with them.
 *
 * Return value: PDEAndMetaPTEBytesFrame, i.e.
 *   *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame,
 * multiplied by (1 + 8 * HostVMDynamicLevels) when HostVMEnable is true.
 *
 * Values written through the output pointers in the visible code:
 *   meta request/row geometry  (*MetaRequestWidth/Height, *meta_row_width/height, *MetaRowByte)
 *   *MetaPTEBytesFrame, *DPDE0BytesFrame
 *   *MacroTileWidth
 *   PTE request geometry       (*PixelPTEReqWidth/Height, *PTERequestSize)
 *   dpte row geometry          (*dpte_row_height, *dpte_row_width_ub, *PixelPTEBytesPerRow)
 *   *PTEBufferSizeNotExceeded
 *   *vm_group_bytes, *dpte_group_bytes
 */
1726 static unsigned int CalculateVMAndRowBytes(
1727 struct display_mode_lib *mode_lib,
1729 unsigned int BlockHeight256Bytes,
1730 unsigned int BlockWidth256Bytes,
1731 enum source_format_class SourcePixelFormat,
1732 unsigned int SurfaceTiling,
1733 unsigned int BytePerPixel,
1734 enum scan_direction_class ScanDirection,
1735 unsigned int SwathWidth,
1736 unsigned int ViewportHeight,
1739 unsigned int HostVMMaxNonCachedPageTableLevels,
1740 unsigned int GPUVMMinPageSize,
1741 unsigned int HostVMMinPageSize,
1742 unsigned int PTEBufferSizeInRequests,
1744 unsigned int DCCMetaPitch,
1745 unsigned int *MacroTileWidth,
1746 unsigned int *MetaRowByte,
1747 unsigned int *PixelPTEBytesPerRow,
1748 bool *PTEBufferSizeNotExceeded,
1749 unsigned int *dpte_row_width_ub,
1750 unsigned int *dpte_row_height,
1751 unsigned int *MetaRequestWidth,
1752 unsigned int *MetaRequestHeight,
1753 unsigned int *meta_row_width,
1754 unsigned int *meta_row_height,
1755 unsigned int *vm_group_bytes,
1756 unsigned int *dpte_group_bytes,
1757 unsigned int *PixelPTEReqWidth,
1758 unsigned int *PixelPTEReqHeight,
1759 unsigned int *PTERequestSize,
1760 unsigned int *DPDE0BytesFrame,
1761 unsigned int *MetaPTEBytesFrame)
1763 unsigned int MPDEBytesFrame = 0;
1764 unsigned int DCCMetaSurfaceBytes = 0;
1765 unsigned int MacroTileSizeBytes = 0;
1766 unsigned int MacroTileHeight = 0;
1767 unsigned int ExtraDPDEBytesFrame = 0;
1768 unsigned int PDEAndMetaPTEBytesFrame = 0;
1769 unsigned int PixelPTEReqHeightPTEs = 0;
1770 unsigned int HostVMDynamicLevels = 0;
1772 double FractionOfPTEReturnDrop;
/*
 * HostVMDynamicLevels stays 0 unless both GPUVM and HostVM are enabled.
 * HostVMMinPageSize < 2048 uses the full non-cached level count,
 * [2048, 1048576) uses one level less, anything larger two levels less;
 * the reduced values are clamped at 0.
 */
1774 if (GPUVMEnable == true && HostVMEnable == true) {
1775 if (HostVMMinPageSize < 2048) {
1776 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1777 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1778 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1780 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
/*
 * Meta request size is 8x the 256-byte block in each dimension. For
 * dm_vert scan the roles of request width and height are swapped when
 * computing the meta row.
 */
1784 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1785 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1786 if (ScanDirection != dm_vert) {
1787 *meta_row_height = *MetaRequestHeight;
1788 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1789 + *MetaRequestWidth;
1790 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1792 *meta_row_height = *MetaRequestWidth;
1793 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1794 + *MetaRequestHeight;
1795 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
/* Size of the DCC meta surface for the viewport, in units of 1/256 of the pixel bytes. */
1797 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1798 + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
/* Meta PTE/PDE bytes are only counted when GPUVM is enabled. */
1799 if (GPUVMEnable == true) {
1800 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1801 MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
1803 *MetaPTEBytesFrame = 0;
/* No DCC: no meta PTE bytes. */
1807 if (DCCEnable != true) {
1808 *MetaPTEBytesFrame = 0;
/* Macro tile: 256 bytes for linear surfaces, 65536 bytes with 16x the block height for the other visible case. */
1813 if (SurfaceTiling == dm_sw_linear) {
1814 MacroTileSizeBytes = 256;
1815 MacroTileHeight = BlockHeight256Bytes;
1817 MacroTileSizeBytes = 65536;
1818 MacroTileHeight = 16 * BlockHeight256Bytes;
1820 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
/*
 * DPDE0 bytes are only counted with GPUVM enabled and more than one page
 * table level. The surface extent uses ViewportHeight for non-vertical
 * scan and SwathWidth for dm_vert.
 */
1822 if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
1823 if (ScanDirection != dm_vert) {
1824 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1826 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1828 ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
1830 *DPDE0BytesFrame = 0;
1831 ExtraDPDEBytesFrame = 0;
/* This sum (scaled below for HostVM) is the function's return value. */
1834 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1835 + ExtraDPDEBytesFrame;
1837 if (HostVMEnable == true) {
1838 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
/* PTE request geometry, chosen by tiling, macro tile size and GPUVMMinPageSize. */
1841 if (SurfaceTiling == dm_sw_linear) {
1842 PixelPTEReqHeightPTEs = 1;
1843 *PixelPTEReqHeight = 1;
1844 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1845 *PTERequestSize = 64;
1846 FractionOfPTEReturnDrop = 0;
1847 } else if (MacroTileSizeBytes == 4096) {
1848 PixelPTEReqHeightPTEs = 1;
1849 *PixelPTEReqHeight = MacroTileHeight;
1850 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1851 *PTERequestSize = 64;
1852 if (ScanDirection != dm_vert)
1853 FractionOfPTEReturnDrop = 0;
/*
 * NOTE(review): "7 / 8" is integer division and evaluates to 0, not 0.875.
 * The visible assignments above only ever set MacroTileSizeBytes to 256 or
 * 65536, so this branch looks unreachable here - confirm before changing.
 */
1855 FractionOfPTEReturnDrop = 7 / 8;
1856 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1857 PixelPTEReqHeightPTEs = 16;
1858 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1859 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1860 *PTERequestSize = 128;
1861 FractionOfPTEReturnDrop = 0;
1863 PixelPTEReqHeightPTEs = 1;
1864 *PixelPTEReqHeight = MacroTileHeight;
1865 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1866 *PTERequestSize = 64;
1867 FractionOfPTEReturnDrop = 0;
/*
 * dpte row geometry. Linear surfaces cap the row height at 128 and derive
 * it from a power of two; tiled surfaces use the PTE request height
 * (non-vertical scan) or the smaller of request width and macro tile width
 * (dm_vert). The row width is rounded up in whole requests plus one extra.
 */
1870 if (SurfaceTiling == dm_sw_linear) {
1871 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1872 *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1873 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1874 } else if (ScanDirection != dm_vert) {
1875 *dpte_row_height = *PixelPTEReqHeight;
1876 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1877 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1879 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1880 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1881 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
/* The row fits when its bytes, after the drop fraction, stay within 64 bytes per buffered request. */
1883 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1884 <= 64 * PTEBufferSizeInRequests) {
1885 *PTEBufferSizeNotExceeded = true;
1887 *PTEBufferSizeNotExceeded = false;
/* Without GPUVM there are no pixel PTE bytes and the buffer check always passes. */
1890 if (GPUVMEnable != true) {
1891 *PixelPTEBytesPerRow = 0;
1892 *PTEBufferSizeNotExceeded = true;
1894 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
/* HostVM scales the per-row PTE bytes by the same factor as the per-frame bytes. */
1896 if (HostVMEnable == true) {
1897 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
/*
 * Group sizes: 512/512 with HostVM; with GPUVM only, vm groups are 2048 and
 * dpte groups are 512 for non-linear, single-PTE-height, dm_vert surfaces
 * and 2048 otherwise; 0/0 when neither is enabled.
 */
1900 if (HostVMEnable == true) {
1901 *vm_group_bytes = 512;
1902 *dpte_group_bytes = 512;
1903 } else if (GPUVMEnable == true) {
1904 *vm_group_bytes = 2048;
1905 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1906 *dpte_group_bytes = 512;
1908 *dpte_group_bytes = 2048;
1911 *vm_group_bytes = 0;
1912 *dpte_group_bytes = 0;
1915 return PDEAndMetaPTEBytesFrame;
1918 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1919 struct display_mode_lib *mode_lib)
1921 struct vba_vars_st *v = &mode_lib->vba;
1923 long ReorderBytes = 0;
1924 unsigned int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1925 double MaxTotalRDBandwidth = 0;
1926 double MaxTotalRDBandwidthNoUrgentBurst = 0;
1927 bool DestinationLineTimesForPrefetchLessThan2 = false;
1928 bool VRatioPrefetchMoreThan4 = false;
1931 v->WritebackDISPCLK = 0.0;
1932 v->DISPCLKWithRamping = 0;
1933 v->DISPCLKWithoutRamping = 0;
1934 v->GlobalDPPCLK = 0.0;
1935 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
1936 v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] = dml_min3(
1937 v->ReturnBusWidth * v->DCFCLK,
1938 v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth,
1939 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
1940 if (v->HostVMEnable != true) {
1941 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
1943 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
1945 /* End DAL custom code */
1947 // DISPCLK and DPPCLK Calculation
1949 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1950 if (v->WritebackEnable[k]) {
1951 v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
1952 dml30_CalculateWriteBackDISPCLK(
1953 v->WritebackPixelFormat[k],
1955 v->WritebackHRatio[k],
1956 v->WritebackVRatio[k],
1957 v->WritebackHTaps[k],
1958 v->WritebackVTaps[k],
1959 v->WritebackSourceWidth[k],
1960 v->WritebackDestinationWidth[k],
1962 v->WritebackLineBufferSize));
1966 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1967 if (v->HRatio[k] > 1) {
1968 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1969 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
1971 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1972 v->MaxDCHUBToPSCLThroughput,
1973 v->MaxPSCLToLBThroughput);
1976 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
1977 * dml_max(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
1978 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
1980 if ((v->htaps[k] > 6 || v->vtaps[k] > 6)
1981 && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
1982 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
1985 if ((v->SourcePixelFormat[k] != dm_420_8
1986 && v->SourcePixelFormat[k] != dm_420_10
1987 && v->SourcePixelFormat[k] != dm_420_12
1988 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
1989 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1990 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
1992 if (v->HRatioChroma[k] > 1) {
1993 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1994 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
1996 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1997 v->MaxDCHUBToPSCLThroughput,
1998 v->MaxPSCLToLBThroughput);
2000 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2001 * dml_max3(v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2002 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0);
2004 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6)
2005 && v->DPPCLKUsingSingleDPPChroma
2006 < 2 * v->PixelClock[k]) {
2007 v->DPPCLKUsingSingleDPPChroma = 2
2011 v->DPPCLKUsingSingleDPP[k] = dml_max(
2012 v->DPPCLKUsingSingleDPPLuma,
2013 v->DPPCLKUsingSingleDPPChroma);
2017 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2018 if (v->BlendingAndTiming[k] != k)
2020 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2021 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2022 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2023 * (1 + v->DISPCLKRampingMargin / 100));
2024 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2025 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2026 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2027 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2028 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2029 * (1 + v->DISPCLKRampingMargin / 100));
2030 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2031 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2033 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2034 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2035 * (1 + v->DISPCLKRampingMargin / 100));
2036 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2037 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2041 v->DISPCLKWithRamping = dml_max(
2042 v->DISPCLKWithRamping,
2043 v->WritebackDISPCLK);
2044 v->DISPCLKWithoutRamping = dml_max(
2045 v->DISPCLKWithoutRamping,
2046 v->WritebackDISPCLK);
2048 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2049 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2050 v->DISPCLKWithRamping,
2051 v->DISPCLKDPPCLKVCOSpeed);
2052 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2053 v->DISPCLKWithoutRamping,
2054 v->DISPCLKDPPCLKVCOSpeed);
2055 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2056 v->soc.clock_limits[mode_lib->soc.num_states - 1].dispclk_mhz,
2057 v->DISPCLKDPPCLKVCOSpeed);
2058 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity
2059 > v->MaxDispclkRoundedToDFSGranularity) {
2060 v->DISPCLK_calculated =
2061 v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2062 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity
2063 > v->MaxDispclkRoundedToDFSGranularity) {
2064 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2066 v->DISPCLK_calculated =
2067 v->DISPCLKWithRampingRoundedToDFSGranularity;
2069 v->DISPCLK = v->DISPCLK_calculated;
2070 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2072 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2073 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k]
2075 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2076 v->GlobalDPPCLK = dml_max(
2078 v->DPPCLK_calculated[k]);
2080 v->GlobalDPPCLK = RoundToDFSGranularityUp(
2082 v->DISPCLKDPPCLKVCOSpeed);
2083 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2084 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255
2086 v->DPPCLK_calculated[k] * 255.0
2089 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2090 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2093 // Urgent and B P-State/DRAM Clock Change Watermark
2094 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2095 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2097 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2098 CalculateBytePerPixelAnd256BBlockSizes(
2099 v->SourcePixelFormat[k],
2100 v->SurfaceTiling[k],
2101 &v->BytePerPixelY[k],
2102 &v->BytePerPixelC[k],
2103 &v->BytePerPixelDETY[k],
2104 &v->BytePerPixelDETC[k],
2105 &v->BlockHeight256BytesY[k],
2106 &v->BlockHeight256BytesC[k],
2107 &v->BlockWidth256BytesY[k],
2108 &v->BlockWidth256BytesC[k]);
2111 CalculateSwathWidth(
2113 v->NumberOfActivePlanes,
2114 v->SourcePixelFormat,
2122 v->ODMCombineEnabled,
2125 v->BlockHeight256BytesY,
2126 v->BlockHeight256BytesC,
2127 v->BlockWidth256BytesY,
2128 v->BlockWidth256BytesC,
2129 v->BlendingAndTiming,
2133 v->SwathWidthSingleDPPY,
2134 v->SwathWidthSingleDPPC,
2139 v->swath_width_luma_ub,
2140 v->swath_width_chroma_ub);
2143 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2144 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2145 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k];
2146 DTRACE("read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2150 // DCFCLK Deep Sleep
2151 CalculateDCFCLKDeepSleep(
2153 v->NumberOfActivePlanes,
2164 v->PSCL_THROUGHPUT_LUMA,
2165 v->PSCL_THROUGHPUT_CHROMA,
2167 v->ReadBandwidthPlaneLuma,
2168 v->ReadBandwidthPlaneChroma,
2170 &v->DCFCLKDeepSleep);
2173 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2174 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2175 v->DSCCLK_calculated[k] = 0.0;
2177 if (v->OutputFormat[k] == dm_420)
2178 v->DSCFormatFactor = 2;
2179 else if (v->OutputFormat[k] == dm_444)
2180 v->DSCFormatFactor = 1;
2181 else if (v->OutputFormat[k] == dm_n422)
2182 v->DSCFormatFactor = 2;
2184 v->DSCFormatFactor = 1;
2185 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2186 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12
2187 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2188 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2189 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6
2190 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2192 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3
2193 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2198 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2199 double BPP = v->OutputBppPerState[k][v->VoltageLevel];
2201 if (v->DSCEnabled[k] && BPP != 0) {
2202 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2203 v->DSCDelay[k] = dscceComputeDelay(v->DSCInputBitPerComponent[k],
2205 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2206 v->NumberOfDSCSlices[k],
2209 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2210 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2211 v->DSCDelay[k] = 2 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2213 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2214 v->NumberOfDSCSlices[k] / 2.0,
2217 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2219 v->DSCDelay[k] = 4 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2221 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2222 v->NumberOfDSCSlices[k] / 4.0,
2225 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2227 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2233 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2234 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2235 if (j != k && v->BlendingAndTiming[k] == j
2236 && v->DSCEnabled[j])
2237 v->DSCDelay[k] = v->DSCDelay[j];
2240 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2241 unsigned int PDEAndMetaPTEBytesFrameY = 0;
2242 unsigned int PixelPTEBytesPerRowY = 0;
2243 unsigned int MetaRowByteY = 0;
2244 unsigned int MetaRowByteC = 0;
2245 unsigned int PDEAndMetaPTEBytesFrameC = 0;
2246 unsigned int PixelPTEBytesPerRowC = 0;
2247 bool PTEBufferSizeNotExceededY = 0;
2248 bool PTEBufferSizeNotExceededC = 0;
2251 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2252 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2253 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2254 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2256 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2257 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2260 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2263 v->BlockHeight256BytesC[k],
2264 v->BlockWidth256BytesC[k],
2265 v->SourcePixelFormat[k],
2266 v->SurfaceTiling[k],
2267 v->BytePerPixelC[k],
2270 v->ViewportHeightChroma[k],
2273 v->HostVMMaxNonCachedPageTableLevels,
2274 v->GPUVMMinPageSize,
2275 v->HostVMMinPageSize,
2276 v->PTEBufferSizeInRequestsForChroma,
2278 v->DCCMetaPitchC[k],
2279 &v->MacroTileWidthC[k],
2281 &PixelPTEBytesPerRowC,
2282 &PTEBufferSizeNotExceededC,
2283 &v->dpte_row_width_chroma_ub[k],
2284 &v->dpte_row_height_chroma[k],
2285 &v->meta_req_width_chroma[k],
2286 &v->meta_req_height_chroma[k],
2287 &v->meta_row_width_chroma[k],
2288 &v->meta_row_height_chroma[k],
2291 &v->PixelPTEReqWidthC[k],
2292 &v->PixelPTEReqHeightC[k],
2293 &v->PTERequestSizeC[k],
2294 &v->dpde0_bytes_per_frame_ub_c[k],
2295 &v->meta_pte_bytes_per_frame_ub_c[k]);
2297 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2302 v->ProgressiveToInterlaceUnitInOPP,
2304 v->ViewportYStartC[k],
2305 &v->VInitPreFillC[k],
2306 &v->MaxNumSwathC[k]);
2308 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2309 v->PTEBufferSizeInRequestsForChroma = 0;
2310 PixelPTEBytesPerRowC = 0;
2311 PDEAndMetaPTEBytesFrameC = 0;
2313 v->MaxNumSwathC[k] = 0;
2314 v->PrefetchSourceLinesC[k] = 0;
2317 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2320 v->BlockHeight256BytesY[k],
2321 v->BlockWidth256BytesY[k],
2322 v->SourcePixelFormat[k],
2323 v->SurfaceTiling[k],
2324 v->BytePerPixelY[k],
2327 v->ViewportHeight[k],
2330 v->HostVMMaxNonCachedPageTableLevels,
2331 v->GPUVMMinPageSize,
2332 v->HostVMMinPageSize,
2333 v->PTEBufferSizeInRequestsForLuma,
2335 v->DCCMetaPitchY[k],
2336 &v->MacroTileWidthY[k],
2338 &PixelPTEBytesPerRowY,
2339 &PTEBufferSizeNotExceededY,
2340 &v->dpte_row_width_luma_ub[k],
2341 &v->dpte_row_height[k],
2342 &v->meta_req_width[k],
2343 &v->meta_req_height[k],
2344 &v->meta_row_width[k],
2345 &v->meta_row_height[k],
2346 &v->vm_group_bytes[k],
2347 &v->dpte_group_bytes[k],
2348 &v->PixelPTEReqWidthY[k],
2349 &v->PixelPTEReqHeightY[k],
2350 &v->PTERequestSizeY[k],
2351 &v->dpde0_bytes_per_frame_ub_l[k],
2352 &v->meta_pte_bytes_per_frame_ub_l[k]);
2354 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2359 v->ProgressiveToInterlaceUnitInOPP,
2361 v->ViewportYStartY[k],
2362 &v->VInitPreFillY[k],
2363 &v->MaxNumSwathY[k]);
2364 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2365 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
2366 + PDEAndMetaPTEBytesFrameC;
2367 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2369 CalculateRowBandwidth(
2371 v->SourcePixelFormat[k],
2375 v->HTotal[k] / v->PixelClock[k],
2378 v->meta_row_height[k],
2379 v->meta_row_height_chroma[k],
2380 PixelPTEBytesPerRowY,
2381 PixelPTEBytesPerRowC,
2382 v->dpte_row_height[k],
2383 v->dpte_row_height_chroma[k],
2385 &v->dpte_row_bw[k]);
2388 v->TotalDCCActiveDPP = 0;
2389 v->TotalActiveDPP = 0;
2390 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2391 v->TotalActiveDPP = v->TotalActiveDPP
2392 + v->DPPPerPlane[k];
2393 if (v->DCCEnable[k])
2394 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP
2395 + v->DPPPerPlane[k];
2399 ReorderBytes = v->NumberOfChannels * dml_max3(
2400 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2401 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2402 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2404 v->UrgentExtraLatency = CalculateExtraLatency(
2405 v->RoundTripPingLatencyCycles,
2409 v->PixelChunkSizeInKByte,
2410 v->TotalDCCActiveDPP,
2415 v->NumberOfActivePlanes,
2417 v->dpte_group_bytes,
2418 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2419 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2420 v->HostVMMinPageSize,
2421 v->HostVMMaxNonCachedPageTableLevels);
2423 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2425 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2426 if (v->BlendingAndTiming[k] == k) {
2427 if (v->WritebackEnable[k] == true) {
2428 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency +
2429 CalculateWriteBackDelay(v->WritebackPixelFormat[k],
2430 v->WritebackHRatio[k],
2431 v->WritebackVRatio[k],
2432 v->WritebackVTaps[k],
2433 v->WritebackDestinationWidth[k],
2434 v->WritebackDestinationHeight[k],
2435 v->WritebackSourceHeight[k],
2436 v->HTotal[k]) / v->DISPCLK;
2438 v->WritebackDelay[v->VoltageLevel][k] = 0;
2439 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2440 if (v->BlendingAndTiming[j] == k
2441 && v->WritebackEnable[j] == true) {
2442 v->WritebackDelay[v->VoltageLevel][k] = dml_max(v->WritebackDelay[v->VoltageLevel][k],
2443 v->WritebackLatency + CalculateWriteBackDelay(
2444 v->WritebackPixelFormat[j],
2445 v->WritebackHRatio[j],
2446 v->WritebackVRatio[j],
2447 v->WritebackVTaps[j],
2448 v->WritebackDestinationWidth[j],
2449 v->WritebackDestinationHeight[j],
2450 v->WritebackSourceHeight[j],
2451 v->HTotal[k]) / v->DISPCLK);
2457 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2458 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2459 if (v->BlendingAndTiming[k] == j)
2460 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2462 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2463 v->MaxVStartupLines[k] = v->VTotal[k] - v->VActive[k] - dml_max(1.0, dml_ceil((double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1));
2466 v->MaximumMaxVStartupLines = 0;
2467 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2468 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2470 if (v->DRAMClockChangeLatencyOverride > 0.0) {
2471 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatencyOverride;
2473 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatency;
2475 v->UrgentLatency = CalculateUrgentLatency(v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock);
2478 v->FractionOfUrgentBandwidth = 0.0;
2479 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2481 v->VStartupLines = 13;
2484 MaxTotalRDBandwidth = 0;
2485 MaxTotalRDBandwidthNoUrgentBurst = 0;
2486 DestinationLineTimesForPrefetchLessThan2 = false;
2487 VRatioPrefetchMoreThan4 = false;
2488 TWait = CalculateTWait(
2490 v->FinalDRAMClockChangeLatency,
2492 v->SREnterPlusExitTime);
2494 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2495 Pipe myPipe = { 0 };
2497 myPipe.DPPCLK = v->DPPCLK[k];
2498 myPipe.DISPCLK = v->DISPCLK;
2499 myPipe.PixelClock = v->PixelClock[k];
2500 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2501 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2502 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2503 myPipe.SourceScan = v->SourceScan[k];
2504 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2505 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2506 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2507 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2508 myPipe.InterlaceEnable = v->Interlace[k];
2509 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2510 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2511 myPipe.HTotal = v->HTotal[k];
2512 myPipe.DCCEnable = v->DCCEnable[k];
2513 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
2515 v->ErrorResult[k] = CalculatePrefetchSchedule(
2517 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2518 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2521 v->DPPCLKDelaySubtotal
2522 + v->DPPCLKDelayCNVCFormater,
2524 v->DPPCLKDelaySCLLBOnly,
2525 v->DPPCLKDelayCNVCCursor,
2526 v->DISPCLKDelaySubtotal,
2527 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2529 v->MaxInterDCNTileRepeaters,
2530 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2531 v->MaxVStartupLines[k],
2532 v->GPUVMMaxPageTableLevels,
2535 v->HostVMMaxNonCachedPageTableLevels,
2536 v->HostVMMinPageSize,
2537 v->DynamicMetadataEnable[k],
2538 v->DynamicMetadataVMEnabled,
2539 v->DynamicMetadataLinesBeforeActiveRequired[k],
2540 v->DynamicMetadataTransmittedBytes[k],
2542 v->UrgentExtraLatency,
2544 v->PDEAndMetaPTEBytesFrame[k],
2546 v->PixelPTEBytesPerRow[k],
2547 v->PrefetchSourceLinesY[k],
2549 v->BytePerPixelY[k],
2550 v->VInitPreFillY[k],
2552 v->PrefetchSourceLinesC[k],
2554 v->BytePerPixelC[k],
2555 v->VInitPreFillC[k],
2557 v->swath_width_luma_ub[k],
2558 v->swath_width_chroma_ub[k],
2562 v->ProgressiveToInterlaceUnitInOPP,
2563 &v->DSTXAfterScaler[k],
2564 &v->DSTYAfterScaler[k],
2565 &v->DestinationLinesForPrefetch[k],
2566 &v->PrefetchBandwidth[k],
2567 &v->DestinationLinesToRequestVMInVBlank[k],
2568 &v->DestinationLinesToRequestRowInVBlank[k],
2569 &v->VRatioPrefetchY[k],
2570 &v->VRatioPrefetchC[k],
2571 &v->RequiredPrefetchPixDataBWLuma[k],
2572 &v->RequiredPrefetchPixDataBWChroma[k],
2573 &v->NotEnoughTimeForDynamicMetadata[k],
2575 &v->prefetch_vmrow_bw[k],
2578 &v->VUpdateOffsetPix[k],
2579 &v->VUpdateWidthPix[k],
2580 &v->VReadyOffsetPix[k]);
2581 if (v->BlendingAndTiming[k] == k) {
2582 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2583 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k];
2584 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[k];
2585 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[k] / 4.0, 1);
2586 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2588 int x = v->BlendingAndTiming[k];
2589 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2590 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[x];
2591 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[x];
2592 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[x] / 4.0, 1);
2593 if (!v->MaxVStartupLines[x])
2594 v->MaxVStartupLines[x] = v->MaxVStartupLines[k];
2595 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[x]);
2599 v->NotEnoughUrgentLatencyHiding[0][0] = false;
2600 v->NotEnoughUrgentLatencyHidingPre = false;
2602 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2603 v->cursor_bw[k] = v->NumberOfCursors[k]
2604 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2606 / (v->HTotal[k] / v->PixelClock[k])
2608 v->cursor_bw_pre[k] = v->NumberOfCursors[k]
2609 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2611 / (v->HTotal[k] / v->PixelClock[k])
2612 * v->VRatioPrefetchY[k];
2614 CalculateUrgentBurstFactor(
2615 v->swath_width_luma_ub[k],
2616 v->swath_width_chroma_ub[k],
2617 v->DETBufferSizeInKByte[0],
2620 v->HTotal[k] / v->PixelClock[k],
2622 v->CursorBufferSize,
2623 v->CursorWidth[k][0],
2627 v->BytePerPixelDETY[k],
2628 v->BytePerPixelDETC[k],
2629 v->DETBufferSizeY[k],
2630 v->DETBufferSizeC[k],
2631 &v->UrgentBurstFactorCursor[k],
2632 &v->UrgentBurstFactorLuma[k],
2633 &v->UrgentBurstFactorChroma[k],
2634 &v->NoUrgentLatencyHiding[k]);
2636 CalculateUrgentBurstFactor(
2637 v->swath_width_luma_ub[k],
2638 v->swath_width_chroma_ub[k],
2639 v->DETBufferSizeInKByte[0],
2642 v->HTotal[k] / v->PixelClock[k],
2644 v->CursorBufferSize,
2645 v->CursorWidth[k][0],
2647 v->VRatioPrefetchY[k],
2648 v->VRatioPrefetchC[k],
2649 v->BytePerPixelDETY[k],
2650 v->BytePerPixelDETC[k],
2651 v->DETBufferSizeY[k],
2652 v->DETBufferSizeC[k],
2653 &v->UrgentBurstFactorCursorPre[k],
2654 &v->UrgentBurstFactorLumaPre[k],
2655 &v->UrgentBurstFactorChromaPre[k],
2656 &v->NoUrgentLatencyHidingPre[k]);
2658 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2659 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2660 v->ReadBandwidthPlaneLuma[k] *
2661 v->UrgentBurstFactorLuma[k] +
2662 v->ReadBandwidthPlaneChroma[k] *
2663 v->UrgentBurstFactorChroma[k] +
2665 v->UrgentBurstFactorCursor[k] +
2666 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2667 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2668 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] *
2669 v->UrgentBurstFactorCursorPre[k]);
2671 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2672 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2673 v->ReadBandwidthPlaneLuma[k] +
2674 v->ReadBandwidthPlaneChroma[k] +
2676 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2677 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2679 if (v->DestinationLinesForPrefetch[k] < 2)
2680 DestinationLineTimesForPrefetchLessThan2 = true;
2681 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2682 VRatioPrefetchMoreThan4 = true;
2683 if (v->NoUrgentLatencyHiding[k] == true)
2684 v->NotEnoughUrgentLatencyHiding[0][0] = true;
2686 if (v->NoUrgentLatencyHidingPre[k] == true)
2687 v->NotEnoughUrgentLatencyHidingPre = true;
2689 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2692 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding[0][0] == 0
2693 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2694 && !DestinationLineTimesForPrefetchLessThan2)
2695 v->PrefetchModeSupported = true;
2697 v->PrefetchModeSupported = false;
2698 dml_print("DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2699 dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW);
2700 dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not");
2701 dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2704 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2705 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2706 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2707 v->BandwidthAvailableForImmediateFlip =
2708 v->BandwidthAvailableForImmediateFlip
2710 v->ReadBandwidthPlaneLuma[k] * v->UrgentBurstFactorLuma[k]
2711 + v->ReadBandwidthPlaneChroma[k] * v->UrgentBurstFactorChroma[k]
2712 + v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2713 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2714 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2715 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2718 v->TotImmediateFlipBytes = 0;
2719 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2720 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2722 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2723 CalculateFlipSchedule(
2725 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2726 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2727 v->UrgentExtraLatency,
2729 v->GPUVMMaxPageTableLevels,
2731 v->HostVMMaxNonCachedPageTableLevels,
2733 v->HostVMMinPageSize,
2734 v->PDEAndMetaPTEBytesFrame[k],
2736 v->PixelPTEBytesPerRow[k],
2737 v->BandwidthAvailableForImmediateFlip,
2738 v->TotImmediateFlipBytes,
2739 v->SourcePixelFormat[k],
2740 v->HTotal[k] / v->PixelClock[k],
2745 v->dpte_row_height[k],
2746 v->meta_row_height[k],
2747 v->dpte_row_height_chroma[k],
2748 v->meta_row_height_chroma[k],
2749 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2750 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2751 &v->final_flip_bw[k],
2752 &v->ImmediateFlipSupportedForPipe[k]);
2754 v->total_dcn_read_bw_with_flip = 0.0;
2755 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2756 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2757 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3(
2758 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2759 v->DPPPerPlane[k] * v->final_flip_bw[k] +
2760 v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] +
2761 v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k] +
2762 v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2763 v->DPPPerPlane[k] * (v->final_flip_bw[k] +
2764 v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2765 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2766 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2767 v->total_dcn_read_bw_with_flip_no_urgent_burst =
2768 v->total_dcn_read_bw_with_flip_no_urgent_burst +
2769 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2770 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2771 v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2774 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2776 v->ImmediateFlipSupported = true;
2777 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2778 v->ImmediateFlipSupported = false;
2779 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2781 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2782 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2783 v->ImmediateFlipSupported = false;
2787 v->ImmediateFlipSupported = false;
2790 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2791 if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) {
2792 v->PrefetchModeSupported = false;
2793 dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2797 v->VStartupLines = v->VStartupLines + 1;
2798 v->PrefetchModeSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport &&
2799 !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2800 v->ImmediateFlipSupported)) ? true : false;
2801 } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2802 ASSERT(v->PrefetchModeSupported);
2804 //Watermarks and NB P-State/DRAM Clock Change Support
2806 enum clock_change_support DRAMClockChangeSupport = 0; // dummy
2807 CalculateWatermarksAndDRAMSpeedChangeSupport(
2810 v->NumberOfActivePlanes,
2811 v->MaxLineBufferLines,
2813 v->DPPOutputBufferPixels,
2814 v->DETBufferSizeInKByte[0],
2815 v->WritebackInterfaceBufferSize,
2819 v->dpte_group_bytes,
2822 v->UrgentExtraLatency,
2823 v->WritebackLatency,
2824 v->WritebackChunkSize,
2826 v->FinalDRAMClockChangeLatency,
2828 v->SREnterPlusExitTime,
2848 v->BlendingAndTiming,
2849 v->BytePerPixelDETY,
2850 v->BytePerPixelDETC,
2854 v->WritebackPixelFormat,
2855 v->WritebackDestinationWidth,
2856 v->WritebackDestinationHeight,
2857 v->WritebackSourceHeight,
2858 &DRAMClockChangeSupport,
2859 &v->UrgentWatermark,
2860 &v->WritebackUrgentWatermark,
2861 &v->DRAMClockChangeWatermark,
2862 &v->WritebackDRAMClockChangeWatermark,
2863 &v->StutterExitWatermark,
2864 &v->StutterEnterPlusExitWatermark,
2865 &v->MinActiveDRAMClockChangeLatencySupported);
2867 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2868 if (v->WritebackEnable[k] == true) {
2869 if (v->BlendingAndTiming[k] == k) {
2870 v->ThisVStartup = v->VStartup[k];
2872 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2873 if (v->BlendingAndTiming[k] == j) {
2874 v->ThisVStartup = v->VStartup[j];
2878 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
2879 v->ThisVStartup * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2881 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2888 //Display Pipeline Delivery Time in Prefetch, Groups
2889 CalculatePixelDeliveryTimes(
2890 v->NumberOfActivePlanes,
2895 v->swath_width_luma_ub,
2896 v->swath_width_chroma_ub,
2901 v->PSCL_THROUGHPUT_LUMA,
2902 v->PSCL_THROUGHPUT_CHROMA,
2909 v->BlockWidth256BytesY,
2910 v->BlockHeight256BytesY,
2911 v->BlockWidth256BytesC,
2912 v->BlockHeight256BytesC,
2913 v->DisplayPipeLineDeliveryTimeLuma,
2914 v->DisplayPipeLineDeliveryTimeChroma,
2915 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2916 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
2917 v->DisplayPipeRequestDeliveryTimeLuma,
2918 v->DisplayPipeRequestDeliveryTimeChroma,
2919 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2920 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
2921 v->CursorRequestDeliveryTime,
2922 v->CursorRequestDeliveryTimePrefetch);
2924 CalculateMetaAndPTETimes(
2925 v->NumberOfActivePlanes,
2928 v->MinMetaChunkSizeBytes,
2932 v->DestinationLinesToRequestRowInVBlank,
2933 v->DestinationLinesToRequestRowInImmediateFlip,
2940 v->dpte_row_height_chroma,
2942 v->meta_row_width_chroma,
2944 v->meta_row_height_chroma,
2946 v->meta_req_width_chroma,
2948 v->meta_req_height_chroma,
2949 v->dpte_group_bytes,
2952 v->PixelPTEReqWidthY,
2953 v->PixelPTEReqHeightY,
2954 v->PixelPTEReqWidthC,
2955 v->PixelPTEReqHeightC,
2956 v->dpte_row_width_luma_ub,
2957 v->dpte_row_width_chroma_ub,
2958 v->DST_Y_PER_PTE_ROW_NOM_L,
2959 v->DST_Y_PER_PTE_ROW_NOM_C,
2960 v->DST_Y_PER_META_ROW_NOM_L,
2961 v->DST_Y_PER_META_ROW_NOM_C,
2962 v->TimePerMetaChunkNominal,
2963 v->TimePerChromaMetaChunkNominal,
2964 v->TimePerMetaChunkVBlank,
2965 v->TimePerChromaMetaChunkVBlank,
2966 v->TimePerMetaChunkFlip,
2967 v->TimePerChromaMetaChunkFlip,
2968 v->time_per_pte_group_nom_luma,
2969 v->time_per_pte_group_vblank_luma,
2970 v->time_per_pte_group_flip_luma,
2971 v->time_per_pte_group_nom_chroma,
2972 v->time_per_pte_group_vblank_chroma,
2973 v->time_per_pte_group_flip_chroma);
2975 CalculateVMGroupAndRequestTimes(
2976 v->NumberOfActivePlanes,
2978 v->GPUVMMaxPageTableLevels,
2981 v->DestinationLinesToRequestVMInVBlank,
2982 v->DestinationLinesToRequestVMInImmediateFlip,
2985 v->dpte_row_width_luma_ub,
2986 v->dpte_row_width_chroma_ub,
2988 v->dpde0_bytes_per_frame_ub_l,
2989 v->dpde0_bytes_per_frame_ub_c,
2990 v->meta_pte_bytes_per_frame_ub_l,
2991 v->meta_pte_bytes_per_frame_ub_c,
2992 v->TimePerVMGroupVBlank,
2993 v->TimePerVMGroupFlip,
2994 v->TimePerVMRequestVBlank,
2995 v->TimePerVMRequestFlip);
2999 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3000 if (PrefetchMode == 0) {
3001 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3002 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3003 v->MinTTUVBlank[k] = dml_max(
3004 v->DRAMClockChangeWatermark,
3006 v->StutterEnterPlusExitWatermark,
3007 v->UrgentWatermark));
3008 } else if (PrefetchMode == 1) {
3009 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3010 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3011 v->MinTTUVBlank[k] = dml_max(
3012 v->StutterEnterPlusExitWatermark,
3013 v->UrgentWatermark);
3015 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3016 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3017 v->MinTTUVBlank[k] = v->UrgentWatermark;
3019 if (!v->DynamicMetadataEnable[k])
3020 v->MinTTUVBlank[k] = v->TCalc
3021 + v->MinTTUVBlank[k];
3024 // DCC Configuration
3026 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3027 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3028 v->SourcePixelFormat[k],
3029 v->SurfaceWidthY[k],
3030 v->SurfaceWidthC[k],
3031 v->SurfaceHeightY[k],
3032 v->SurfaceHeightC[k],
3033 v->DETBufferSizeInKByte[0] * 1024,
3034 v->BlockHeight256BytesY[k],
3035 v->BlockHeight256BytesC[k],
3036 v->SurfaceTiling[k],
3037 v->BytePerPixelY[k],
3038 v->BytePerPixelC[k],
3039 v->BytePerPixelDETY[k],
3040 v->BytePerPixelDETC[k],
3042 &v->DCCYMaxUncompressedBlock[k],
3043 &v->DCCCMaxUncompressedBlock[k],
3044 &v->DCCYMaxCompressedBlock[k],
3045 &v->DCCCMaxCompressedBlock[k],
3046 &v->DCCYIndependentBlock[k],
3047 &v->DCCCIndependentBlock[k]);
3051 //Maximum Bandwidth Used
3052 double TotalWRBandwidth = 0;
3053 double MaxPerPlaneVActiveWRBandwidth = 0;
3054 double WRBandwidth = 0;
3055 double MaxUsedBW = 0;
3056 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3057 if (v->WritebackEnable[k] == true
3058 && v->WritebackPixelFormat[k] == dm_444_32) {
3059 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3060 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3061 } else if (v->WritebackEnable[k] == true) {
3062 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3063 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3065 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3066 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3069 v->TotalDataReadBandwidth = 0;
3070 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3071 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
3072 + v->ReadBandwidthPlaneLuma[k]
3073 + v->ReadBandwidthPlaneChroma[k];
3077 double MaxPerPlaneVActiveRDBandwidth = 0;
3078 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3079 MaxPerPlaneVActiveRDBandwidth = dml_max(MaxPerPlaneVActiveRDBandwidth,
3080 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
3085 MaxUsedBW = MaxTotalRDBandwidth + TotalWRBandwidth;
3089 v->VStartupMargin = 0;
3090 v->FirstMainPlane = true;
3091 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3092 if (v->BlendingAndTiming[k] == k) {
3093 double margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k]
3095 if (v->FirstMainPlane == true) {
3096 v->VStartupMargin = margin;
3097 v->FirstMainPlane = false;
3099 v->VStartupMargin = dml_min(v->VStartupMargin, margin);
3104 // Stutter Efficiency
3105 CalculateStutterEfficiency(
3106 v->NumberOfActivePlanes,
3107 v->ROBBufferSizeInKByte,
3108 v->TotalDataReadBandwidth,
3112 v->SynchronizedVBlank,
3116 v->BytePerPixelDETY,
3127 v->BlockHeight256BytesY,
3128 v->BlockWidth256BytesY,
3129 v->BlockHeight256BytesC,
3130 v->BlockWidth256BytesC,
3131 v->DCCYMaxUncompressedBlock,
3132 v->DCCCMaxUncompressedBlock,
3136 v->ReadBandwidthPlaneLuma,
3137 v->ReadBandwidthPlaneChroma,
3140 &v->StutterEfficiencyNotIncludingVBlank,
3141 &v->StutterEfficiency,
/*
 * DisplayPipeConfiguration - per-plane byte/block sizing followed by the
 * swath and DET configuration call.
 *
 * For each of mode_lib->vba.NumberOfActivePlanes planes this calls
 * CalculateBytePerPixelAnd256BBlockSizes() with the plane's source pixel
 * format and tiling, collecting the 256-byte block heights/widths in local
 * arrays.  Those arrays, together with per-plane viewport/surface/ratio
 * arrays taken from mode_lib->vba, are then passed to
 * CalculateSwathAndDETConfiguration().
 *
 * NOTE(review): several argument lines of both calls, and the declaration of
 * the loop index k, are not visible in this view; the comments below only
 * describe what can be seen.
 */
3145 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3147 // Display Pipe Configuration
/* Per-plane scratch arrays, all zero-initialised and sized DC__NUM_DPP__MAX. */
3148 double BytePerPixDETY[DC__NUM_DPP__MAX] = { 0 };
3149 double BytePerPixDETC[DC__NUM_DPP__MAX] = { 0 };
3150 int BytePerPixY[DC__NUM_DPP__MAX] = { 0 };
3151 int BytePerPixC[DC__NUM_DPP__MAX] = { 0 };
/*
 * NOTE(review): these four arrays are int, while the matching parameters of
 * CalculateBytePerPixelAnd256BBlockSizes() are declared unsigned int * --
 * looks like a pointer-signedness mismatch; confirm it is intentional.
 */
3152 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX] = { 0 };
3153 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX] = { 0 };
3154 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX] = { 0 };
3155 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX] = { 0 };
/*
 * Placeholders; presumably they receive outputs of
 * CalculateSwathAndDETConfiguration() that this caller does not consume
 * (only &dummysinglestring is visibly passed below) -- TODO confirm.
 */
3156 double dummy1[DC__NUM_DPP__MAX] = { 0 };
3157 double dummy2[DC__NUM_DPP__MAX] = { 0 };
3158 double dummy3[DC__NUM_DPP__MAX] = { 0 };
3159 double dummy4[DC__NUM_DPP__MAX] = { 0 };
3160 int dummy5[DC__NUM_DPP__MAX] = { 0 };
3161 int dummy6[DC__NUM_DPP__MAX] = { 0 };
3162 bool dummy7[DC__NUM_DPP__MAX] = { 0 };
3163 bool dummysinglestring = 0;
/* Step 1: byte-per-pixel and 256-byte block dimensions, one plane at a time. */
3166 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
3168 CalculateBytePerPixelAnd256BBlockSizes(
3169 mode_lib->vba.SourcePixelFormat[k],
3170 mode_lib->vba.SurfaceTiling[k],
3175 &Read256BytesBlockHeightY[k],
3176 &Read256BytesBlockHeightC[k],
3177 &Read256BytesBlockWidthY[k],
3178 &Read256BytesBlockWidthC[k]);
/*
 * Step 2: one call covering all planes.  Only DETBufferSizeInKByte[0] is
 * passed, i.e. the first entry is used for the whole configuration.
 */
3180 CalculateSwathAndDETConfiguration(
3182 mode_lib->vba.NumberOfActivePlanes,
3183 mode_lib->vba.DETBufferSizeInKByte[0],
3186 mode_lib->vba.SourceScan,
3187 mode_lib->vba.SourcePixelFormat,
3188 mode_lib->vba.SurfaceTiling,
3189 mode_lib->vba.ViewportWidth,
3190 mode_lib->vba.ViewportHeight,
3191 mode_lib->vba.SurfaceWidthY,
3192 mode_lib->vba.SurfaceWidthC,
3193 mode_lib->vba.SurfaceHeightY,
3194 mode_lib->vba.SurfaceHeightC,
/* Block dimensions gathered by the loop above. */
3195 Read256BytesBlockHeightY,
3196 Read256BytesBlockHeightC,
3197 Read256BytesBlockWidthY,
3198 Read256BytesBlockWidthC,
3199 mode_lib->vba.ODMCombineEnabled,
3200 mode_lib->vba.BlendingAndTiming,
3205 mode_lib->vba.HActive,
3206 mode_lib->vba.HRatio,
3207 mode_lib->vba.HRatioChroma,
3208 mode_lib->vba.DPPPerPlane,
/* Presumably the results written back into mode_lib->vba -- TODO confirm against the callee. */
3213 mode_lib->vba.SwathHeightY,
3214 mode_lib->vba.SwathHeightC,
3215 mode_lib->vba.DETBufferSizeY,
3216 mode_lib->vba.DETBufferSizeC,
3218 &dummysinglestring);
/*
 * CalculateBytePerPixelAnd256BBlockSizes - derive per-format pixel sizes and
 * the dimensions of a 256-byte block for the luma and chroma planes.
 *
 * @SourcePixelFormat: selects the bytes-per-pixel values.
 * @SurfaceTiling: dm_sw_linear forces a block height of 1.
 * @BytePerPixelDETY, @BytePerPixelDETC: written with the per-format values
 *	assigned in the first if/else chain.
 * @BlockHeight256BytesY/C, @BlockWidth256BytesY/C: written by the second
 *	chain; width is 256 / bytes-per-pixel / block height.
 *
 * NOTE(review): the assignments to *BytePerPixelY and *BytePerPixelC and the
 * return statement are not visible in this view.  Both values are read when
 * the block widths are computed, so presumably each format branch sets them
 * -- confirm.
 */
3221 static bool CalculateBytePerPixelAnd256BBlockSizes(
3222 enum source_format_class SourcePixelFormat,
3223 enum dm_swizzle_mode SurfaceTiling,
3224 unsigned int *BytePerPixelY,
3225 unsigned int *BytePerPixelC,
3226 double *BytePerPixelDETY,
3227 double *BytePerPixelDETC,
3228 unsigned int *BlockHeight256BytesY,
3229 unsigned int *BlockHeight256BytesC,
3230 unsigned int *BlockWidth256BytesY,
3231 unsigned int *BlockWidth256BytesC)
/* Chain 1: bytes per pixel in the DET, luma then chroma (0 = no second plane). */
3233 if (SourcePixelFormat == dm_444_64) {
3234 *BytePerPixelDETY = 8;
3235 *BytePerPixelDETC = 0;
3238 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3239 *BytePerPixelDETY = 4;
3240 *BytePerPixelDETC = 0;
3243 } else if (SourcePixelFormat == dm_444_16) {
3244 *BytePerPixelDETY = 2;
3245 *BytePerPixelDETC = 0;
3248 } else if (SourcePixelFormat == dm_444_8) {
3249 *BytePerPixelDETY = 1;
3250 *BytePerPixelDETC = 0;
3253 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3254 *BytePerPixelDETY = 4;
3255 *BytePerPixelDETC = 1;
3258 } else if (SourcePixelFormat == dm_420_8) {
3259 *BytePerPixelDETY = 1;
3260 *BytePerPixelDETC = 2;
3263 } else if (SourcePixelFormat == dm_420_12) {
3264 *BytePerPixelDETY = 2;
3265 *BytePerPixelDETC = 4;
/*
 * Fallback for every remaining format: fractional DET sizes 4/3 and 8/3
 * (presumably the packed 10-bit 4:2:0 case -- TODO confirm).
 */
3269 *BytePerPixelDETY = 4.0 / 3;
3270 *BytePerPixelDETC = 8.0 / 3;
/* Chain 2a: formats listed here get zero chroma block dimensions. */
3275 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
3276 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
3277 || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
3278 || SourcePixelFormat == dm_rgbe)) {
3279 if (SurfaceTiling == dm_sw_linear) {
3280 *BlockHeight256BytesY = 1;
3281 } else if (SourcePixelFormat == dm_444_64) {
3282 *BlockHeight256BytesY = 4;
3283 } else if (SourcePixelFormat == dm_444_8) {
3284 *BlockHeight256BytesY = 16;
3286 *BlockHeight256BytesY = 8;
/* Unsigned integer division: width * height * bytes-per-pixel covers 256 bytes. */
3288 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3289 *BlockHeight256BytesC = 0;
3290 *BlockWidth256BytesC = 0;
/* Chain 2b: all other formats get both luma and chroma block dimensions. */
3292 if (SurfaceTiling == dm_sw_linear) {
3293 *BlockHeight256BytesY = 1;
3294 *BlockHeight256BytesC = 1;
3295 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3296 *BlockHeight256BytesY = 8;
3297 *BlockHeight256BytesC = 16;
3298 } else if (SourcePixelFormat == dm_420_8) {
3299 *BlockHeight256BytesY = 16;
3300 *BlockHeight256BytesC = 8;
3302 *BlockHeight256BytesY = 8;
3303 *BlockHeight256BytesC = 8;
3305 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
/*
 * NOTE(review): integer division by *BytePerPixelC; this relies on every
 * format that reaches this branch having a non-zero chroma byte count --
 * verify against the (not visible) *BytePerPixelC assignments.
 */
3306 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
/*
 * CalculateTWait - pick the wait time that corresponds to a prefetch mode.
 *
 * PrefetchMode 0: the larger of (DRAMClockChangeLatency + UrgentLatency)
 *                 and max(SREnterPlusExitTime, UrgentLatency).
 * PrefetchMode 1: max(SREnterPlusExitTime, UrgentLatency).
 * any other mode: UrgentLatency alone.
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	double self_refresh_or_urgent;

	switch (PrefetchMode) {
	case 0:
		/* Same nesting as before: inner max first, then compare with the clock-change term. */
		self_refresh_or_urgent = dml_max(SREnterPlusExitTime, UrgentLatency);
		return dml_max(DRAMClockChangeLatency + UrgentLatency, self_refresh_or_urgent);
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
/*
 * dml30_CalculateWriteBackDISPCLK - DISPCLK required by one writeback pipe.
 *
 * Evaluates three candidate clocks and returns the largest (dml_max3):
 *   DISPCLK_H  - scales PixelClock by ceil(WritebackHTaps / 8) / WritebackHRatio
 *   DISPCLK_V  - scales PixelClock by
 *                (WritebackVTaps * ceil(WritebackDestinationWidth / 6) + 8) / HTotal
 *   DISPCLK_HB - scales PixelClock by a term built from the destination width,
 *                vertical taps, line buffer size and source width
 * (reading dml_ceil(x, 1) as round-up-to-integer -- the helper is defined
 * elsewhere; confirm).
 *
 * NOTE(review): PixelClock is used below but its parameter declaration is not
 * visible in this view; presumably it sits between WritebackPixelFormat and
 * WritebackHRatio.  WritebackPixelFormat and WritebackVRatio are not
 * referenced by the visible body.
 */
3327 double dml30_CalculateWriteBackDISPCLK(
3328 enum source_format_class WritebackPixelFormat,
3330 double WritebackHRatio,
3331 double WritebackVRatio,
3332 unsigned int WritebackHTaps,
3333 unsigned int WritebackVTaps,
3334 long WritebackSourceWidth,
3335 long WritebackDestinationWidth,
3336 unsigned int HTotal,
3337 unsigned int WritebackLineBufferSize)
3339 double DISPCLK_H = 0, DISPCLK_V = 0, DISPCLK_HB = 0;
/* Horizontal-tap candidate. */
3341 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
/* Vertical-tap candidate. */
3342 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
/* Line-buffer candidate; WritebackLineBufferSize is divided by the constant 57.0 (meaning not shown here). */
3343 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3344 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3347 static double CalculateWriteBackDelay(
3348 enum source_format_class WritebackPixelFormat,
3349 double WritebackHRatio,
3350 double WritebackVRatio,
3351 unsigned int WritebackVTaps,
3352 long WritebackDestinationWidth,
3353 long WritebackDestinationHeight,
3354 long WritebackSourceHeight,
3355 unsigned int HTotal)
3357 double CalculateWriteBackDelay = 0;
3358 double Line_length = 0;
3359 double Output_lines_last_notclamped = 0;
3360 double WritebackVInit = 0;
3362 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3363 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3364 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3365 if (Output_lines_last_notclamped < 0) {
3366 CalculateWriteBackDelay = 0;
3368 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3370 return CalculateWriteBackDelay;
/*
 * CalculateDynamicMetadataParameters - fill in the four dynamic metadata
 * timing outputs.
 *
 * @Tsetup: (VUpdate offset + VUpdate width + VReady offset, all in pixels)
 *          divided by PixelClock.
 * @Tdmbf:  DynamicMetadataTransmittedBytes / 4.0 / DISPCLK.
 * @Tdmec:  HTotal / PixelClock.
 * @Tdmsks: VBlank * HTotal / PixelClock / 2.0 when
 *          DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *          DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *          halved when InterlaceEnable is 1 and
 *          ProgressiveToInterlaceUnitInOPP is false.
 */
static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
		double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes,
		long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
		double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
{
	/* Delay contributed by the inter-tile repeaters. */
	const double repeater_delay = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	/* The three pixel-domain terms that make up the setup time. */
	const double vupdate_width_pix = (14 / DCFClkDeepSleep + 12 / DPPCLK + repeater_delay) * PixelClock;
	const double vready_offset_pix = dml_max(150.0 / DPPCLK,
			repeater_delay + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock;
	const double vupdate_offset_pix = dml_ceil(HTotal / 4.0, 1);
	double dmsks;

	*Tsetup = (vupdate_offset_pix + vupdate_width_pix + vready_offset_pix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;

	dmsks = (DynamicMetadataLinesBeforeActiveRequired == 0)
			? VBlank * HTotal / PixelClock / 2.0
			: DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
		dmsks = dmsks / 2;
	*Tdmsks = dmsks;
}
/*
 * CalculateRowBandwidth - bandwidth for DCC meta rows and page-table (dpte)
 * rows of one plane.
 *
 * @meta_row_bw: written from the meta row byte counts, row heights, VRatio
 *	and LineTime when DCC is enabled.
 * @dpte_row_bw: written from the PTE byte counts per row, row heights,
 *	VRatio and LineTime when GPUVM is enabled.
 *
 * For dm_420_8, dm_420_10, dm_420_12 and dm_rgbe_alpha a chroma term using
 * VRatioChroma is added to the luma term; all other formats use luma only.
 *
 * NOTE(review): the declarations of GPUVMEnable, VRatio, DCCEnable and
 * LineTime, and the bodies of the two "!= true" branches, are not visible in
 * this view; presumably those branches set the output to 0 -- confirm.
 */
3400 static void CalculateRowBandwidth(
3402 enum source_format_class SourcePixelFormat,
3404 double VRatioChroma,
3407 unsigned int MetaRowByteLuma,
3408 unsigned int MetaRowByteChroma,
3409 unsigned int meta_row_height_luma,
3410 unsigned int meta_row_height_chroma,
3411 unsigned int PixelPTEBytesPerRowLuma,
3412 unsigned int PixelPTEBytesPerRowChroma,
3413 unsigned int dpte_row_height_luma,
3414 unsigned int dpte_row_height_chroma,
3415 double *meta_row_bw,
3416 double *dpte_row_bw)
/* Meta row bandwidth: ratio * bytes per row / (row height * line time). */
3418 if (DCCEnable != true) {
3420 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3421 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3422 + VRatioChroma * MetaRowByteChroma
3423 / (meta_row_height_chroma * LineTime);
3425 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
/* Page-table row bandwidth: same shape as above, using the PTE byte counts. */
3428 if (GPUVMEnable != true) {
3430 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3431 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3432 + VRatioChroma * PixelPTEBytesPerRowChroma
3433 / (dpte_row_height_chroma * LineTime);
3435 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - immediate-flip timing and support check for one
 * pipe.
 *
 * Outputs:
 * @DestinationLinesToRequestVMInImmediateFlip: time to fetch the PDE/meta
 *	PTE data, expressed in lines and quantised with
 *	dml_ceil(4.0 * t / LineTime, 1) / 4.0.
 * @DestinationLinesToRequestRowInImmediateFlip: same quantisation applied to
 *	the row fetch time.
 * @final_flip_bw: bandwidth implied by those line counts.
 * @ImmediateFlipSupportedForPipe: false when either line count reaches its
 *	limit (32 / 16) or the combined fetch time exceeds min_row_time.
 *
 * NOTE(review): the declarations of GPUVMEnable, HostVMEnable, DCCEnable,
 * VRatio, LineTime and Tno_bw, and the bodies of several else branches, are
 * not visible in this view.
 */
3439 static void CalculateFlipSchedule(
3440 struct display_mode_lib *mode_lib,
3441 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3442 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3443 double UrgentExtraLatency,
3444 double UrgentLatency,
3445 unsigned int GPUVMMaxPageTableLevels,
3447 unsigned int HostVMMaxNonCachedPageTableLevels,
3449 double HostVMMinPageSize,
3450 double PDEAndMetaPTEBytesPerFrame,
3451 double MetaRowBytes,
3452 double DPTEBytesPerRow,
3453 double BandwidthAvailableForImmediateFlip,
3454 unsigned int TotImmediateFlipBytes,
3455 enum source_format_class SourcePixelFormat,
3458 double VRatioChroma,
3461 unsigned int dpte_row_height,
3462 unsigned int meta_row_height,
3463 unsigned int dpte_row_height_chroma,
3464 unsigned int meta_row_height_chroma,
3465 double *DestinationLinesToRequestVMInImmediateFlip,
3466 double *DestinationLinesToRequestRowInImmediateFlip,
3467 double *final_flip_bw,
3468 bool *ImmediateFlipSupportedForPipe)
3470 double min_row_time = 0.0;
3471 unsigned int HostVMDynamicLevelsTrips = 0;
3472 double TimeForFetchingMetaPTEImmediateFlip = 0;
3473 double TimeForFetchingRowInVBlankImmediateFlip = 0;
3474 double ImmediateFlipBW = 0;
3475 double HostVMInefficiencyFactor = 0;
/*
 * With both GPUVM and HostVM enabled the VM traffic is scaled by the ratio of
 * the two "percent of ideal bandwidth" inputs; otherwise the factor is 1 and
 * no extra page-table trips are counted.
 */
3477 if (GPUVMEnable == true && HostVMEnable == true) {
3478 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3479 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3481 HostVMInefficiencyFactor = 1;
3482 HostVMDynamicLevelsTrips = 0;
/*
 * This pipe's share of the flip bandwidth: its own byte total relative to
 * TotImmediateFlipBytes.  NOTE(review): a TotImmediateFlipBytes of 0 would
 * make this a floating-point division by zero -- confirm callers exclude it.
 */
3485 if (GPUVMEnable == true || DCCEnable == true) {
3486 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
/* PDE/meta PTE fetch time: bounded below by the latency term and by LineTime / 4. */
3489 if (GPUVMEnable == true) {
3490 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3491 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0);
3493 TimeForFetchingMetaPTEImmediateFlip = 0;
3496 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
/* Row (meta row + dpte row) fetch time, with the same LineTime / 4 floor. */
3497 if ((GPUVMEnable == true || DCCEnable == true)) {
3498 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3499 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4);
3501 TimeForFetchingRowInVBlankImmediateFlip = 0;
3504 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3506 if (GPUVMEnable == true) {
3507 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3508 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
/* GPUVMEnable == true was handled by the branch above, so this is effectively the DCC-only case. */
3509 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3510 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
/*
 * min_row_time: shortest time covered by one dpte/meta row, over luma and
 * (for the formats below) chroma.
 * NOTE(review): dm_420_12 is absent from this list although
 * CalculateRowBandwidth() treats it as a two-plane format -- confirm whether
 * the omission is intended.
 */
3516 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3517 if (GPUVMEnable == true && DCCEnable != true) {
3518 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3519 } else if (GPUVMEnable != true && DCCEnable == true) {
3520 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3522 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio,
3523 dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
3526 if (GPUVMEnable == true && DCCEnable != true) {
3527 min_row_time = dpte_row_height * LineTime / VRatio;
3528 } else if (GPUVMEnable != true && DCCEnable == true) {
3529 min_row_time = meta_row_height * LineTime / VRatio;
3531 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
/* Support check: line-count limits, and VM fetch plus two row fetches must fit in min_row_time. */
3535 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3536 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3537 *ImmediateFlipSupportedForPipe = false;
3539 *ImmediateFlipSupportedForPipe = true;
/*
 * TruncToValidBPP - compare the bits per pixel the link can carry
 * (MaxLinkBPP) against the DSC and non-DSC bpp values for the output
 * format, either choosing a value (DesiredBPP == 0) or validating the
 * requested one.
 *
 * NOTE(review): most of the parameter list (LinkBitRate, Lanes, PixelClock,
 * DesiredBPP, DSCEnable, ...), the NonDSCBPP0..2 / MinDSCBPP assignments and
 * nearly all return statements are not visible in this view.  The comments
 * below describe only the visible lines; the exact return values must be
 * confirmed against the full function.
 */
3543 static double TruncToValidBPP(
3551 enum output_encoder_class Output,
3552 enum output_format_class Format,
3553 unsigned int DSCInputBitPerComponent,
3557 enum odm_combine_mode ODMCombine)
3559 double MaxLinkBPP = 0;
3561 double MaxDSCBPP = 0;
/* Per-format DSC ceiling: a multiple of the input bits per component, less 1/16 bpp. */
3566 if (Format == dm_420) {
3571 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3572 } else if (Format == dm_444) {
3577 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3579 if (Output == dm_hdmi) {
3590 if (Format == dm_n422) {
3592 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3596 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
/*
 * Link capacity in bits per pixel: LinkBitRate / 10 * 8 * Lanes / PixelClock,
 * reduced by 2.4% when DSC is enabled on a dm_dp output.
 */
3600 if (DSCEnable && Output == dm_dp) {
3601 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3603 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
/* ODM combine thresholds (16 for 4:1, 32 for 2:1); the statements they guard are not visible here. */
3606 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3608 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
/* DesiredBPP == 0: no specific bpp requested, so one is derived from MaxLinkBPP. */
3613 if (DesiredBPP == 0) {
3615 if (MaxLinkBPP < MinDSCBPP) {
3617 } else if (MaxLinkBPP >= MaxDSCBPP) {
/* Between the DSC bounds: MaxLinkBPP quantised to 1/16 bpp via dml_floor. */
3620 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
/* MaxLinkBPP is tested against NonDSCBPP2, then NonDSCBPP1, then NonDSCBPP0. */
3623 if (MaxLinkBPP >= NonDSCBPP2) {
3625 } else if (MaxLinkBPP >= NonDSCBPP1) {
3627 } else if (MaxLinkBPP >= NonDSCBPP0) {
/*
 * Explicit DesiredBPP: this condition is true when the request is neither one
 * of the listed non-DSC values (DSC off) nor within [MinDSCBPP, MaxDSCBPP]
 * (DSC on).
 */
3634 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0 || DesiredBPP == 18)) ||
3635 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3644 void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3646 struct vba_vars_st *v = &mode_lib->vba;
3647 int MinPrefetchMode, MaxPrefetchMode;
3649 unsigned int j, k, m;
3650 bool EnoughWritebackUnits = true;
3651 bool WritebackModeSupport = true;
3652 bool ViewportExceedsSurface = false;
3653 double MaxTotalVActiveRDBandwidth = 0;
3654 long ReorderingBytes = 0;
3655 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX] = { 0 };
3657 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3659 CalculateMinAndMaxPrefetchMode(
3660 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3661 &MinPrefetchMode, &MaxPrefetchMode);
3663 /*Scale Ratio, taps Support Check*/
3665 v->ScaleRatioAndTapsSupport = true;
3666 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3667 if (v->ScalerEnabled[k] == false
3668 && ((v->SourcePixelFormat[k] != dm_444_64
3669 && v->SourcePixelFormat[k] != dm_444_32
3670 && v->SourcePixelFormat[k] != dm_444_16
3671 && v->SourcePixelFormat[k] != dm_mono_16
3672 && v->SourcePixelFormat[k] != dm_mono_8
3673 && v->SourcePixelFormat[k] != dm_rgbe
3674 && v->SourcePixelFormat[k] != dm_rgbe_alpha)
3675 || v->HRatio[k] != 1.0
3676 || v->htaps[k] != 1.0
3677 || v->VRatio[k] != 1.0
3678 || v->vtaps[k] != 1.0)) {
3679 v->ScaleRatioAndTapsSupport = false;
3680 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0
3681 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3682 || (v->htaps[k] > 1.0
3683 && (v->htaps[k] % 2) == 1)
3684 || v->HRatio[k] > v->MaxHSCLRatio
3685 || v->VRatio[k] > v->MaxVSCLRatio
3686 || v->HRatio[k] > v->htaps[k]
3687 || v->VRatio[k] > v->vtaps[k]
3688 || (v->SourcePixelFormat[k] != dm_444_64
3689 && v->SourcePixelFormat[k] != dm_444_32
3690 && v->SourcePixelFormat[k] != dm_444_16
3691 && v->SourcePixelFormat[k] != dm_mono_16
3692 && v->SourcePixelFormat[k] != dm_mono_8
3693 && v->SourcePixelFormat[k] != dm_rgbe
3694 && (v->VTAPsChroma[k] < 1
3695 || v->VTAPsChroma[k] > 8
3696 || v->HTAPsChroma[k] < 1
3697 || v->HTAPsChroma[k] > 8
3698 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3699 || v->HRatioChroma[k] > v->MaxHSCLRatio
3700 || v->VRatioChroma[k] > v->MaxVSCLRatio
3701 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3702 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3703 v->ScaleRatioAndTapsSupport = false;
3706 /*Source Format, Pixel Format and Scan Support Check*/
3708 v->SourceFormatPixelAndScanSupport = true;
3709 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3710 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3711 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x)
3712 && !(v->SourcePixelFormat[k] == dm_444_64))) {
3713 v->SourceFormatPixelAndScanSupport = false;
3716 /*Bandwidth Support Check*/
3718 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3719 CalculateBytePerPixelAnd256BBlockSizes(
3720 v->SourcePixelFormat[k],
3721 v->SurfaceTiling[k],
3722 &v->BytePerPixelY[k],
3723 &v->BytePerPixelC[k],
3724 &v->BytePerPixelInDETY[k],
3725 &v->BytePerPixelInDETC[k],
3726 &v->Read256BlockHeightY[k],
3727 &v->Read256BlockHeightC[k],
3728 &v->Read256BlockWidthY[k],
3729 &v->Read256BlockWidthC[k]);
3731 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3732 if (v->SourceScan[k] != dm_vert) {
3733 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3734 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3736 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3737 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3740 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3741 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3742 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3744 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3745 if (v->WritebackEnable[k] == true
3746 && v->WritebackPixelFormat[k] == dm_444_64) {
3747 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3748 * v->WritebackDestinationHeight[k]
3749 / (v->WritebackSourceHeight[k]
3751 / v->PixelClock[k]) * 8.0;
3752 } else if (v->WritebackEnable[k] == true) {
3753 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3754 * v->WritebackDestinationHeight[k]
3755 / (v->WritebackSourceHeight[k]
3757 / v->PixelClock[k]) * 4.0;
3759 v->WriteBandwidth[k] = 0.0;
3763 /*Writeback Latency support check*/
3765 v->WritebackLatencySupport = true;
3766 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3767 if (v->WritebackEnable[k] == true) {
3768 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave ||
3769 v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
3770 if (v->WriteBandwidth[k]
3771 > 2.0 * v->WritebackInterfaceBufferSize * 1024
3772 / v->WritebackLatency) {
3773 v->WritebackLatencySupport = false;
3776 if (v->WriteBandwidth[k]
3777 > v->WritebackInterfaceBufferSize * 1024
3778 / v->WritebackLatency) {
3779 v->WritebackLatencySupport = false;
3785 /*Writeback Mode Support Check*/
3787 v->TotalNumberOfActiveWriteback = 0;
3788 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3789 if (v->WritebackEnable[k] == true) {
3790 v->TotalNumberOfActiveWriteback =
3791 v->TotalNumberOfActiveWriteback + 1;
3795 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3796 EnoughWritebackUnits = false;
3798 if (!v->WritebackSupportInterleaveAndUsingWholeBufferForASingleStream
3799 && (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave
3800 || v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave)) {
3802 WritebackModeSupport = false;
3804 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave && v->TotalNumberOfActiveWriteback > 1) {
3805 WritebackModeSupport = false;
3808 /*Writeback Scale Ratio and Taps Support Check*/
3810 v->WritebackScaleRatioAndTapsSupport = true;
3811 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3812 if (v->WritebackEnable[k] == true) {
3813 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio
3814 || v->WritebackVRatio[k]
3815 > v->WritebackMaxVSCLRatio
3816 || v->WritebackHRatio[k]
3817 < v->WritebackMinHSCLRatio
3818 || v->WritebackVRatio[k]
3819 < v->WritebackMinVSCLRatio
3820 || v->WritebackHTaps[k]
3821 > v->WritebackMaxHSCLTaps
3822 || v->WritebackVTaps[k]
3823 > v->WritebackMaxVSCLTaps
3824 || v->WritebackHRatio[k]
3825 > v->WritebackHTaps[k]
3826 || v->WritebackVRatio[k]
3827 > v->WritebackVTaps[k]
3828 || (v->WritebackHTaps[k] > 2.0
3829 && ((v->WritebackHTaps[k] % 2)
3831 v->WritebackScaleRatioAndTapsSupport = false;
3833 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3834 v->WritebackScaleRatioAndTapsSupport = false;
3838 /*Maximum DISPCLK/DPPCLK Support check*/
3840 v->WritebackRequiredDISPCLK = 0.0;
3841 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3842 if (v->WritebackEnable[k] == true) {
3843 v->WritebackRequiredDISPCLK = dml_max(v->WritebackRequiredDISPCLK,
3844 dml30_CalculateWriteBackDISPCLK(
3845 v->WritebackPixelFormat[k],
3847 v->WritebackHRatio[k],
3848 v->WritebackVRatio[k],
3849 v->WritebackHTaps[k],
3850 v->WritebackVTaps[k],
3851 v->WritebackSourceWidth[k],
3852 v->WritebackDestinationWidth[k],
3854 v->WritebackLineBufferSize));
3857 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3858 if (v->HRatio[k] > 1.0) {
3859 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3861 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3863 if (v->BytePerPixelC[k] == 0.0) {
3864 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3865 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3866 * dml_max3(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0);
3867 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3868 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3871 if (v->HRatioChroma[k] > 1.0) {
3872 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
3873 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3875 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3877 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3878 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3879 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3880 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3882 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3883 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3884 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3888 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3889 int MaximumSwathWidthSupportLuma = 0;
3890 int MaximumSwathWidthSupportChroma = 0;
3892 if (v->SurfaceTiling[k] == dm_sw_linear) {
3893 MaximumSwathWidthSupportLuma = 8192.0;
3894 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3895 MaximumSwathWidthSupportLuma = 2880.0;
3897 MaximumSwathWidthSupportLuma = 5760.0;
3900 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
3901 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
3903 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
3905 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
3906 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
3907 if (v->BytePerPixelC[k] == 0.0) {
3908 v->MaximumSwathWidthInLineBufferChroma = 0;
3910 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
3911 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
3913 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
3914 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
3917 CalculateSwathAndDETConfiguration(
3919 v->NumberOfActivePlanes,
3920 v->DETBufferSizeInKByte[0],
3921 v->MaximumSwathWidthLuma,
3922 v->MaximumSwathWidthChroma,
3924 v->SourcePixelFormat,
3932 v->Read256BlockHeightY,
3933 v->Read256BlockHeightC,
3934 v->Read256BlockWidthY,
3935 v->Read256BlockWidthC,
3936 v->odm_combine_dummy,
3937 v->BlendingAndTiming,
3940 v->BytePerPixelInDETY,
3941 v->BytePerPixelInDETC,
3946 v->swath_width_luma_ub,
3947 v->swath_width_chroma_ub,
3954 v->SingleDPPViewportSizeSupportPerPlane,
3955 &v->ViewportSizeSupport[0][0]);
3957 for (i = 0; i < v->soc.num_states; i++) {
3958 for (j = 0; j < 2; j++) {
3959 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
3960 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
3961 v->RequiredDISPCLK[i][j] = 0.0;
3962 v->DISPCLK_DPPCLK_Support[i][j] = true;
3963 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3964 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3965 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3966 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3967 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3968 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3970 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3971 * (1 + v->DISPCLKRampingMargin / 100.0);
3972 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3973 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3974 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3976 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3977 * (1 + v->DISPCLKRampingMargin / 100.0);
3978 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3979 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3980 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3983 if (v->ODMCombinePolicy == dm_odm_combine_policy_none) {
3984 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3985 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3986 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
3987 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3988 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3989 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
3990 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
3991 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3992 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3993 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
3994 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3995 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3997 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3998 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4000 if (v->DSCEnabled[k] && v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH
4001 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4002 if (v->HActive[k] / 2 > DCN30_MAX_DSC_IMAGE_WIDTH) {
4003 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4004 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4006 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4007 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4010 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN30_MAX_FMT_420_BUFFER_WIDTH
4011 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4012 if (v->HActive[k] / 2 > DCN30_MAX_FMT_420_BUFFER_WIDTH) {
4013 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4014 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4016 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4017 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4020 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4021 v->MPCCombine[i][j][k] = false;
4022 v->NoOfDPP[i][j][k] = 4;
4023 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4024 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4025 v->MPCCombine[i][j][k] = false;
4026 v->NoOfDPP[i][j][k] = 2;
4027 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4028 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4029 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity
4030 && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4031 v->MPCCombine[i][j][k] = false;
4032 v->NoOfDPP[i][j][k] = 1;
4033 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4035 v->MPCCombine[i][j][k] = true;
4036 v->NoOfDPP[i][j][k] = 2;
4037 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4039 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4040 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4041 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4042 v->DISPCLK_DPPCLK_Support[i][j] = false;
4045 v->TotalNumberOfActiveDPP[i][j] = 0;
4046 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4047 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4048 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4049 if (v->NoOfDPP[i][j][k] == 1)
4050 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4052 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never) {
4053 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4054 double BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4055 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4056 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4057 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4058 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4059 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4060 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4061 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4062 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4065 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4066 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4067 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4068 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4069 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4070 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4073 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4074 v->RequiredDISPCLK[i][j] = 0.0;
4075 v->DISPCLK_DPPCLK_Support[i][j] = true;
4076 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4077 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4078 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4079 v->MPCCombine[i][j][k] = true;
4080 v->NoOfDPP[i][j][k] = 2;
4081 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4083 v->MPCCombine[i][j][k] = false;
4084 v->NoOfDPP[i][j][k] = 1;
4085 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4087 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4088 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4089 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4091 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4093 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4094 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4095 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4096 v->DISPCLK_DPPCLK_Support[i][j] = false;
4099 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4100 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4101 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4104 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4105 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4106 v->DISPCLK_DPPCLK_Support[i][j] = false;
4111 /*Total Available Pipes Support Check*/
4113 for (i = 0; i < v->soc.num_states; i++) {
4114 for (j = 0; j < 2; j++) {
4115 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4116 v->TotalAvailablePipesSupport[i][j] = true;
4118 v->TotalAvailablePipesSupport[i][j] = false;
4122 /*Display IO and DSC Support Check*/
4124 v->NonsupportedDSCInputBPC = false;
4125 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4126 if (!(v->DSCInputBitPerComponent[k] == 12.0
4127 || v->DSCInputBitPerComponent[k] == 10.0
4128 || v->DSCInputBitPerComponent[k] == 8.0)) {
4129 v->NonsupportedDSCInputBPC = true;
4133 /*Number Of DSC Slices*/
4134 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4135 if (v->BlendingAndTiming[k] == k) {
4136 if (v->PixelClockBackEnd[k] > 3200) {
4137 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4138 } else if (v->PixelClockBackEnd[k] > 1360) {
4139 v->NumberOfDSCSlices[k] = 8;
4140 } else if (v->PixelClockBackEnd[k] > 680) {
4141 v->NumberOfDSCSlices[k] = 4;
4142 } else if (v->PixelClockBackEnd[k] > 340) {
4143 v->NumberOfDSCSlices[k] = 2;
4145 v->NumberOfDSCSlices[k] = 1;
4148 v->NumberOfDSCSlices[k] = 0;
4152 for (i = 0; i < v->soc.num_states; i++) {
4153 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4154 v->RequiresDSC[i][k] = false;
4155 v->RequiresFEC[i][k] = false;
4156 if (v->BlendingAndTiming[k] == k) {
4157 if (v->Output[k] == dm_hdmi) {
4158 v->RequiresDSC[i][k] = false;
4159 v->RequiresFEC[i][k] = false;
4160 v->OutputBppPerState[i][k] = TruncToValidBPP(
4161 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4165 v->PixelClockBackEnd[k],
4166 v->ForcedOutputLinkBPP[k],
4170 v->DSCInputBitPerComponent[k],
4171 v->NumberOfDSCSlices[k],
4172 v->AudioSampleRate[k],
4173 v->AudioSampleLayout[k],
4174 v->ODMCombineEnablePerState[i][k]);
4175 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4176 if (v->DSCEnable[k] == true) {
4177 v->RequiresDSC[i][k] = true;
4178 v->LinkDSCEnable = true;
4179 if (v->Output[k] == dm_dp) {
4180 v->RequiresFEC[i][k] = true;
4182 v->RequiresFEC[i][k] = false;
4185 v->RequiresDSC[i][k] = false;
4186 v->LinkDSCEnable = false;
4187 v->RequiresFEC[i][k] = false;
4190 v->Outbpp = BPP_INVALID;
4191 if (v->PHYCLKPerState[i] >= 270.0) {
4192 v->Outbpp = TruncToValidBPP(
4193 (1.0 - v->Downspreading / 100.0) * 2700,
4194 v->OutputLinkDPLanes[k],
4197 v->PixelClockBackEnd[k],
4198 v->ForcedOutputLinkBPP[k],
4202 v->DSCInputBitPerComponent[k],
4203 v->NumberOfDSCSlices[k],
4204 v->AudioSampleRate[k],
4205 v->AudioSampleLayout[k],
4206 v->ODMCombineEnablePerState[i][k]);
4207 v->OutputBppPerState[i][k] = v->Outbpp;
4208 // TODO: Need some other way to handle this nonsense
4209 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4211 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4212 v->Outbpp = TruncToValidBPP(
4213 (1.0 - v->Downspreading / 100.0) * 5400,
4214 v->OutputLinkDPLanes[k],
4217 v->PixelClockBackEnd[k],
4218 v->ForcedOutputLinkBPP[k],
4222 v->DSCInputBitPerComponent[k],
4223 v->NumberOfDSCSlices[k],
4224 v->AudioSampleRate[k],
4225 v->AudioSampleLayout[k],
4226 v->ODMCombineEnablePerState[i][k]);
4227 v->OutputBppPerState[i][k] = v->Outbpp;
4228 // TODO: Need some other way to handle this nonsense
4229 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4231 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4232 v->Outbpp = TruncToValidBPP(
4233 (1.0 - v->Downspreading / 100.0) * 8100,
4234 v->OutputLinkDPLanes[k],
4237 v->PixelClockBackEnd[k],
4238 v->ForcedOutputLinkBPP[k],
4242 v->DSCInputBitPerComponent[k],
4243 v->NumberOfDSCSlices[k],
4244 v->AudioSampleRate[k],
4245 v->AudioSampleLayout[k],
4246 v->ODMCombineEnablePerState[i][k]);
4247 if (v->Outbpp == BPP_INVALID && v->ForcedOutputLinkBPP[k] == 0) {
4248 //if (v->Outbpp == BPP_INVALID && v->DSCEnabled[k] == dm_dsc_enable_only_if_necessary && v->ForcedOutputLinkBPP[k] == 0) {
4249 v->RequiresDSC[i][k] = true;
4250 v->LinkDSCEnable = true;
4251 if (v->Output[k] == dm_dp) {
4252 v->RequiresFEC[i][k] = true;
4254 v->Outbpp = TruncToValidBPP(
4255 (1.0 - v->Downspreading / 100.0) * 8100,
4256 v->OutputLinkDPLanes[k],
4259 v->PixelClockBackEnd[k],
4260 v->ForcedOutputLinkBPP[k],
4264 v->DSCInputBitPerComponent[k],
4265 v->NumberOfDSCSlices[k],
4266 v->AudioSampleRate[k],
4267 v->AudioSampleLayout[k],
4268 v->ODMCombineEnablePerState[i][k]);
4270 v->OutputBppPerState[i][k] = v->Outbpp;
4271 // TODO: Need some other way to handle this nonsense
4272 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4276 v->OutputBppPerState[i][k] = 0;
4280 for (i = 0; i < v->soc.num_states; i++) {
4281 v->DIOSupport[i] = true;
4282 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4283 if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
4284 && (v->OutputBppPerState[i][k] == 0
4285 || (v->OutputFormat[k] == dm_420 && v->Interlace[k] == true && v->ProgressiveToInterlaceUnitInOPP == true))) {
4286 v->DIOSupport[i] = false;
4291 for (i = 0; i < v->soc.num_states; ++i) {
4292 v->ODMCombine4To1SupportCheckOK[i] = true;
4293 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4294 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4295 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) {
4296 v->ODMCombine4To1SupportCheckOK[i] = false;
4301 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4303 for (i = 0; i < v->soc.num_states; i++) {
4304 v->NotEnoughDSCUnits[i] = false;
4305 v->TotalDSCUnitsRequired = 0.0;
4306 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4307 if (v->RequiresDSC[i][k] == true) {
4308 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4309 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4310 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4311 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4313 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4317 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4318 v->NotEnoughDSCUnits[i] = true;
4321 /*DSC Delay per state*/
4323 for (i = 0; i < v->soc.num_states; i++) {
4324 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4325 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4328 v->BPP = v->OutputBppPerState[i][k];
4330 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4331 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4332 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4333 v->DSCInputBitPerComponent[k],
4335 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4336 v->NumberOfDSCSlices[k],
4338 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4339 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4340 v->DSCDelayPerState[i][k] = 2.0
4341 * dscceComputeDelay(
4342 v->DSCInputBitPerComponent[k],
4344 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4345 v->NumberOfDSCSlices[k] / 2,
4347 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4349 v->DSCDelayPerState[i][k] = 4.0
4350 * (dscceComputeDelay(
4351 v->DSCInputBitPerComponent[k],
4353 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4354 v->NumberOfDSCSlices[k] / 4,
4356 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4358 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4360 v->DSCDelayPerState[i][k] = 0.0;
4363 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4364 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4365 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4366 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4372 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4374 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4375 for (j = 0; j <= 1; ++j) {
4376 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4377 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4378 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4379 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4382 CalculateSwathAndDETConfiguration(
4384 v->NumberOfActivePlanes,
4385 v->DETBufferSizeInKByte[0],
4386 v->MaximumSwathWidthLuma,
4387 v->MaximumSwathWidthChroma,
4389 v->SourcePixelFormat,
4397 v->Read256BlockHeightY,
4398 v->Read256BlockHeightC,
4399 v->Read256BlockWidthY,
4400 v->Read256BlockWidthC,
4401 v->ODMCombineEnableThisState,
4402 v->BlendingAndTiming,
4405 v->BytePerPixelInDETY,
4406 v->BytePerPixelInDETC,
4410 v->NoOfDPPThisState,
4411 v->swath_width_luma_ub_this_state,
4412 v->swath_width_chroma_ub_this_state,
4413 v->SwathWidthYThisState,
4414 v->SwathWidthCThisState,
4415 v->SwathHeightYThisState,
4416 v->SwathHeightCThisState,
4417 v->DETBufferSizeYThisState,
4418 v->DETBufferSizeCThisState,
4420 &v->ViewportSizeSupport[i][j]);
4422 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4423 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4424 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4425 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4426 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4427 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4428 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4429 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4430 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4435 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4436 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4439 for (i = 0; i < v->soc.num_states; i++) {
4440 for (j = 0; j < 2; j++) {
4441 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4442 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4443 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4444 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4445 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4446 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4447 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4448 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4449 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4452 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4453 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4454 if (v->DCCEnable[k] == true) {
4455 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4459 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4460 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
4461 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4463 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
4464 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
4465 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4467 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4468 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4471 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4474 v->Read256BlockHeightC[k],
4475 v->Read256BlockWidthY[k],
4476 v->SourcePixelFormat[k],
4477 v->SurfaceTiling[k],
4478 v->BytePerPixelC[k],
4480 v->SwathWidthCThisState[k],
4481 v->ViewportHeightChroma[k],
4484 v->HostVMMaxNonCachedPageTableLevels,
4485 v->GPUVMMinPageSize,
4486 v->HostVMMinPageSize,
4487 v->PTEBufferSizeInRequestsForChroma,
4490 &v->MacroTileWidthC[k],
4492 &v->DPTEBytesPerRowC,
4493 &v->PTEBufferSizeNotExceededC[i][j][k],
4495 &v->dpte_row_height_chroma[k],
4499 &v->meta_row_height_chroma[k],
4506 &v->dummyinteger11);
4508 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4513 v->ProgressiveToInterlaceUnitInOPP,
4514 v->SwathHeightCThisState[k],
4515 v->ViewportYStartC[k],
4519 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4520 v->PTEBufferSizeInRequestsForChroma = 0;
4521 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4522 v->MetaRowBytesC = 0.0;
4523 v->DPTEBytesPerRowC = 0.0;
4524 v->PrefetchLinesC[i][j][k] = 0.0;
4525 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4527 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4530 v->Read256BlockHeightY[k],
4531 v->Read256BlockWidthY[k],
4532 v->SourcePixelFormat[k],
4533 v->SurfaceTiling[k],
4534 v->BytePerPixelY[k],
4536 v->SwathWidthYThisState[k],
4537 v->ViewportHeight[k],
4540 v->HostVMMaxNonCachedPageTableLevels,
4541 v->GPUVMMinPageSize,
4542 v->HostVMMinPageSize,
4543 v->PTEBufferSizeInRequestsForLuma,
4545 v->DCCMetaPitchY[k],
4546 &v->MacroTileWidthY[k],
4548 &v->DPTEBytesPerRowY,
4549 &v->PTEBufferSizeNotExceededY[i][j][k],
4551 &v->dpte_row_height[k],
4555 &v->meta_row_height[k],
4557 &v->dpte_group_bytes[k],
4563 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4568 v->ProgressiveToInterlaceUnitInOPP,
4569 v->SwathHeightYThisState[k],
4570 v->ViewportYStartY[k],
4573 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4574 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4575 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4577 CalculateRowBandwidth(
4579 v->SourcePixelFormat[k],
4583 v->HTotal[k] / v->PixelClock[k],
4586 v->meta_row_height[k],
4587 v->meta_row_height_chroma[k],
4588 v->DPTEBytesPerRowY,
4589 v->DPTEBytesPerRowC,
4590 v->dpte_row_height[k],
4591 v->dpte_row_height_chroma[k],
4592 &v->meta_row_bandwidth[i][j][k],
4593 &v->dpte_row_bandwidth[i][j][k]);
4595 v->UrgLatency[i] = CalculateUrgentLatency(
4596 v->UrgentLatencyPixelDataOnly,
4597 v->UrgentLatencyPixelMixedWithVMData,
4598 v->UrgentLatencyVMDataOnly,
4599 v->DoUrgentLatencyAdjustment,
4600 v->UrgentLatencyAdjustmentFabricClockComponent,
4601 v->UrgentLatencyAdjustmentFabricClockReference,
4602 v->FabricClockPerState[i]);
4604 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4605 CalculateUrgentBurstFactor(
4606 v->swath_width_luma_ub_this_state[k],
4607 v->swath_width_chroma_ub_this_state[k],
4608 v->DETBufferSizeInKByte[0],
4609 v->SwathHeightYThisState[k],
4610 v->SwathHeightCThisState[k],
4611 v->HTotal[k] / v->PixelClock[k],
4613 v->CursorBufferSize,
4614 v->CursorWidth[k][0],
4618 v->BytePerPixelInDETY[k],
4619 v->BytePerPixelInDETC[k],
4620 v->DETBufferSizeYThisState[k],
4621 v->DETBufferSizeCThisState[k],
4622 &v->UrgentBurstFactorCursor[k],
4623 &v->UrgentBurstFactorLuma[k],
4624 &v->UrgentBurstFactorChroma[k],
4625 &NotUrgentLatencyHiding[k]);
4628 v->NotUrgentLatencyHiding[i][j] = false;
4629 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4630 if (NotUrgentLatencyHiding[k]) {
4631 v->NotUrgentLatencyHiding[i][j] = true;
4635 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4636 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4637 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4638 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4641 v->TotalVActivePixelBandwidth[i][j] = 0;
4642 v->TotalVActiveCursorBandwidth[i][j] = 0;
4643 v->TotalMetaRowBandwidth[i][j] = 0;
4644 v->TotalDPTERowBandwidth[i][j] = 0;
4645 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4646 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4647 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4648 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4649 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4652 CalculateDCFCLKDeepSleep(
4654 v->NumberOfActivePlanes,
4659 v->SwathWidthYThisState,
4660 v->SwathWidthCThisState,
4661 v->NoOfDPPThisState,
4666 v->PSCL_FACTOR_CHROMA,
4667 v->RequiredDPPCLKThisState,
4668 v->ReadBandwidthLuma,
4669 v->ReadBandwidthChroma,
4671 &v->ProjectedDCFCLKDeepSleep[i][j]);
4675 //Calculate Return BW
4677 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4678 for (j = 0; j <= 1; ++j) {
4679 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4680 if (v->BlendingAndTiming[k] == k) {
4681 if (v->WritebackEnable[k] == true) {
4682 v->WritebackDelayTime[k] = v->WritebackLatency
4683 + CalculateWriteBackDelay(
4684 v->WritebackPixelFormat[k],
4685 v->WritebackHRatio[k],
4686 v->WritebackVRatio[k],
4687 v->WritebackVTaps[k],
4688 v->WritebackDestinationWidth[k],
4689 v->WritebackDestinationHeight[k],
4690 v->WritebackSourceHeight[k],
4691 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4693 v->WritebackDelayTime[k] = 0.0;
4695 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4696 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4697 v->WritebackDelayTime[k] = dml_max(
4698 v->WritebackDelayTime[k],
4700 + CalculateWriteBackDelay(
4701 v->WritebackPixelFormat[m],
4702 v->WritebackHRatio[m],
4703 v->WritebackVRatio[m],
4704 v->WritebackVTaps[m],
4705 v->WritebackDestinationWidth[m],
4706 v->WritebackDestinationHeight[m],
4707 v->WritebackSourceHeight[m],
4708 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4713 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4714 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4715 if (v->BlendingAndTiming[k] == m) {
4716 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4720 v->MaxMaxVStartup[i][j] = 0;
4721 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4722 v->MaximumVStartup[i][j][k] = v->VTotal[k] - v->VActive[k]
4723 - dml_max(1.0, dml_ceil(1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0));
4724 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4729 ReorderingBytes = v->NumberOfChannels
4731 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4732 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4733 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4734 v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
4736 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4737 for (j = 0; j <= 1; ++j) {
4738 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4742 if (v->UseMinimumRequiredDCFCLK == true) {
4745 v->MaxInterDCNTileRepeaters,
4747 v->FinalDRAMClockChangeLatency,
4748 v->SREnterPlusExitTime,
4750 v->RoundTripPingLatencyCycles,
4752 v->PixelChunkSizeInKByte,
4755 v->GPUVMMaxPageTableLevels,
4757 v->NumberOfActivePlanes,
4758 v->HostVMMinPageSize,
4759 v->HostVMMaxNonCachedPageTableLevels,
4760 v->DynamicMetadataVMEnabled,
4761 v->ImmediateFlipRequirement[0],
4762 v->ProgressiveToInterlaceUnitInOPP,
4763 v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
4764 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4765 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4766 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
4769 v->DynamicMetadataTransmittedBytes,
4770 v->DynamicMetadataLinesBeforeActiveRequired,
4776 v->ProjectedDCFCLKDeepSleep,
4778 v->TotalVActivePixelBandwidth,
4779 v->TotalVActiveCursorBandwidth,
4780 v->TotalMetaRowBandwidth,
4781 v->TotalDPTERowBandwidth,
4782 v->TotalNumberOfActiveDPP,
4783 v->TotalNumberOfDCCActiveDPP,
4784 v->dpte_group_bytes,
4787 v->swath_width_luma_ub_all_states,
4788 v->swath_width_chroma_ub_all_states,
4793 v->PDEAndMetaPTEBytesPerFrame,
4796 v->DynamicMetadataEnable,
4797 v->VActivePixelBandwidth,
4798 v->VActiveCursorBandwidth,
4799 v->ReadBandwidthLuma,
4800 v->ReadBandwidthChroma,
4804 if (v->ClampMinDCFCLK) {
4805 /* Clamp calculated values to actual minimum */
4806 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4807 for (j = 0; j <= 1; ++j) {
4808 if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
4809 v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
4816 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4817 for (j = 0; j <= 1; ++j) {
4818 v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
4819 v->ReturnBusWidth * v->DCFCLKState[i][j],
4820 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth,
4821 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4822 if (v->HostVMEnable != true) {
4823 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
4826 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j]
4827 * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
4832 //Re-ordering Buffer Support Check
4834 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4835 for (j = 0; j <= 1; ++j) {
4836 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4837 > (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4838 v->ROBSupport[i][j] = true;
4840 v->ROBSupport[i][j] = false;
4845 //Vertical Active BW support check
4847 MaxTotalVActiveRDBandwidth = 0;
4848 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4849 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4852 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4853 for (j = 0; j <= 1; ++j) {
4854 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4855 v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4856 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
4858 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4859 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4861 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4868 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4869 for (j = 0; j <= 1; ++j) {
4870 int NextPrefetchModeState = MinPrefetchMode;
4872 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
4874 v->BandwidthWithoutPrefetchSupported[i][j] = true;
4875 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j]
4876 > v->ReturnBWPerState[i][j] || v->NotUrgentLatencyHiding[i][j]) {
4877 v->BandwidthWithoutPrefetchSupported[i][j] = false;
4880 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4881 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4882 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4883 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4884 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4885 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4886 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4887 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4888 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4889 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4890 v->ODMCombineEnabled[k] = v->ODMCombineEnablePerState[i][k];
4893 v->ExtraLatency = CalculateExtraLatency(
4894 v->RoundTripPingLatencyCycles,
4896 v->DCFCLKState[i][j],
4897 v->TotalNumberOfActiveDPP[i][j],
4898 v->PixelChunkSizeInKByte,
4899 v->TotalNumberOfDCCActiveDPP[i][j],
4901 v->ReturnBWPerState[i][j],
4904 v->NumberOfActivePlanes,
4905 v->NoOfDPPThisState,
4906 v->dpte_group_bytes,
4907 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4908 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4909 v->HostVMMinPageSize,
4910 v->HostVMMaxNonCachedPageTableLevels);
4912 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4914 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
4915 v->MaxVStartup = v->NextMaxVStartup;
4917 v->TWait = CalculateTWait(v->PrefetchModePerState[i][j], v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
4919 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4920 Pipe myPipe = { 0 };
4922 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
4923 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
4924 myPipe.PixelClock = v->PixelClock[k];
4925 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
4926 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
4927 myPipe.ScalerEnabled = v->ScalerEnabled[k];
4928 myPipe.SourceScan = v->SourceScan[k];
4929 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
4930 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
4931 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
4932 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
4933 myPipe.InterlaceEnable = v->Interlace[k];
4934 myPipe.NumberOfCursors = v->NumberOfCursors[k];
4935 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
4936 myPipe.HTotal = v->HTotal[k];
4937 myPipe.DCCEnable = v->DCCEnable[k];
4938 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
4940 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
4942 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4943 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4945 v->DSCDelayPerState[i][k],
4946 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
4948 v->DPPCLKDelaySCLLBOnly,
4949 v->DPPCLKDelayCNVCCursor,
4950 v->DISPCLKDelaySubtotal,
4951 v->SwathWidthYThisState[k] / v->HRatio[k],
4953 v->MaxInterDCNTileRepeaters,
4954 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
4955 v->MaximumVStartup[i][j][k],
4956 v->GPUVMMaxPageTableLevels,
4959 v->HostVMMaxNonCachedPageTableLevels,
4960 v->HostVMMinPageSize,
4961 v->DynamicMetadataEnable[k],
4962 v->DynamicMetadataVMEnabled,
4963 v->DynamicMetadataLinesBeforeActiveRequired[k],
4964 v->DynamicMetadataTransmittedBytes[k],
4968 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4969 v->MetaRowBytes[i][j][k],
4970 v->DPTEBytesPerRow[i][j][k],
4971 v->PrefetchLinesY[i][j][k],
4972 v->SwathWidthYThisState[k],
4973 v->BytePerPixelY[k],
4976 v->PrefetchLinesC[i][j][k],
4977 v->SwathWidthCThisState[k],
4978 v->BytePerPixelC[k],
4981 v->swath_width_luma_ub_this_state[k],
4982 v->swath_width_chroma_ub_this_state[k],
4983 v->SwathHeightYThisState[k],
4984 v->SwathHeightCThisState[k],
4986 v->ProgressiveToInterlaceUnitInOPP,
4987 &v->DSTXAfterScaler[k],
4988 &v->DSTYAfterScaler[k],
4989 &v->LineTimesForPrefetch[k],
4991 &v->LinesForMetaPTE[k],
4992 &v->LinesForMetaAndDPTERow[k],
4993 &v->VRatioPreY[i][j][k],
4994 &v->VRatioPreC[i][j][k],
4995 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
4996 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
4997 &v->NoTimeForDynamicMetadata[i][j][k],
4999 &v->prefetch_vmrow_bw[k],
5002 &v->VUpdateOffsetPix[k],
5003 &v->VUpdateWidthPix[k],
5004 &v->VReadyOffsetPix[k]);
5007 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5008 CalculateUrgentBurstFactor(
5009 v->swath_width_luma_ub_this_state[k],
5010 v->swath_width_chroma_ub_this_state[k],
5011 v->DETBufferSizeInKByte[0],
5012 v->SwathHeightYThisState[k],
5013 v->SwathHeightCThisState[k],
5014 v->HTotal[k] / v->PixelClock[k],
5016 v->CursorBufferSize,
5017 v->CursorWidth[k][0],
5019 v->VRatioPreY[i][j][k],
5020 v->VRatioPreC[i][j][k],
5021 v->BytePerPixelInDETY[k],
5022 v->BytePerPixelInDETC[k],
5023 v->DETBufferSizeYThisState[k],
5024 v->DETBufferSizeCThisState[k],
5025 &v->UrgentBurstFactorCursorPre[k],
5026 &v->UrgentBurstFactorLumaPre[k],
5027 &v->UrgentBurstFactorChroma[k],
5028 &v->NoUrgentLatencyHidingPre[k]);
5031 v->MaximumReadBandwidthWithPrefetch = 0.0;
5032 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5033 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k])
5034 * v->VRatioPreY[i][j][k];
5036 v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch
5038 v->VActivePixelBandwidth[i][j][k],
5039 v->VActiveCursorBandwidth[i][j][k]
5040 + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]),
5041 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5043 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
5044 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5045 * v->UrgentBurstFactorChromaPre[k])
5046 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5049 v->NotEnoughUrgentLatencyHidingPre = false;
5050 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5051 if (v->NoUrgentLatencyHidingPre[k] == true) {
5052 v->NotEnoughUrgentLatencyHidingPre = true;
5056 v->PrefetchSupported[i][j] = true;
5057 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5058 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5059 v->PrefetchSupported[i][j] = false;
5061 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5062 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5063 || v->NoTimeForPrefetch[i][j][k] == true) {
5064 v->PrefetchSupported[i][j] = false;
5068 v->DynamicMetadataSupported[i][j] = true;
5069 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5070 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5071 v->DynamicMetadataSupported[i][j] = false;
5075 v->VRatioInPrefetchSupported[i][j] = true;
5076 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5077 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5078 v->VRatioInPrefetchSupported[i][j] = false;
5081 v->AnyLinesForVMOrRowTooLarge = false;
5082 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5083 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5084 v->AnyLinesForVMOrRowTooLarge = true;
5088 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5089 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5090 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5091 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5093 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5095 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
5096 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5097 * v->UrgentBurstFactorChromaPre[k])
5098 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5100 v->TotImmediateFlipBytes = 0.0;
5101 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5102 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k]
5103 + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k];
5106 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5107 CalculateFlipSchedule(
5109 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
5110 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
5113 v->GPUVMMaxPageTableLevels,
5115 v->HostVMMaxNonCachedPageTableLevels,
5117 v->HostVMMinPageSize,
5118 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5119 v->MetaRowBytes[i][j][k],
5120 v->DPTEBytesPerRow[i][j][k],
5121 v->BandwidthAvailableForImmediateFlip,
5122 v->TotImmediateFlipBytes,
5123 v->SourcePixelFormat[k],
5124 v->HTotal[k] / v->PixelClock[k],
5129 v->dpte_row_height[k],
5130 v->meta_row_height[k],
5131 v->dpte_row_height_chroma[k],
5132 v->meta_row_height_chroma[k],
5133 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5134 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5135 &v->final_flip_bw[k],
5136 &v->ImmediateFlipSupportedForPipe[k]);
5138 v->total_dcn_read_bw_with_flip = 0.0;
5139 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5140 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5142 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5143 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5144 + v->VActiveCursorBandwidth[i][j][k],
5146 * (v->final_flip_bw[k]
5147 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5148 * v->UrgentBurstFactorLumaPre[k]
5149 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5150 * v->UrgentBurstFactorChromaPre[k])
5151 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5153 v->ImmediateFlipSupportedForState[i][j] = true;
5154 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5155 v->ImmediateFlipSupportedForState[i][j] = false;
5157 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5158 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5159 v->ImmediateFlipSupportedForState[i][j] = false;
5163 v->ImmediateFlipSupportedForState[i][j] = false;
5165 if (v->MaxVStartup <= 13 || v->AnyLinesForVMOrRowTooLarge == false) {
5166 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5167 NextPrefetchModeState = NextPrefetchModeState + 1;
5169 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5171 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5172 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5173 || v->ImmediateFlipSupportedForState[i][j] == true))
5174 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5176 CalculateWatermarksAndDRAMSpeedChangeSupport(
5178 v->PrefetchModePerState[i][j],
5179 v->NumberOfActivePlanes,
5180 v->MaxLineBufferLines,
5182 v->DPPOutputBufferPixels,
5183 v->DETBufferSizeInKByte[0],
5184 v->WritebackInterfaceBufferSize,
5185 v->DCFCLKState[i][j],
5186 v->ReturnBWPerState[i][j],
5188 v->dpte_group_bytes,
5192 v->WritebackLatency,
5193 v->WritebackChunkSize,
5194 v->SOCCLKPerState[i],
5195 v->FinalDRAMClockChangeLatency,
5197 v->SREnterPlusExitTime,
5198 v->ProjectedDCFCLKDeepSleep[i][j],
5199 v->NoOfDPPThisState,
5201 v->RequiredDPPCLKThisState,
5202 v->DETBufferSizeYThisState,
5203 v->DETBufferSizeCThisState,
5204 v->SwathHeightYThisState,
5205 v->SwathHeightCThisState,
5207 v->SwathWidthYThisState,
5208 v->SwathWidthCThisState,
5217 v->BlendingAndTiming,
5218 v->BytePerPixelInDETY,
5219 v->BytePerPixelInDETC,
5223 v->WritebackPixelFormat,
5224 v->WritebackDestinationWidth,
5225 v->WritebackDestinationHeight,
5226 v->WritebackSourceHeight,
5227 &v->DRAMClockChangeSupport[i][j],
5228 &v->UrgentWatermark,
5229 &v->WritebackUrgentWatermark,
5230 &v->DRAMClockChangeWatermark,
5231 &v->WritebackDRAMClockChangeWatermark,
5232 &v->StutterExitWatermark,
5233 &v->StutterEnterPlusExitWatermark,
5234 &v->MinActiveDRAMClockChangeLatencySupported);
5238 /*PTE Buffer Size Check*/
5240 for (i = 0; i < v->soc.num_states; i++) {
5241 for (j = 0; j < 2; j++) {
5242 v->PTEBufferSizeNotExceeded[i][j] = true;
5243 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5244 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5245 v->PTEBufferSizeNotExceeded[i][j] = false;
5250 /*Cursor Support Check*/
5252 v->CursorSupport = true;
5253 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5254 if (v->CursorWidth[k][0] > 0.0) {
5255 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5256 v->CursorSupport = false;
5260 /*Valid Pitch Check*/
5262 v->PitchSupport = true;
5263 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5264 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5265 if (v->DCCEnable[k] == true) {
5266 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5268 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5270 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
5271 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) {
5272 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5273 if (v->DCCEnable[k] == true) {
5274 v->AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]);
5276 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5279 v->AlignedCPitch[k] = v->PitchC[k];
5280 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5282 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k]
5283 || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5284 v->PitchSupport = false;
5288 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5289 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k])
5290 ViewportExceedsSurface = true;
5292 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5293 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
5294 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5295 ViewportExceedsSurface = true;
5299 /*Mode Support, Voltage State and SOC Configuration*/
5301 for (i = v->soc.num_states - 1; i >= 0; i--) {
5302 for (j = 0; j < 2; j++) {
5303 if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
5304 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
5305 && v->NotEnoughDSCUnits[i] == 0
5306 && v->DTBCLKRequiredMoreThanSupported[i] == 0
5307 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1
5308 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
5309 && v->WritebackLatencySupport == 1 && v->WritebackScaleRatioAndTapsSupport == 1 && v->CursorSupport == 1 && v->PitchSupport == 1
5310 && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1
5311 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1
5312 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0
5313 && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5314 || v->ImmediateFlipSupportedForState[i][j] == true)) {
5315 v->ModeSupport[i][j] = true;
5317 v->ModeSupport[i][j] = false;
5322 unsigned int MaximumMPCCombine = 0;
5323 for (i = v->soc.num_states; i >= 0; i--) {
5324 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5325 v->VoltageLevel = i;
5326 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5327 if (v->ModeSupport[i][1] == true) {
5328 MaximumMPCCombine = 1;
5330 MaximumMPCCombine = 0;
5334 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5335 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5336 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5337 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5339 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5340 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5341 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5342 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5343 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5344 v->maxMpcComb = MaximumMPCCombine;
/*
 * Computes the urgent, DRAM clock change, writeback and stutter watermarks
 * and decides which kind of DRAM clock change the configuration supports.
 *
 * Results are returned through the trailing pointer parameters
 * (DRAMClockChangeSupport ... MinActiveDRAMClockChangeLatencySupported).
 * The function also writes intermediate values into mode_lib->vba:
 * TotalActiveDPP, TotalDCCActiveDPP, TotalActiveWriteback,
 * LBLatencyHidingSourceLinesY/C, ActiveDRAMClockChangeLatencyMargin[],
 * MinActiveDRAMClockChangeMargin and TotalNumberOfActiveOTG.
 *
 * The array parameters are indexed by plane, 0 .. NumberOfActivePlanes - 1.
 */
5348 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5349 struct display_mode_lib *mode_lib,
5350 unsigned int PrefetchMode,
5351 unsigned int NumberOfActivePlanes,
5352 unsigned int MaxLineBufferLines,
5353 unsigned int LineBufferSize,
5354 unsigned int DPPOutputBufferPixels,
5355 unsigned int DETBufferSizeInKByte,
5356 unsigned int WritebackInterfaceBufferSize,
5360 unsigned int dpte_group_bytes[],
5361 unsigned int MetaChunkSize,
5362 double UrgentLatency,
5363 double ExtraLatency,
5364 double WritebackLatency,
5365 double WritebackChunkSize,
5367 double DRAMClockChangeLatency,
5369 double SREnterPlusExitTime,
5370 double DCFCLKDeepSleep,
5371 unsigned int DPPPerPlane[],
5374 unsigned int DETBufferSizeY[],
5375 unsigned int DETBufferSizeC[],
5376 unsigned int SwathHeightY[],
5377 unsigned int SwathHeightC[],
5378 unsigned int LBBitPerPixel[],
5379 double SwathWidthY[],
5380 double SwathWidthC[],
5382 double HRatioChroma[],
5383 unsigned int vtaps[],
5384 unsigned int VTAPsChroma[],
5386 double VRatioChroma[],
5387 unsigned int HTotal[],
5388 double PixelClock[],
5389 unsigned int BlendingAndTiming[],
5390 double BytePerPixelDETY[],
5391 double BytePerPixelDETC[],
5392 double DSTXAfterScaler[],
5393 double DSTYAfterScaler[],
5394 bool WritebackEnable[],
5395 enum source_format_class WritebackPixelFormat[],
5396 double WritebackDestinationWidth[],
5397 double WritebackDestinationHeight[],
5398 double WritebackSourceHeight[],
5399 enum clock_change_support *DRAMClockChangeSupport,
5400 double *UrgentWatermark,
5401 double *WritebackUrgentWatermark,
5402 double *DRAMClockChangeWatermark,
5403 double *WritebackDRAMClockChangeWatermark,
5404 double *StutterExitWatermark,
5405 double *StutterEnterPlusExitWatermark,
5406 double *MinActiveDRAMClockChangeLatencySupported)
/*
 * Scratch values. The luma DET quantities are kept per plane because the
 * stutter computation at the end of the function reads them again.
 */
5408 double EffectiveLBLatencyHidingY = 0;
5409 double EffectiveLBLatencyHidingC = 0;
5410 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5411 double LinesInDETC = 0;
5412 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5413 unsigned int LinesInDETCRoundedDownToSwath = 0;
5414 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5415 double FullDETBufferingTimeC = 0;
5416 double ActiveDRAMClockChangeLatencyMarginY = 0;
5417 double ActiveDRAMClockChangeLatencyMarginC = 0;
5418 double WritebackDRAMClockChangeLatencyMargin = 0;
/* Declared as double but used below to hold a plane index (assigned from k / j). */
5419 double PlaneWithMinActiveDRAMClockChangeMargin = 0;
5420 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 0;
5421 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5422 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5423 double WritebackDRAMClockChangeLatencyHiding = 0;
/* Sum the DPPs of all planes, and separately the DPPs of DCC-enabled planes. */
5426 mode_lib->vba.TotalActiveDPP = 0;
5427 mode_lib->vba.TotalDCCActiveDPP = 0;
5428 for (k = 0; k < NumberOfActivePlanes; ++k) {
5429 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5430 if (DCCEnable[k] == true) {
5431 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
/* Urgent watermark = urgent latency + extra latency. */
5435 *UrgentWatermark = UrgentLatency + ExtraLatency;
/* DRAM clock change watermark = DRAM clock change latency on top of the urgent watermark. */
5437 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
/* Count the planes that have writeback enabled. */
5439 mode_lib->vba.TotalActiveWriteback = 0;
5440 for (k = 0; k < NumberOfActivePlanes; ++k) {
5441 if (WritebackEnable[k] == true) {
5442 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
/*
 * Writeback urgent watermark: WritebackLatency alone when at most one
 * writeback is active. The second assignment adds
 * WritebackChunkSize * 1024 / 32 / SOCCLK; presumably it is the branch
 * taken for more than one active writeback.
 */
5446 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5447 *WritebackUrgentWatermark = WritebackLatency;
5449 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
/* Same selection for the writeback DRAM clock change watermark, with DRAMClockChangeLatency added. */
5452 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5453 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5455 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
/* Per plane: compute the DRAM clock change latency margin. */
5458 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Source lines held by the line buffer, capped at MaxLineBufferLines and
 * reduced by (vertical taps - 1), for luma and then for chroma.
 */
5460 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5462 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
/* Scale those line counts by HTotal / PixelClock and divide by the vertical ratio. */
5464 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5466 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
/*
 * Luma lines that fit in the DET buffer, floored with SwathHeightY as the
 * granularity argument, then scaled the same way as above.
 */
5468 LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5469 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5470 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5471 if (BytePerPixelDETC[k] > 0) {
/*
 * NOTE(review): chroma reads mode_lib->vba.DETBufferSizeC[k] rather than the
 * DETBufferSizeC[] parameter, whereas luma uses the DETBufferSizeY[]
 * parameter - confirm this asymmetry is intended.
 */
5472 LinesInDETC = mode_lib->vba.DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5473 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5474 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
/* Large placeholder value; presumably the branch for planes without chroma. */
5477 FullDETBufferingTimeC = 999999;
/*
 * Luma margin: line-buffer hiding plus full DET buffering, minus the urgent
 * watermark, the DSTX/DSTY after-scaler term and the DRAM clock change
 * watermark.
 */
5480 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
/* With several planes, additionally subtract (1 - 1/N) of a luma swath term. */
5482 if (NumberOfActivePlanes > 1) {
5483 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
/* Chroma margin, same formula with the chroma quantities; the plane margin is then min(Y, C). */
5486 if (BytePerPixelDETC[k] > 0) {
5487 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5489 if (NumberOfActivePlanes > 1) {
5490 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5492 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
/* Presumably the no-chroma branch: the plane margin is the luma margin. */
5494 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
/* Planes with writeback enabled are further limited by the writeback margin. */
5497 if (WritebackEnable[k] == true) {
5499 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
/* Halved for the dm_444_64 writeback pixel format. */
5500 if (WritebackPixelFormat[k] == dm_444_64) {
5501 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
/* Doubled for the dm_whole_buffer_for_single_stream_interleave writeback configuration. */
5503 if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
5504 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2;
/*
 * NOTE(review): this subtracts mode_lib->vba.WritebackDRAMClockChangeWatermark,
 * not the value just stored through the WritebackDRAMClockChangeWatermark
 * pointer; the two agree only if the caller passes the address of that
 * field - confirm.
 */
5506 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark;
5507 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
/*
 * Find the smallest per-plane margin. The plane recorded for it is k itself
 * when BlendingAndTiming[k] == k, otherwise the plane j that
 * BlendingAndTiming[k] refers to.
 */
5511 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5512 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5513 for (k = 0; k < NumberOfActivePlanes; ++k) {
5514 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5515 mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5516 if (BlendingAndTiming[k] == k) {
5517 PlaneWithMinActiveDRAMClockChangeMargin = k;
5519 for (j = 0; j < NumberOfActivePlanes; ++j) {
5520 if (BlendingAndTiming[k] == j) {
5521 PlaneWithMinActiveDRAMClockChangeMargin = j;
/* Reported supported latency = smallest margin + DRAM clock change latency. */
5528 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
/*
 * Smallest margin among the planes that are neither the plane recorded above
 * nor blended onto it (BlendingAndTiming[k] != that plane).
 */
5530 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5531 for (k = 0; k < NumberOfActivePlanes; ++k) {
5532 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5533 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
/* Planes with BlendingAndTiming[k] == k are counted as active OTGs. */
5537 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5538 for (k = 0; k < NumberOfActivePlanes; ++k) {
5539 if (BlendingAndTiming[k] == k) {
5540 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
/*
 * Classification:
 *  - positive smallest margin                      -> dm_dram_clock_change_vactive
 *  - else, PrefetchMode == 0 and (SynchronizedVBlank, or a single active OTG,
 *    or a positive second-smallest margin)         -> dm_dram_clock_change_vblank
 *  - remaining assignment                          -> dm_dram_clock_change_unsupported
 */
5544 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
5545 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5546 } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) {
5547 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5549 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
/*
 * Stutter: pick the plane with the smallest FullDETBufferingTimeY (the <=
 * comparison means the last plane wins on ties) and compute the time it
 * needs to finish its current luma swath.
 */
5552 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5553 for (k = 0; k < NumberOfActivePlanes; ++k) {
5554 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5555 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
5556 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k];
/*
 * Stutter exit watermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep.
 * The enter-plus-exit watermark uses SREnterPlusExitTime instead and is
 * raised to the swath-finish time computed above when that is larger.
 */
5560 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5561 *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane);
/*
 * Computes the DCFCLK value to use in deep sleep.
 *
 * A per-plane requirement is stored in
 * mode_lib->vba.DCFCLKDeepSleepPerPlane[k] for every active plane.
 * *DCFCLKDeepSleep receives the maximum of 8.0, the summed luma + chroma read
 * bandwidth divided by ReturnBusWidth, and all per-plane requirements.
 *
 * The array parameters are indexed by plane, 0 .. NumberOfActivePlanes - 1.
 */
5565 static void CalculateDCFCLKDeepSleep(
5566 struct display_mode_lib *mode_lib,
5567 unsigned int NumberOfActivePlanes,
5568 int BytePerPixelY[],
5569 int BytePerPixelC[],
5571 double VRatioChroma[],
5572 double SwathWidthY[],
5573 double SwathWidthC[],
5574 unsigned int DPPPerPlane[],
5576 double HRatioChroma[],
5577 double PixelClock[],
5578 double PSCL_THROUGHPUT[],
5579 double PSCL_THROUGHPUT_CHROMA[],
5581 double ReadBandwidthLuma[],
5582 double ReadBandwidthChroma[],
5584 double *DCFCLKDeepSleep)
5586 double DisplayPipeLineDeliveryTimeLuma = 0;
5587 double DisplayPipeLineDeliveryTimeChroma = 0;
5589 double ReadBandwidth = 0.0;
5591 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5592 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Luma line delivery time. With VRatio <= 1 it is derived from the pixel
 * clock; the second assignment (presumably the VRatio > 1 branch) derives it
 * from PSCL_THROUGHPUT and DPPCLK instead.
 */
5594 if (VRatio[k] <= 1) {
5595 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5597 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Chroma line delivery time: 0 when the plane has no chroma bytes, else the same two-way choice. */
5599 if (BytePerPixelC[k] == 0) {
5600 DisplayPipeLineDeliveryTimeChroma = 0;
5602 if (VRatioChroma[k] <= 1) {
5603 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5605 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/*
 * Per-plane requirement: with chroma, the larger of the luma and chroma
 * terms, each using a 32.0 divisor; the luma-only assignment uses 64.0.
 * Both carry a 1.1 factor.
 */
5609 if (BytePerPixelC[k] > 0) {
5610 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(1.1 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 1.1 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5612 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
/* Never below PixelClock / 16. */
5614 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(mode_lib->vba.DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
/* Total read bandwidth over all planes (luma + chroma). */
5618 for (k = 0; k < NumberOfActivePlanes; ++k) {
5619 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
/* Start from the bandwidth-derived value, floored at 8.0 ... */
5622 *DCFCLKDeepSleep = dml_max(8.0, ReadBandwidth / ReturnBusWidth);
/* ... then raise it to the largest per-plane requirement. */
5624 for (k = 0; k < NumberOfActivePlanes; ++k) {
5625 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
/*
 * Computes the urgent burst factors for the cursor, luma and chroma of one
 * plane.
 *
 * For each of the three, a buffering time is derived from the buffer size.
 * If that time does not exceed UrgentLatency, the factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set to 1; otherwise the factor is
 * time / (time - UrgentLatency). *NotEnoughUrgentLatencyHiding is cleared on
 * entry.
 */
5629 static void CalculateUrgentBurstFactor(
5630 long swath_width_luma_ub,
5631 long swath_width_chroma_ub,
5632 unsigned int DETBufferSizeInKByte,
5633 unsigned int SwathHeightY,
5634 unsigned int SwathHeightC,
5636 double UrgentLatency,
5637 double CursorBufferSize,
5638 unsigned int CursorWidth,
5639 unsigned int CursorBPP,
5642 double BytePerPixelInDETY,
5643 double BytePerPixelInDETC,
5644 double DETBufferSizeY,
5645 double DETBufferSizeC,
5646 double *UrgentBurstFactorCursor,
5647 double *UrgentBurstFactorLuma,
5648 double *UrgentBurstFactorChroma,
5649 bool *NotEnoughUrgentLatencyHiding)
5651 double LinesInDETLuma = 0;
5652 double LinesInDETChroma = 0;
5653 unsigned int LinesInCursorBuffer = 0;
5654 double CursorBufferSizeInTime = 0;
5655 double DETBufferSizeInTimeLuma = 0;
5656 double DETBufferSizeInTimeChroma = 0;
5658 *NotEnoughUrgentLatencyHiding = 0;
/* Cursor: only evaluated when the cursor has a non-zero width. */
5660 if (CursorWidth > 0) {
/*
 * Cursor lines that fit in the cursor buffer, as 1 shifted left by the
 * floored log2 of (buffer bytes / bytes per cursor line).
 */
5661 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
5663 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
5664 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
5665 *NotEnoughUrgentLatencyHiding = 1;
5666 *UrgentBurstFactorCursor = 0;
5668 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
/* Factor of 1; presumably the fallback when the guard around the division by VRatio does not hold. */
5671 *UrgentBurstFactorCursor = 1;
/* Luma: lines in the DET buffer, floored with SwathHeightY as granularity, converted with LineTime / VRatio. */
5675 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
5677 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
5678 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
5679 *NotEnoughUrgentLatencyHiding = 1;
5680 *UrgentBurstFactorLuma = 0;
5682 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
5685 *UrgentBurstFactorLuma = 1;
/* Chroma: only evaluated when the plane has chroma bytes in the DET. */
5688 if (BytePerPixelInDETC > 0) {
5689 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
/*
 * NOTE(review): the chroma buffering time is divided by VRatio, the same
 * ratio used for luma above - confirm a chroma-specific ratio is not
 * intended here.
 */
5691 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
5692 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
5693 *NotEnoughUrgentLatencyHiding = 1;
5694 *UrgentBurstFactorChroma = 0;
5696 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
5699 *UrgentBurstFactorChroma = 1;
/*
 * Fills the per-plane delivery time arrays:
 *  - DisplayPipeLineDeliveryTime{Luma,Chroma}[] and their *Prefetch variants,
 *  - DisplayPipeRequestDeliveryTime{Luma,Chroma}[] and their *Prefetch
 *    variants (line delivery time divided by the requests per swath),
 *  - CursorRequestDeliveryTime[] and CursorRequestDeliveryTimePrefetch[].
 *
 * All arrays are indexed by plane, 0 .. NumberOfActivePlanes - 1. Only
 * element [k][0] of CursorWidth / CursorBPP is read.
 */
5704 static void CalculatePixelDeliveryTimes(
5705 unsigned int NumberOfActivePlanes,
5707 double VRatioChroma[],
5708 double VRatioPrefetchY[],
5709 double VRatioPrefetchC[],
5710 unsigned int swath_width_luma_ub[],
5711 unsigned int swath_width_chroma_ub[],
5712 unsigned int DPPPerPlane[],
5714 double HRatioChroma[],
5715 double PixelClock[],
5716 double PSCL_THROUGHPUT[],
5717 double PSCL_THROUGHPUT_CHROMA[],
5719 int BytePerPixelC[],
5720 enum scan_direction_class SourceScan[],
5721 unsigned int NumberOfCursors[],
5722 unsigned int CursorWidth[][2],
5723 unsigned int CursorBPP[][2],
5724 unsigned int BlockWidth256BytesY[],
5725 unsigned int BlockHeight256BytesY[],
5726 unsigned int BlockWidth256BytesC[],
5727 unsigned int BlockHeight256BytesC[],
5728 double DisplayPipeLineDeliveryTimeLuma[],
5729 double DisplayPipeLineDeliveryTimeChroma[],
5730 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5731 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5732 double DisplayPipeRequestDeliveryTimeLuma[],
5733 double DisplayPipeRequestDeliveryTimeChroma[],
5734 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5735 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5736 double CursorRequestDeliveryTime[],
5737 double CursorRequestDeliveryTimePrefetch[])
5739 double req_per_swath_ub = 0;
/* Pass 1: line delivery times. */
5742 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Luma: pixel-clock based when VRatio <= 1; the second assignment
 * (presumably the VRatio > 1 branch) is PSCL_THROUGHPUT / DPPCLK based.
 */
5743 if (VRatio[k] <= 1) {
5744 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5746 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Chroma: 0 when the plane has no chroma bytes, else the same choice on VRatioChroma. */
5749 if (BytePerPixelC[k] == 0) {
5750 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5752 if (VRatioChroma[k] <= 1) {
5753 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5755 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/*
 * Prefetch variants: the expressions are the same as above; only the
 * selecting condition changes to the prefetch ratios.
 */
5759 if (VRatioPrefetchY[k] <= 1) {
5760 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5762 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5765 if (BytePerPixelC[k] == 0) {
5766 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5768 if (VRatioPrefetchC[k] <= 1) {
5769 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5771 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/* Pass 2: request delivery times = line delivery time / requests per swath. */
5776 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Requests per swath use the 256-byte block width unless the scan is
 * dm_vert, in which case the block height is used.
 * NOTE(review): both operands are unsigned int, so the quotient is
 * truncated before it is stored in the double req_per_swath_ub.
 */
5777 if (SourceScan[k] != dm_vert) {
5778 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5780 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5782 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5783 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
/* Chroma request times: 0 without chroma, otherwise computed like luma with the chroma block sizes. */
5784 if (BytePerPixelC[k] == 0) {
5785 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5786 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5788 if (SourceScan[k] != dm_vert) {
5789 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5791 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5793 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5794 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
/* Pass 3: cursor request delivery times. */
5798 for (k = 0; k < NumberOfActivePlanes; ++k) {
5799 int cursor_req_per_width = 0;
/*
 * NOTE(review): CursorWidth * CursorBPP / 256 / 8 is evaluated in unsigned
 * integer arithmetic, so it is already truncated when dml_ceil sees it.
 * For a cursor line of fewer than 2048 bits this yields 0, which is then
 * used as a divisor below - confirm whether a floating-point division was
 * intended.
 */
5800 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
/* Planes without cursors get 0 for both outputs (last two assignments of the loop). */
5801 if (NumberOfCursors[k] > 0) {
5802 if (VRatio[k] <= 1) {
5803 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5805 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5807 if (VRatioPrefetchY[k] <= 1) {
5808 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5810 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5813 CursorRequestDeliveryTime[k] = 0;
5814 CursorRequestDeliveryTimePrefetch[k] = 0;
/*
 * CalculateMetaAndPTETimes() - per-plane row durations and request intervals
 * for DCC metadata chunks and page-table-entry (PTE) groups.
 *
 * For every plane k < NumberOfActivePlanes the function writes:
 *   DST_Y_PER_PTE_ROW_NOM_L/C, DST_Y_PER_META_ROW_NOM_L/C
 *       row height divided by the matching vertical ratio; the chroma
 *       entries are 0 when BytePerPixelC[k] == 0.
 *   TimePerMetaChunk{Nominal,VBlank,Flip}, TimePerChromaMetaChunk{...}
 *       lines * HTotal / PixelClock / (upper bound of chunks per row) when
 *       DCCEnable[k] is set, 0 otherwise.
 *   time_per_pte_group_{nom,vblank,flip}_{luma,chroma}
 *       lines * HTotal / PixelClock / (groups per row) when GPUVMEnable is
 *       set, 0 otherwise.
 *
 * NOTE(review): none of the divisors (VRatio, PixelClock, BytePerPixelY,
 * meta_row_height, PTERequestSize*, the derived chunk/group widths) is
 * checked for zero here; presumably the caller guarantees that - confirm.
 */
5819 static void CalculateMetaAndPTETimes(
5820 int NumberOfActivePlanes,
5823 int MinMetaChunkSizeBytes,
5826 double VRatioChroma[],
5827 double DestinationLinesToRequestRowInVBlank[],
5828 double DestinationLinesToRequestRowInImmediateFlip[],
5830 double PixelClock[],
5831 int BytePerPixelY[],
5832 int BytePerPixelC[],
5833 enum scan_direction_class SourceScan[],
5834 int dpte_row_height[],
5835 int dpte_row_height_chroma[],
5836 int meta_row_width[],
5837 int meta_row_width_chroma[],
5838 int meta_row_height[],
5839 int meta_row_height_chroma[],
5840 int meta_req_width[],
5841 int meta_req_width_chroma[],
5842 int meta_req_height[],
5843 int meta_req_height_chroma[],
5844 int dpte_group_bytes[],
5845 int PTERequestSizeY[],
5846 int PTERequestSizeC[],
5847 int PixelPTEReqWidthY[],
5848 int PixelPTEReqHeightY[],
5849 int PixelPTEReqWidthC[],
5850 int PixelPTEReqHeightC[],
5851 int dpte_row_width_luma_ub[],
5852 int dpte_row_width_chroma_ub[],
5853 double DST_Y_PER_PTE_ROW_NOM_L[],
5854 double DST_Y_PER_PTE_ROW_NOM_C[],
5855 double DST_Y_PER_META_ROW_NOM_L[],
5856 double DST_Y_PER_META_ROW_NOM_C[],
5857 double TimePerMetaChunkNominal[],
5858 double TimePerChromaMetaChunkNominal[],
5859 double TimePerMetaChunkVBlank[],
5860 double TimePerChromaMetaChunkVBlank[],
5861 double TimePerMetaChunkFlip[],
5862 double TimePerChromaMetaChunkFlip[],
5863 double time_per_pte_group_nom_luma[],
5864 double time_per_pte_group_vblank_luma[],
5865 double time_per_pte_group_flip_luma[],
5866 double time_per_pte_group_nom_chroma[],
5867 double time_per_pte_group_vblank_chroma[],
5868 double time_per_pte_group_flip_chroma[])
/* Scratch values; each is recomputed for every plane before it is used. */
5870 unsigned int meta_chunk_width = 0;
5871 unsigned int min_meta_chunk_width = 0;
5872 unsigned int meta_chunk_per_row_int = 0;
5873 unsigned int meta_row_remainder = 0;
5874 unsigned int meta_chunk_threshold = 0;
5875 unsigned int meta_chunks_per_row_ub = 0;
5876 unsigned int meta_chunk_width_chroma = 0;
5877 unsigned int min_meta_chunk_width_chroma = 0;
5878 unsigned int meta_chunk_per_row_int_chroma = 0;
5879 unsigned int meta_row_remainder_chroma = 0;
5880 unsigned int meta_chunk_threshold_chroma = 0;
5881 unsigned int meta_chunks_per_row_ub_chroma = 0;
5882 unsigned int dpte_group_width_luma = 0;
5883 unsigned int dpte_groups_per_row_luma_ub = 0;
5884 unsigned int dpte_group_width_chroma = 0;
5885 unsigned int dpte_groups_per_row_chroma_ub = 0;
/* Pass 1: destination lines covered by one PTE row and one meta row. */
5888 for (k = 0; k < NumberOfActivePlanes; ++k) {
5889 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5890 if (BytePerPixelC[k] == 0) {
5891 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5893 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5895 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5896 if (BytePerPixelC[k] == 0) {
5897 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5899 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
/* Pass 2: time available per DCC meta chunk (luma, then chroma). */
5903 for (k = 0; k < NumberOfActivePlanes; ++k) {
5904 if (DCCEnable[k] == true) {
5905 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5906 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
5907 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5908 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
/*
 * The request dimension subtracted depends on the scan direction.
 * NOTE(review): the operands are unsigned, so the subtraction wraps if the
 * request width/height exceeds 2 * min_meta_chunk_width - confirm inputs
 * rule that out.
 */
5909 if (SourceScan[k] != dm_vert) {
5910 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5912 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
/* Upper bound of chunks per row: whole chunks + 1, or + 2 when the remainder exceeds the threshold. */
5914 if (meta_row_remainder <= meta_chunk_threshold) {
5915 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5917 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5919 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5920 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5921 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5922 if (BytePerPixelC[k] == 0) {
5923 TimePerChromaMetaChunkNominal[k] = 0;
5924 TimePerChromaMetaChunkVBlank[k] = 0;
5925 TimePerChromaMetaChunkFlip[k] = 0;
5927 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5928 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
/* The quotient is formed in double here but truncated again by the assignment to an unsigned int. */
5929 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5930 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5931 if (SourceScan[k] != dm_vert) {
5932 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5934 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5936 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5937 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5939 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5941 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5942 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5943 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
/* DCC disabled for this plane: no meta chunk traffic, all six outputs are zeroed. */
5946 TimePerMetaChunkNominal[k] = 0;
5947 TimePerMetaChunkVBlank[k] = 0;
5948 TimePerMetaChunkFlip[k] = 0;
5949 TimePerChromaMetaChunkNominal[k] = 0;
5950 TimePerChromaMetaChunkVBlank[k] = 0;
5951 TimePerChromaMetaChunkFlip[k] = 0;
/* Pass 3: time available per PTE group (luma, then chroma). */
5955 for (k = 0; k < NumberOfActivePlanes; ++k) {
5956 if (GPUVMEnable == true) {
/* Group width = requests per group (group bytes / request size) times the request extent along the scan direction. */
5957 if (SourceScan[k] != dm_vert) {
5958 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5960 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
/* The 1.0 factor forces a floating-point quotient before dml_ceil(). */
5962 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
5963 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5964 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5965 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5966 if (BytePerPixelC[k] == 0) {
5967 time_per_pte_group_nom_chroma[k] = 0;
5968 time_per_pte_group_vblank_chroma[k] = 0;
5969 time_per_pte_group_flip_chroma[k] = 0;
5971 if (SourceScan[k] != dm_vert) {
5972 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5974 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5976 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
5977 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5978 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5979 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
/* GPUVM disabled: no PTE group traffic, all six outputs are zeroed. */
5982 time_per_pte_group_nom_luma[k] = 0;
5983 time_per_pte_group_vblank_luma[k] = 0;
5984 time_per_pte_group_flip_luma[k] = 0;
5985 time_per_pte_group_nom_chroma[k] = 0;
5986 time_per_pte_group_vblank_chroma[k] = 0;
5987 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes() - per-plane time budget for each VM group
 * and each VM request, in vblank and during an immediate flip.
 *
 * A plane contributes only when GPUVMEnable is set and either DCCEnable[k] is
 * set or GPUVMMaxPageTableLevels > 1; otherwise its four outputs are 0.
 *
 * For contributing planes:
 *   groups   = sum of ceil(bytes / vm_group_bytes[k]) over the byte counts
 *              selected below (plus a constant 1 or 2 in the combined case),
 *   requests = sum of bytes / 64 (integer division) over the same counts,
 *   TimePerVM{Group,Request}{VBlank,Flip}[k] =
 *              DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k]
 *              * HTotal[k] / PixelClock[k] / {groups, requests},
 *   and all four results are halved when GPUVMMaxPageTableLevels > 2.
 *
 * Byte counts selected: dpde0 only when DCC is off; meta PTE only when DCC is
 * on and there is a single page-table level; both otherwise. Chroma counts
 * are added only when BytePerPixelC[k] > 0.
 *
 * NOTE(review): the "/ 64" terms truncate, so a byte count below 64 yields a
 * request count of 0 that is then used as a divisor - confirm callers never
 * produce that.
 */
5992 static void CalculateVMGroupAndRequestTimes(
5993 unsigned int NumberOfActivePlanes,
5995 unsigned int GPUVMMaxPageTableLevels,
5996 unsigned int HTotal[],
5997 int BytePerPixelC[],
5998 double DestinationLinesToRequestVMInVBlank[],
5999 double DestinationLinesToRequestVMInImmediateFlip[],
6001 double PixelClock[],
6002 int dpte_row_width_luma_ub[],
6003 int dpte_row_width_chroma_ub[],
6004 int vm_group_bytes[],
6005 unsigned int dpde0_bytes_per_frame_ub_l[],
6006 unsigned int dpde0_bytes_per_frame_ub_c[],
6007 int meta_pte_bytes_per_frame_ub_l[],
6008 int meta_pte_bytes_per_frame_ub_c[],
6009 double TimePerVMGroupVBlank[],
6010 double TimePerVMGroupFlip[],
6011 double TimePerVMRequestVBlank[],
6012 double TimePerVMRequestFlip[])
6014 int num_group_per_lower_vm_stage = 0;
6015 int num_req_per_lower_vm_stage = 0;
6018 for (k = 0; k < NumberOfActivePlanes; ++k) {
6019 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
/* Group count, DCC off: dpde0 bytes only. */
6020 if (DCCEnable[k] == false) {
6021 if (BytePerPixelC[k] > 0) {
6022 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k])
6023 / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k])
6024 / (double) (vm_group_bytes[k]), 1);
6026 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k])
6027 / (double) (vm_group_bytes[k]), 1);
/* Group count, DCC on with a single page-table level: meta PTE bytes only. */
6030 if (GPUVMMaxPageTableLevels == 1) {
6031 if (BytePerPixelC[k] > 0) {
6032 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k])
6033 / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k])
6034 / (double) (vm_group_bytes[k]), 1);
6036 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k])
6037 / (double) (vm_group_bytes[k]), 1);
/* Group count, DCC on with more levels: dpde0 and meta PTE bytes plus a constant (2 with chroma, 1 without). */
6040 if (BytePerPixelC[k] > 0) {
6041 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6042 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6043 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6044 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6046 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6047 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/* Request count: same case split as above, using truncating division by 64 instead of dml_ceil(). */
6052 if (DCCEnable[k] == false) {
6053 if (BytePerPixelC[k] > 0) {
6054 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6056 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6059 if (GPUVMMaxPageTableLevels == 1) {
6060 if (BytePerPixelC[k] > 0) {
6061 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64
6062 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6064 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6067 if (BytePerPixelC[k] > 0) {
6068 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6069 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k]
6070 / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6072 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6073 + meta_pte_bytes_per_frame_ub_l[k] / 64;
/* Spread the available request window (lines * HTotal / PixelClock) evenly over groups and over requests. */
6078 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
6079 / num_group_per_lower_vm_stage;
6080 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
6081 / num_group_per_lower_vm_stage;
6082 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
6083 / num_req_per_lower_vm_stage;
6084 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
6085 / num_req_per_lower_vm_stage;
/* With more than two page-table levels every budget is halved. */
6087 if (GPUVMMaxPageTableLevels > 2) {
6088 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6089 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6090 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6091 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
/* Plane does not issue VM requests: all four outputs are zeroed. */
6095 TimePerVMGroupVBlank[k] = 0;
6096 TimePerVMGroupFlip[k] = 0;
6097 TimePerVMRequestVBlank[k] = 0;
6098 TimePerVMRequestFlip[k] = 0;
/*
 * CalculateStutterEfficiency() - derive the stutter period, the burst time
 * needed to refill within that period, and the resulting efficiency
 * percentages.
 *
 * Outputs:
 *   *StutterEfficiencyNotIncludingVBlank
 *       (1 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100 when no
 *       plane has WritebackEnable set, 0 otherwise.
 *   *StutterEfficiency
 *       the value above weighted over the non-vblank part of the frame of the
 *       critical plane, with the smallest vblank counted as fully efficient.
 *   *StutterPeriodOut
 *       the stutter period; optional, only written when the pointer is
 *       non-NULL.
 *
 * The "critical plane" is the plane with the smallest FullDETBufferingTimeY;
 * its frame time, bytes per pixel, swath width and unfinished-swath line
 * count feed the later formulas.
 *
 * NOTE(review): the per-plane scratch arrays are sized DC__NUM_DPP__MAX and
 * indexed up to NumberOfActivePlanes - 1 without a bound check - presumably
 * guaranteed by the caller; confirm.
 */
6103 static void CalculateStutterEfficiency(
6104 int NumberOfActivePlanes,
6105 long ROBBufferSizeInKByte,
6106 double TotalDataReadBandwidth,
6110 bool SynchronizedVBlank,
6112 unsigned int DETBufferSizeY[],
6113 int BytePerPixelY[],
6114 double BytePerPixelDETY[],
6115 double SwathWidthY[],
6118 double DCCRateLuma[],
6119 double DCCRateChroma[],
6122 double PixelClock[],
6124 enum scan_direction_class SourceScan[],
6125 int BlockHeight256BytesY[],
6126 int BlockWidth256BytesY[],
6127 int BlockHeight256BytesC[],
6128 int BlockWidth256BytesC[],
6129 int DCCYMaxUncompressedBlock[],
6130 int DCCCMaxUncompressedBlock[],
6133 bool WritebackEnable[],
6134 double ReadBandwidthPlaneLuma[],
6135 double ReadBandwidthPlaneChroma[],
6136 double meta_row_bw[],
6137 double dpte_row_bw[],
6138 double *StutterEfficiencyNotIncludingVBlank,
6139 double *StutterEfficiency,
6140 double *StutterPeriodOut)
6142 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
6143 double FrameTimeForMinFullDETBufferingTime = 0;
6144 double StutterPeriod = 0;
6145 double AverageReadBandwidth = 0;
6146 double TotalRowReadBandwidth = 0;
6147 double AverageDCCCompressionRate = 0;
6148 double PartOfBurstThatFitsInROB = 0;
6149 double StutterBurstTime = 0;
6150 int TotalActiveWriteback = 0;
6151 double VBlankTime = 0;
6152 double SmallestVBlank = 0;
6153 int BytePerPixelYCriticalPlane = 0;
6154 double SwathWidthYCriticalPlane = 0;
6155 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
6156 double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
6157 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6158 double MaximumEffectiveCompressionLuma = 0;
6159 double MaximumEffectiveCompressionChroma = 0;
/*
 * Luma lines held by each plane's DET buffer, floored with SwathHeightY as
 * the granularity argument of dml_floor(), then converted to time at the
 * plane's line rate and vertical ratio.
 */
6162 for (k = 0; k < NumberOfActivePlanes; ++k) {
6163 LinesInDETY[k] = DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
6164 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
6165 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
/* Seed the critical-plane values from plane 0, then keep the plane with the smallest buffering time. */
6168 StutterPeriod = FullDETBufferingTimeY[0];
6169 FrameTimeForMinFullDETBufferingTime = VTotal[0] * HTotal[0] / PixelClock[0];
6170 BytePerPixelYCriticalPlane = BytePerPixelY[0];
6171 SwathWidthYCriticalPlane = SwathWidthY[0];
6172 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[0]
6173 - (LinesInDETY[0] - LinesInDETYRoundedDownToSwath[0]);
6175 for (k = 0; k < NumberOfActivePlanes; ++k) {
6176 if (FullDETBufferingTimeY[k] < StutterPeriod) {
6177 StutterPeriod = FullDETBufferingTimeY[k];
6178 FrameTimeForMinFullDETBufferingTime = VTotal[k] * HTotal[k] / PixelClock[k];
6179 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6180 SwathWidthYCriticalPlane = SwathWidthY[k];
6181 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k]
6182 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]);
/*
 * Accumulate the read bandwidth after compression. For DCC planes the DCC
 * rate is capped at 2 when the 256-byte block dimension facing the swath is
 * larger than the swath height or the max uncompressed block is below 256,
 * and at 4 otherwise. Row (meta + dpte) bandwidth is summed separately.
 */
6186 AverageReadBandwidth = 0;
6187 TotalRowReadBandwidth = 0;
6188 for (k = 0; k < NumberOfActivePlanes; ++k) {
6189 if (DCCEnable[k] == true) {
6190 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k])
6191 || (SourceScan[k] != dm_vert
6192 && BlockHeight256BytesY[k] > SwathHeightY[k])
6193 || DCCYMaxUncompressedBlock[k] < 256) {
6194 MaximumEffectiveCompressionLuma = 2;
6196 MaximumEffectiveCompressionLuma = 4;
6198 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(DCCRateLuma[k], MaximumEffectiveCompressionLuma);
6200 if (ReadBandwidthPlaneChroma[k] > 0) {
6201 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6202 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k])
6203 || DCCCMaxUncompressedBlock[k] < 256) {
6204 MaximumEffectiveCompressionChroma = 2;
6206 MaximumEffectiveCompressionChroma = 4;
6208 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(DCCRateChroma[k], MaximumEffectiveCompressionChroma);
6211 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6213 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
/*
 * Burst time = part that fits in the ROB, returned at ReturnBW
 *            + the rest, moved at DCFCLK * 64
 *            + row traffic for the period, returned at ReturnBW,
 * and never less than the time to finish the critical plane's current swath.
 */
6216 AverageDCCCompressionRate = TotalDataReadBandwidth / AverageReadBandwidth;
6217 PartOfBurstThatFitsInROB = dml_min(StutterPeriod * TotalDataReadBandwidth, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6218 StutterBurstTime = PartOfBurstThatFitsInROB / AverageDCCCompressionRate / ReturnBW + (StutterPeriod * TotalDataReadBandwidth
6219 - PartOfBurstThatFitsInROB) / (DCFCLK * 64) + StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6220 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
/* Any enabled writeback forces the non-vblank efficiency to 0. */
6222 TotalActiveWriteback = 0;
6223 for (k = 0; k < NumberOfActivePlanes; ++k) {
6224 if (WritebackEnable[k] == true) {
6225 TotalActiveWriteback = TotalActiveWriteback + 1;
6229 if (TotalActiveWriteback == 0) {
6230 *StutterEfficiencyNotIncludingVBlank = (1
6231 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100;
6233 *StutterEfficiencyNotIncludingVBlank = 0;
/* Vblank time is only taken into account when vblanks are synchronized or there is a single plane. */
6236 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6237 SmallestVBlank = (VTotal[0] - VActive[0]) * HTotal[0] / PixelClock[0];
6241 for (k = 0; k < NumberOfActivePlanes; ++k) {
6242 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6243 VBlankTime = (VTotal[k] - VActive[k]) * HTotal[k] / PixelClock[k];
6247 SmallestVBlank = dml_min(SmallestVBlank, VBlankTime);
6250 *StutterEfficiency = (*StutterEfficiencyNotIncludingVBlank / 100.0 * (FrameTimeForMinFullDETBufferingTime - SmallestVBlank) + SmallestVBlank) / FrameTimeForMinFullDETBufferingTime * 100;
/* StutterPeriodOut is the only output pointer that may be NULL. */
6252 if (StutterPeriodOut)
6253 *StutterPeriodOut = StutterPeriod;
/*
 * CalculateSwathAndDETConfiguration() - choose swath heights and the luma /
 * chroma split of the DET buffer for every plane, and report whether each
 * viewport fits.
 *
 * Steps:
 *   1. CalculateSwathWidth() fills the swath widths, the maximum swath
 *      heights and the swath_width_*_ub arrays.
 *   2. Per plane, a minimum swath height (the maximum or half of it) is
 *      chosen from the pixel format, tiling mode and scan direction.
 *   3. Swath byte sizes are swath_width_ub * BytePerPixDET * height, rounded
 *      through dml_ceil(..., 256) for dm_420_10.
 *   4. SwathHeightY/C are picked so that luma + chroma swath bytes fit in
 *      half of the DET buffer (DETBufferSizeInKByte * 1024 / 2), falling back
 *      to the minimum heights when nothing else fits.
 *   5. DETBufferSizeY/C get the whole buffer, a 1/2 : 1/2 split or a
 *      2/3 : 1/3 split.
 *   6. ViewportSizeSupportPerPlane[k] and *ViewportSizeSupport are cleared
 *      when even the minimum swaths do not fit or a swath width exceeds its
 *      maximum.
 *
 * NOTE(review): several array parameters here are "int []" while the
 * matching CalculateSwathWidth() parameters in this file are
 * "unsigned int []" - presumably harmless for the value ranges used; confirm.
 */
6256 static void CalculateSwathAndDETConfiguration(
6257 bool ForceSingleDPP,
6258 int NumberOfActivePlanes,
6259 unsigned int DETBufferSizeInKByte,
6260 double MaximumSwathWidthLuma[],
6261 double MaximumSwathWidthChroma[],
6262 enum scan_direction_class SourceScan[],
6263 enum source_format_class SourcePixelFormat[],
6264 enum dm_swizzle_mode SurfaceTiling[],
6265 int ViewportWidth[],
6266 int ViewportHeight[],
6267 int SurfaceWidthY[],
6268 int SurfaceWidthC[],
6269 int SurfaceHeightY[],
6270 int SurfaceHeightC[],
6271 int Read256BytesBlockHeightY[],
6272 int Read256BytesBlockHeightC[],
6273 int Read256BytesBlockWidthY[],
6274 int Read256BytesBlockWidthC[],
6275 enum odm_combine_mode ODMCombineEnabled[],
6276 int BlendingAndTiming[],
6279 double BytePerPixDETY[],
6280 double BytePerPixDETC[],
6283 double HRatioChroma[],
6285 int swath_width_luma_ub[],
6286 int swath_width_chroma_ub[],
6287 double SwathWidth[],
6288 double SwathWidthChroma[],
6291 unsigned int DETBufferSizeY[],
6292 unsigned int DETBufferSizeC[],
6293 bool ViewportSizeSupportPerPlane[],
6294 bool *ViewportSizeSupport)
6296 int MaximumSwathHeightY[DC__NUM_DPP__MAX] = { 0 };
6297 int MaximumSwathHeightC[DC__NUM_DPP__MAX] = { 0 };
6298 int MinimumSwathHeightY = 0;
6299 int MinimumSwathHeightC = 0;
6300 long RoundedUpMaxSwathSizeBytesY = 0;
6301 long RoundedUpMaxSwathSizeBytesC = 0;
6302 long RoundedUpMinSwathSizeBytesY = 0;
6303 long RoundedUpMinSwathSizeBytesC = 0;
6304 long RoundedUpSwathSizeBytesY = 0;
6305 long RoundedUpSwathSizeBytesC = 0;
6306 double SwathWidthSingleDPP[DC__NUM_DPP__MAX] = { 0 };
6307 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX] = { 0 };
/* Step 1: swath widths and maximum swath heights for every plane. */
6310 CalculateSwathWidth(
6312 NumberOfActivePlanes,
6324 Read256BytesBlockHeightY,
6325 Read256BytesBlockHeightC,
6326 Read256BytesBlockWidthY,
6327 Read256BytesBlockWidthC,
6332 SwathWidthSingleDPP,
6333 SwathWidthSingleDPPChroma,
6336 MaximumSwathHeightY,
6337 MaximumSwathHeightC,
6338 swath_width_luma_ub,
6339 swath_width_chroma_ub);
/* Assume support until a plane fails the check at the end of the loop body. */
6341 *ViewportSizeSupport = true;
6342 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Step 2a: the formats listed in this condition (444, mono, rgbe). */
6343 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32
6344 || SourcePixelFormat[k] == dm_444_16
6345 || SourcePixelFormat[k] == dm_mono_16
6346 || SourcePixelFormat[k] == dm_mono_8
6347 || SourcePixelFormat[k] == dm_rgbe)) {
6348 if (SurfaceTiling[k] == dm_sw_linear
6349 || (SourcePixelFormat[k] == dm_444_64
6350 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6351 && SourceScan[k] != dm_vert)) {
6352 MinimumSwathHeightY = MaximumSwathHeightY[k];
6353 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6354 MinimumSwathHeightY = MaximumSwathHeightY[k];
6356 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6358 MinimumSwathHeightC = MaximumSwathHeightC[k];
/* Step 2b: every other format; linear tiling keeps the maximum heights as minimum. */
6360 if (SurfaceTiling[k] == dm_sw_linear) {
6361 MinimumSwathHeightY = MaximumSwathHeightY[k];
6362 MinimumSwathHeightC = MaximumSwathHeightC[k];
6363 } else if (SourcePixelFormat[k] == dm_rgbe_alpha
6364 && SourceScan[k] == dm_vert) {
6365 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6366 MinimumSwathHeightC = MaximumSwathHeightC[k];
6367 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6368 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6369 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6370 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6371 MinimumSwathHeightY = MaximumSwathHeightY[k];
6372 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6374 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6375 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
/* Step 3: swath sizes in bytes at maximum and minimum height; the double product is truncated into a long. */
6379 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6380 * MaximumSwathHeightY[k];
6381 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6382 * MinimumSwathHeightY;
6383 if (SourcePixelFormat[k] == dm_420_10) {
6384 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6385 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6387 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6388 * MaximumSwathHeightC[k];
6389 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6390 * MinimumSwathHeightC;
6391 if (SourcePixelFormat[k] == dm_420_10) {
6392 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6393 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
/*
 * Step 4: try max/max first; otherwise shrink luma when it is at least 1.5x
 * the chroma swath, or shrink chroma when it is not, provided the result
 * fits half the DET buffer; otherwise use min/min.
 */
6396 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6397 <= DETBufferSizeInKByte * 1024 / 2) {
6398 SwathHeightY[k] = MaximumSwathHeightY[k];
6399 SwathHeightC[k] = MaximumSwathHeightC[k];
6400 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6401 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6402 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6403 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6404 <= DETBufferSizeInKByte * 1024 / 2) {
6405 SwathHeightY[k] = MinimumSwathHeightY;
6406 SwathHeightC[k] = MaximumSwathHeightC[k];
6407 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6408 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6409 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6410 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6411 <= DETBufferSizeInKByte * 1024 / 2) {
6412 SwathHeightY[k] = MaximumSwathHeightY[k];
6413 SwathHeightC[k] = MinimumSwathHeightC;
6414 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6415 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6417 SwathHeightY[k] = MinimumSwathHeightY;
6418 SwathHeightC[k] = MinimumSwathHeightC;
6419 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6420 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
/* Step 5: DET split - all luma when there is no chroma swath, else 1/2 : 1/2 or 2/3 : 1/3. */
6423 if (SwathHeightC[k] == 0) {
6424 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024;
6425 DETBufferSizeC[k] = 0;
6426 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6427 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 / 2;
6428 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 2;
6430 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 * 2 / 3;
6431 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 3;
/* Step 6: a plane is unsupported when minimum swaths overflow half the DET or a swath is wider than allowed. */
6434 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6435 > DETBufferSizeInKByte * 1024 / 2
6436 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6437 || (SwathHeightC[k] > 0
6438 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6439 *ViewportSizeSupport = false;
6440 ViewportSizeSupportPerPlane[k] = false;
6442 ViewportSizeSupportPerPlane[k] = true;
/*
 * CalculateSwathWidth() - per-plane swath widths, maximum swath heights and
 * block-aligned swath width upper bounds.
 *
 * For every plane k < NumberOfActivePlanes:
 *   SwathWidthSingleDPPY[k]  viewport width, or viewport height for dm_vert
 *                            scan.
 *   SwathWidthY[k]           the single-DPP width, limited by
 *                            HActive / 4 * HRatio or HActive / 2 * HRatio for
 *                            4:1 / 2:1 ODM combine, halved when
 *                            DPPPerPlane[k] == 2, or taken unchanged; reset
 *                            to the single-DPP width when ForceSingleDPP.
 *   SwathWidthC[k], SwathWidthSingleDPPC[k]
 *                            half of the luma values for dm_420_8/10/12,
 *                            equal to them otherwise.
 *   MaximumSwathHeightY/C[k] the 256-byte block height, or the block width
 *                            for dm_vert scan.
 *   swath_width_luma_ub[k], swath_width_chroma_ub[k]
 *                            min(surface upper bound,
 *                                dml_ceil(SwathWidth - 1, block) + block);
 *                            chroma is 0 when BytePerPixC[k] is not > 0.
 *
 * The ODM mode used for plane k is that of plane BlendingAndTiming[k].
 */
6447 static void CalculateSwathWidth(
6448 bool ForceSingleDPP,
6449 int NumberOfActivePlanes,
6450 enum source_format_class SourcePixelFormat[],
6451 enum scan_direction_class SourceScan[],
6452 unsigned int ViewportWidth[],
6453 unsigned int ViewportHeight[],
6454 unsigned int SurfaceWidthY[],
6455 unsigned int SurfaceWidthC[],
6456 unsigned int SurfaceHeightY[],
6457 unsigned int SurfaceHeightC[],
6458 enum odm_combine_mode ODMCombineEnabled[],
6461 int Read256BytesBlockHeightY[],
6462 int Read256BytesBlockHeightC[],
6463 int Read256BytesBlockWidthY[],
6464 int Read256BytesBlockWidthC[],
6465 int BlendingAndTiming[],
6466 unsigned int HActive[],
6469 double SwathWidthSingleDPPY[],
6470 double SwathWidthSingleDPPC[],
6471 double SwathWidthY[],
6472 double SwathWidthC[],
6473 int MaximumSwathHeightY[],
6474 int MaximumSwathHeightC[],
6475 unsigned int swath_width_luma_ub[],
6476 unsigned int swath_width_chroma_ub[])
6479 long surface_width_ub_l;
6480 long surface_height_ub_l;
6481 long surface_width_ub_c;
6482 long surface_height_ub_c;
6484 for (k = 0; k < NumberOfActivePlanes; ++k) {
6485 enum odm_combine_mode MainPlaneODMCombine = 0;
/* Surface extents passed through dml_ceil() with the 256-byte block size as granularity. */
6486 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6487 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6488 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6489 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6491 if (SourceScan[k] != dm_vert) {
6492 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6494 SwathWidthSingleDPPY[k] = ViewportHeight[k];
/* Start from the plane's own ODM mode, then take it from plane BlendingAndTiming[k]. */
6497 MainPlaneODMCombine = ODMCombineEnabled[k];
6498 for (j = 0; j < NumberOfActivePlanes; ++j) {
6499 if (BlendingAndTiming[k] == j) {
6500 MainPlaneODMCombine = ODMCombineEnabled[j];
6504 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6505 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6506 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6507 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6508 } else if (DPPPerPlane[k] == 2) {
6509 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6511 SwathWidthY[k] = SwathWidthSingleDPPY[k];
/* Chroma width is half the luma width for the three 4:2:0 formats. */
6514 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6515 SwathWidthC[k] = SwathWidthY[k] / 2;
6516 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6518 SwathWidthC[k] = SwathWidthY[k];
6519 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6522 if (ForceSingleDPP == true) {
6523 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6524 SwathWidthC[k] = SwathWidthSingleDPPC[k];
/* NOTE(review): these four assignments repeat the ones at the top of the loop body with the same inputs. */
6527 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6528 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6529 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6530 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
/* Non-vertical scan: heights come from block heights, widths are bounded by the surface width. */
6532 if (SourceScan[k] != dm_vert) {
6533 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6534 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6535 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6536 Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6537 if (BytePerPixC[k] > 0) {
6538 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6539 Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6541 swath_width_chroma_ub[k] = 0;
/* Vertical scan: block width and height swap roles, widths are bounded by the surface height. */
6544 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6545 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6546 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6547 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6548 if (BytePerPixC[k] > 0) {
6549 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6550 Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6552 swath_width_chroma_ub[k] = 0;
/*
 * CalculateExtraLatency() - extra latency as the sum of a round-trip term and
 * a data-return term.
 *
 * Returns (RoundTripPingLatencyCycles + 32) / DCFCLK
 *         + ExtraLatencyBytes / ReturnBW,
 * where ExtraLatencyBytes is obtained from CalculateExtraLatencyBytes() with
 * the arguments forwarded from this function.
 *
 * NOTE(review): DCFCLK and ReturnBW are used as divisors without a zero
 * check - presumably guaranteed non-zero by the caller; confirm.
 */
6558 static double CalculateExtraLatency(
6559 long RoundTripPingLatencyCycles,
6560 long ReorderingBytes,
6562 int TotalNumberOfActiveDPP,
6563 int PixelChunkSizeInKByte,
6564 int TotalNumberOfDCCActiveDPP,
6569 int NumberOfActivePlanes,
6571 int dpte_group_bytes[],
6572 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6573 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6574 double HostVMMinPageSize,
6575 int HostVMMaxNonCachedPageTableLevels)
6577 double ExtraLatencyBytes = 0;
6578 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6580 TotalNumberOfActiveDPP,
6581 PixelChunkSizeInKByte,
6582 TotalNumberOfDCCActiveDPP,
6586 NumberOfActivePlanes,
6589 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6590 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6592 HostVMMaxNonCachedPageTableLevels);
/* Round-trip cycles (plus a fixed 32) converted via DCFCLK, plus the time to return the extra bytes at ReturnBW. */
6594 return (RoundTripPingLatencyCycles + 32) / DCFCLK + ExtraLatencyBytes / ReturnBW;
/*
 * CalculateExtraLatencyBytes() - byte count behind the data-return term of
 * the extra latency.
 *
 * ret = ReorderingBytes
 *       + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *          + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0
 * and, when GPUVMEnable is set, for every plane k:
 *       + NumberOfDPP[k] * dpte_group_bytes[k]
 *         * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor.
 *
 * With GPUVMEnable and HostVMEnable both set, the inefficiency factor is the
 * ratio of the two Percent... arguments and HostVMDynamicLevels is
 * HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (never below 0)
 * depending on HostVMMinPageSize: below 2048, below 1048576, or larger.
 * Otherwise the factor is 1 and the level count is 0.
 */
6597 static double CalculateExtraLatencyBytes(
6598 long ReorderingBytes,
6599 int TotalNumberOfActiveDPP,
6600 int PixelChunkSizeInKByte,
6601 int TotalNumberOfDCCActiveDPP,
6605 int NumberOfActivePlanes,
6607 int dpte_group_bytes[],
6608 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6609 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6610 double HostVMMinPageSize,
6611 int HostVMMaxNonCachedPageTableLevels)
6614 double HostVMInefficiencyFactor = 0;
6615 int HostVMDynamicLevels = 0;
6618 if (GPUVMEnable == true && HostVMEnable == true) {
6619 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
/* Larger host pages remove page-table levels from the count, clamped at 0. */
6620 if (HostVMMinPageSize < 2048) {
6621 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
6622 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
6623 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
6625 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
/* Host VM not in use: neutral factor and no extra levels. */
6628 HostVMInefficiencyFactor = 1;
6629 HostVMDynamicLevels = 0;
/* Chunk sizes are multiplied by 1024.0, which also makes the sum floating point. */
6632 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
6634 if (GPUVMEnable == true) {
6635 for (k = 0; k < NumberOfActivePlanes; ++k) {
6636 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * CalculateUrgentLatency - pick the largest of the three urgent-latency
 * inputs and optionally add a fabric-clock dependent adjustment.
 *
 * When DoUrgentLatencyAdjustment is true the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * NOTE(review): FabricClock is used below but its declaration is not visible
 * in this excerpt - presumably the last parameter; confirm against the full
 * file. The declaration of `ret` and the return are not visible either.
 */
6643 static double CalculateUrgentLatency(
6644 double UrgentLatencyPixelDataOnly,
6645 double UrgentLatencyPixelMixedWithVMData,
6646 double UrgentLatencyVMDataOnly,
6647 bool DoUrgentLatencyAdjustment,
6648 double UrgentLatencyAdjustmentFabricClockComponent,
6649 double UrgentLatencyAdjustmentFabricClockReference,
/* Start from the largest of the three latencies. */
6654 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
6655 if (DoUrgentLatencyAdjustment == true) {
/* The adjustment is zero when FabricClock equals the reference clock. */
6656 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
/*
 * UseMinimumDCFCLK - fill DCFCLKState[i][j] for every state i below
 * mode_lib->soc.num_states and for j = 0 and j = 1.
 *
 * For each (i, j) pair the function:
 *   1. sums a per-plane DPTE row bandwidth term into
 *      TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
 *   2. derives DCFCLKRequiredForAverageBandwidth as the dml_max3() of the
 *      projected deep-sleep DCFCLK and two bandwidth / bus-width terms;
 *   3. derives a per-plane peak requirement from the prefetch byte counts
 *      and the time available for prefetch, falling back to
 *      DCFCLKPerState[i] where the available time is not positive;
 *   4. sums the per-plane values, then raises the sum (or replaces it with
 *      DCFCLKPerState[i]) in a second per-plane pass;
 *   5. stores min(DCFCLKPerState[i],
 *                 1.05 * (1 + PercentMarginOverMinimumRequiredDCFCLK / 100)
 *                 * max(average, peak)).
 *
 * The only output written through a parameter in the visible code is
 * DCFCLKState; the other array parameters are only read here.
 *
 * NOTE(review): ReturnBusWidth, MetaChunkSize, GPUVMEnable, HostVMEnable,
 * HTotal, VTotal and VActive are used below but their declarations are not
 * visible in this excerpt - presumably parameters; confirm against the full
 * file. Several `else` lines and closing braces are not visible either.
 *
 * NOTE(review): the loop index k is declared unsigned int while
 * NumberOfActivePlanes is int. In the two loops written as
 * `k <= NumberOfActivePlanes - 1` the right-hand side is converted to
 * unsigned for the comparison, so with NumberOfActivePlanes == 0 the bound
 * becomes UINT_MAX and the loop would index past the per-plane arrays. The
 * other loops use `k < NumberOfActivePlanes` and do not have this problem.
 * Confirm callers never pass zero planes, or switch to the `<` form.
 */
6662 static void UseMinimumDCFCLK(
6663 struct display_mode_lib *mode_lib,
6664 int MaxInterDCNTileRepeaters,
6665 int MaxPrefetchMode,
6666 double FinalDRAMClockChangeLatency,
6667 double SREnterPlusExitTime,
6669 int RoundTripPingLatencyCycles,
6670 int ReorderingBytes,
6671 int PixelChunkSizeInKByte,
6674 int GPUVMMaxPageTableLevels,
6676 int NumberOfActivePlanes,
6677 double HostVMMinPageSize,
6678 int HostVMMaxNonCachedPageTableLevels,
6679 bool DynamicMetadataVMEnabled,
6680 enum immediate_flip_requirement ImmediateFlipRequirement,
6681 bool ProgressiveToInterlaceUnitInOPP,
6682 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
6683 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6684 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6685 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
6688 int DynamicMetadataTransmittedBytes[],
6689 int DynamicMetadataLinesBeforeActiveRequired[],
6691 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
6692 double RequiredDISPCLK[][2],
6693 double UrgLatency[],
6694 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
6695 double ProjectedDCFCLKDeepSleep[][2],
6696 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
6697 double TotalVActivePixelBandwidth[][2],
6698 double TotalVActiveCursorBandwidth[][2],
6699 double TotalMetaRowBandwidth[][2],
6700 double TotalDPTERowBandwidth[][2],
6701 unsigned int TotalNumberOfActiveDPP[][2],
6702 unsigned int TotalNumberOfDCCActiveDPP[][2],
6703 int dpte_group_bytes[],
6704 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
6705 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
6706 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
6707 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
6708 int BytePerPixelY[],
6709 int BytePerPixelC[],
6711 double PixelClock[],
6712 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
6713 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
6714 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
6715 bool DynamicMetadataEnable[],
6716 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
6717 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
6718 double ReadBandwidthLuma[],
6719 double ReadBandwidthChroma[],
6720 double DCFCLKPerState[],
6721 double DCFCLKState[][2])
6723 double NormalEfficiency = 0;
6724 double PTEEfficiency = 0;
6725 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
6726 unsigned int i, j, k;
/*
 * NormalEfficiency: the "pixel mixed with VM data" percentage when HostVM is
 * enabled, otherwise the "pixel data only" percentage, converted from percent
 * to a fraction.
 */
6728 NormalEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
6729 : PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
/*
 * PTEEfficiency: ratio of the "VM data only" percentage to the "pixel mixed
 * with VM data" percentage when HostVM is enabled, otherwise 1.0.
 */
6730 PTEEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
6731 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
/* Outer loops: every state i, and j in {0, 1}. */
6732 for (i = 0; i < mode_lib->soc.num_states; ++i) {
6733 for (j = 0; j <= 1; ++j) {
/* Per-(i, j) scratch values; the arrays are zeroed on every iteration. */
6734 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
6735 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX] = { 0 };
6736 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX] = { 0 };
6737 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX] = { 0 };
6738 double MinimumTWait = 0;
6739 double NonDPTEBandwidth = 0;
6740 double DPTEBandwidth = 0;
6741 double DCFCLKRequiredForAverageBandwidth = 0;
6742 double ExtraLatencyBytes = 0;
6743 double ExtraLatencyCycles = 0;
6744 double DCFCLKRequiredForPeakBandwidth = 0;
6745 int NoOfDPPState[DC__NUM_DPP__MAX] = { 0 };
6746 double MinimumTvmPlus2Tr0 = 0;
/*
 * Sum over planes of NoOfDPP * DPTEBytesPerRow divided by
 * 15.75 * HTotal / PixelClock.
 */
6748 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6749 for (k = 0; k < NumberOfActivePlanes; ++k) {
6750 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6751 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
/*
 * Copy this (i, j) slice of NoOfDPP into a flat int array so it can be
 * handed to CalculateExtraLatencyBytes().
 * NOTE(review): unsigned k against int `NumberOfActivePlanes - 1`; see the
 * header note about the zero-plane case.
 */
6754 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6755 NoOfDPPState[k] = NoOfDPP[i][j][k];
6758 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
6759 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
/*
 * Use the sum computed above when HostVM is enabled or immediate flip is
 * required; otherwise use the caller-supplied TotalDPTERowBandwidth.
 */
6760 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
6761 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
/* Average requirement: largest of the deep-sleep clock and two bandwidth terms. */
6762 DCFCLKRequiredForAverageBandwidth = dml_max3(ProjectedDCFCLKDeepSleep[i][j],
6763 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth / (MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6764 (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / ReturnBusWidth);
6766 ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, TotalNumberOfDCCActiveDPP[i][j],
6767 MetaChunkSize, GPUVMEnable, HostVMEnable, NumberOfActivePlanes, NoOfDPPState, dpte_group_bytes,
6768 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6769 HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
/* Round-trip cycles plus a fixed 32, plus the extra bytes expressed in bus cycles. */
6770 ExtraLatencyCycles = RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
/* First per-plane pass: peak requirement of each plane. */
6771 for (k = 0; k < NumberOfActivePlanes; ++k) {
6772 double DCFCLKCyclesRequiredInPrefetch = { 0 };
6773 double ExpectedPrefetchBWAcceleration = { 0 };
6774 double PrefetchTime = { 0 };
/* Luma plus chroma prefetch bytes, divided by efficiency and bus width. */
6776 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
6777 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
/*
 * Pixel cycles plus extra-latency, PDE/meta-PTE (counted only when
 * GPUVMMaxPageTableLevels > 2), DPTE row and meta row terms.
 */
6778 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
6779 / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * DPTEBytesPerRow[i][j][k] / PTEEfficiency
6780 / NormalEfficiency / ReturnBusWidth + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
6781 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
6782 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k]) / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
/* Non-zero only when GPUVM, per-plane dynamic metadata and DynamicMetadataVMEnabled are all set. */
6783 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
6784 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
/* Time left for prefetch after subtracting the wait, page-walk and metadata terms. */
6785 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait - UrgLatency[i] * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels
6786 : GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
6788 if (PrefetchTime > 0) {
6789 double ExpectedVRatioPrefetch = { 0 };
6790 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
/* Both ratio factors are floored at 1.0, so they can only scale the requirement up. */
6791 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6792 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6793 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
6794 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6795 + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / ReturnBusWidth;
/*
 * Presumably the else branch of the PrefetchTime test (its `else` line is
 * not visible here): fall back to the state's full DCFCLK.
 */
6798 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
/* Planes with dynamic metadata must also fit the extra latency in the allowed time. */
6800 if (DynamicMetadataEnable[k] == true) {
6801 double TsetupPipe = { 0 };
6802 double TdmbfPipe = { 0 };
6803 double TdmsksPipe = { 0 };
6804 double TdmecPipe = { 0 };
6805 double AllowedTimeForUrgentExtraLatency = { 0 };
/*
 * NOTE(review): some arguments of this call are not visible in this
 * excerpt; presumably it fills the four T*Pipe locals - confirm.
 */
6807 CalculateDynamicMetadataParameters(
6808 MaxInterDCNTileRepeaters,
6809 RequiredDPPCLK[i][j][k],
6810 RequiredDISPCLK[i][j],
6811 ProjectedDCFCLKDeepSleep[i][j],
6814 VTotal[k] - VActive[k],
6815 DynamicMetadataTransmittedBytes[k],
6816 DynamicMetadataLinesBeforeActiveRequired[k],
6818 ProgressiveToInterlaceUnitInOPP,
6823 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TsetupPipe
6824 - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6825 if (AllowedTimeForUrgentExtraLatency > 0) {
6826 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
6827 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
/* Presumably the else branch (not visible here): no time left, use the state's full DCFCLK. */
6829 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
/*
 * Total peak requirement is the sum of the per-plane values.
 * NOTE(review): same unsigned/int loop bound as the NoOfDPPState copy loop.
 */
6833 DCFCLKRequiredForPeakBandwidth = 0;
6834 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6835 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
/* Zero without GPUVM; otherwise UrgLatency[i] scaled by a page-table-depth expression. */
6837 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? (HostVMEnable == true ?
6838 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
/* Second per-plane pass: raise (or replace) the summed peak requirement. */
6839 for (k = 0; k < NumberOfActivePlanes; ++k) {
6840 double MaximumTvmPlus2Tr0PlusTsw = { 0 };
6841 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
/* Not enough time budget for this plane: use the state's full DCFCLK. */
6842 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
6843 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
/*
 * Presumably the else branch (not visible here): keep the largest of the
 * running value and two latency-over-available-time terms.
 */
6845 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
6846 / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
6847 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
/*
 * Result: the larger of the average and peak requirements, scaled by 1.05
 * and by the configured percent margin, capped at DCFCLKPerState[i].
 */
6850 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
6851 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
6856 #endif /* CONFIG_DRM_AMD_DC_DCN */