2 * Copyright 2020 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
26 #ifdef CONFIG_DRM_AMD_DC_DCN3_0
29 #include "../display_mode_lib.h"
30 #include "display_mode_vba_30.h"
31 #include "../dml_inline_defs.h"
36 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
38 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
39 * ways. Unless there is something clearly wrong with it the code should
40 * remain as-is as it provides us with a guarantee from HW that it is correct.
/*
 * Member declarations of the per-pipe parameter record. Code later in this
 * file reads several of them through a myPipe pointer (DCFCLKDeepSleep,
 * DPPPerPlane, InterlaceEnable, NumberOfCursors, DCCEnable,
 * ODMCombineEnabled).
 * NOTE(review): the enclosing type header is not visible at this point --
 * confirm the type name and any further members against the full file.
 */
48 double DCFCLKDeepSleep;
49 unsigned int DPPPerPlane;
51 enum scan_direction_class SourceScan;
/* 256-byte block dimensions, kept separately for luma (Y) and chroma (C). */
52 unsigned int BlockWidth256BytesY;
53 unsigned int BlockHeight256BytesY;
54 unsigned int BlockWidth256BytesC;
55 unsigned int BlockHeight256BytesC;
56 unsigned int InterlaceEnable;
57 unsigned int NumberOfCursors;
60 unsigned int DCCEnable;
61 bool ODMCombineEnabled;
/*
 * All-ones 32-bit constant.
 * NOTE(review): by name this is a bits-per-pixel sentinel marking a blended
 * pipe -- confirm at the use sites.
 */
65 #define BPP_BLENDED_PIPE 0xffffffff
/*
 * Same 5184 figure that the dscceComputeDelay() comments quote as the bound on
 * sliceWidth * numSlices.
 * NOTE(review): presumably the maximum DSC image width in pixels -- confirm.
 */
66 #define DCN30_MAX_DSC_IMAGE_WIDTH 5184
/* Forward declaration; invoked from dml30_recalculate() with its mode_lib pointer. */
68 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
/*
 * Forward declaration; invoked from dml30_recalculate() directly after
 * DisplayPipeConfiguration(), with the same mode_lib pointer.
 */
69 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
70 struct display_mode_lib *mode_lib);
/*
 * Forward declaration; the definition appears further down in this file and
 * documents the valid ranges for sliceWidth, numSlices and pixelFormat.
 */
71 static unsigned int dscceComputeDelay(
74 unsigned int sliceWidth,
75 unsigned int numSlices,
76 enum output_format_class pixelFormat,
77 enum output_encoder_class Output);
/*
 * Forward declaration; the definition further down branches on pixelFormat
 * (dm_420, dm_n422, other).
 */
78 static unsigned int dscComputeDelay(
79 enum output_format_class pixelFormat,
80 enum output_encoder_class Output);
81 // Super monster function with some 45 arguments
/*
 * Forward declaration. Returns bool. The scalar parameters are inputs; the
 * definition assigns through several of the trailing pointer parameters
 * (e.g. DSTXAfterScaler, DSTYAfterScaler, PrefetchBandwidth,
 * DestinationLinesToRequestVMInVBlank, VRatioPrefetchY/C,
 * NotEnoughTimeForDynamicMetadata), so those act as outputs.
 * NOTE(review): the meaning of the bool return value is not visible in this
 * part of the file -- confirm against the end of the definition.
 */
82 static bool CalculatePrefetchSchedule(
83 struct display_mode_lib *mode_lib,
84 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
85 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
87 unsigned int DSCDelay,
88 double DPPCLKDelaySubtotalPlusCNVCFormater,
89 double DPPCLKDelaySCL,
90 double DPPCLKDelaySCLLBOnly,
91 double DPPCLKDelayCNVCCursor,
92 double DISPCLKDelaySubtotal,
93 unsigned int DPP_RECOUT_WIDTH,
94 enum output_format_class OutputFormat,
95 unsigned int MaxInterDCNTileRepeaters,
96 unsigned int VStartup,
97 unsigned int MaxVStartup,
98 unsigned int GPUVMPageTableLevels,
101 unsigned int HostVMMaxNonCachedPageTableLevels,
102 double HostVMMinPageSize,
103 bool DynamicMetadataEnable,
104 bool DynamicMetadataVMEnabled,
105 int DynamicMetadataLinesBeforeActiveRequired,
106 unsigned int DynamicMetadataTransmittedBytes,
107 double UrgentLatency,
108 double UrgentExtraLatency,
110 unsigned int PDEAndMetaPTEBytesFrame,
111 unsigned int MetaRowByte,
112 unsigned int PixelPTEBytesPerRow,
113 double PrefetchSourceLinesY,
114 unsigned int SwathWidthY,
116 double VInitPreFillY,
117 unsigned int MaxNumSwathY,
118 double PrefetchSourceLinesC,
119 unsigned int SwathWidthC,
121 double VInitPreFillC,
122 unsigned int MaxNumSwathC,
123 long swath_width_luma_ub,
124 long swath_width_chroma_ub,
125 unsigned int SwathHeightY,
126 unsigned int SwathHeightC,
128 bool ProgressiveToInterlaceUnitInOPP,
129 double *DSTXAfterScaler,
130 double *DSTYAfterScaler,
131 double *DestinationLinesForPrefetch,
132 double *PrefetchBandwidth,
133 double *DestinationLinesToRequestVMInVBlank,
134 double *DestinationLinesToRequestRowInVBlank,
135 double *VRatioPrefetchY,
136 double *VRatioPrefetchC,
137 double *RequiredPrefetchPixDataBWLuma,
138 double *RequiredPrefetchPixDataBWChroma,
139 bool *NotEnoughTimeForDynamicMetadata,
141 double *prefetch_vmrow_bw,
144 unsigned int *VUpdateOffsetPix,
145 double *VUpdateWidthPix,
146 double *VReadyOffsetPix);
/*
 * Forward declaration: takes a clock value and a VCO speed, returns a double.
 * NOTE(review): by name, rounds Clock up to a DFS step derived from VCOSpeed --
 * confirm against the definition.
 */
147 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
/*
 * Forward declaration: same signature as RoundToDFSGranularityUp().
 * NOTE(review): by name, the round-down counterpart -- confirm against the
 * definition.
 */
148 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
/*
 * Forward declaration. Returns void; takes surface/viewport description values
 * followed by six unsigned int pointers (max uncompressed / max compressed /
 * independent block, each for luma and chroma).
 * NOTE(review): the six pointers are presumably the outputs -- confirm against
 * the definition.
 */
149 static void CalculateDCCConfiguration(
151 bool DCCProgrammingAssumesScanDirectionUnknown,
152 enum source_format_class SourcePixelFormat,
153 unsigned int ViewportWidthLuma,
154 unsigned int ViewportWidthChroma,
155 unsigned int ViewportHeightLuma,
156 unsigned int ViewportHeightChroma,
157 double DETBufferSize,
158 unsigned int RequestHeight256ByteLuma,
159 unsigned int RequestHeight256ByteChroma,
160 enum dm_swizzle_mode TilingFormat,
161 unsigned int BytePerPixelY,
162 unsigned int BytePerPixelC,
163 double BytePerPixelDETY,
164 double BytePerPixelDETC,
165 enum scan_direction_class ScanOrientation,
166 unsigned int *MaxUncompressedBlockLuma,
167 unsigned int *MaxUncompressedBlockChroma,
168 unsigned int *MaxCompressedBlockLuma,
169 unsigned int *MaxCompressedBlockChroma,
170 unsigned int *IndependentBlockLuma,
171 unsigned int *IndependentBlockChroma);
/*
 * Forward declaration. Returns a double and additionally takes VInitPreFill
 * and MaxNumSwath pointers.
 * NOTE(review): those two pointers look like secondary outputs that feed the
 * VInitPreFillY/C and MaxNumSwathY/C inputs of CalculatePrefetchSchedule() --
 * confirm against the definition and callers.
 */
172 static double CalculatePrefetchSourceLines(
173 struct display_mode_lib *mode_lib,
177 bool ProgressiveToInterlaceUnitInOPP,
178 unsigned int SwathHeight,
179 unsigned int ViewportYStart,
180 double *VInitPreFill,
181 unsigned int *MaxNumSwath);
/*
 * Forward declaration. Returns an unsigned int; takes surface/page-table
 * description values followed by a long run of pointer parameters
 * (MacroTileWidth ... MetaPTEBytesFrame).
 * NOTE(review): the pointer parameters are presumably outputs, and the return
 * value presumably a byte count -- confirm against the definition.
 */
182 static unsigned int CalculateVMAndRowBytes(
183 struct display_mode_lib *mode_lib,
185 unsigned int BlockHeight256Bytes,
186 unsigned int BlockWidth256Bytes,
187 enum source_format_class SourcePixelFormat,
188 unsigned int SurfaceTiling,
189 unsigned int BytePerPixel,
190 enum scan_direction_class ScanDirection,
191 unsigned int SwathWidth,
192 unsigned int ViewportHeight,
195 unsigned int HostVMMaxNonCachedPageTableLevels,
196 unsigned int GPUVMMinPageSize,
197 unsigned int HostVMMinPageSize,
198 unsigned int PTEBufferSizeInRequests,
200 unsigned int DCCMetaPitch,
201 unsigned int *MacroTileWidth,
202 unsigned int *MetaRowByte,
203 unsigned int *PixelPTEBytesPerRow,
204 bool *PTEBufferSizeNotExceeded,
205 unsigned int *dpte_row_width_ub,
206 unsigned int *dpte_row_height,
207 unsigned int *MetaRequestWidth,
208 unsigned int *MetaRequestHeight,
209 unsigned int *meta_row_width,
210 unsigned int *meta_row_height,
211 unsigned int *vm_group_bytes,
212 unsigned int *dpte_group_bytes,
213 unsigned int *PixelPTEReqWidth,
214 unsigned int *PixelPTEReqHeight,
215 unsigned int *PTERequestSize,
216 unsigned int *DPDE0BytesFrame,
217 unsigned int *MetaPTEBytesFrame);
/*
 * Forward declaration: takes the prefetch mode plus three latency/time values
 * and returns a double.
 * NOTE(review): presumably the source of the TWait term used in
 * CalculatePrefetchSchedule() -- confirm against the callers.
 */
218 static double CalculateTWait(
219 unsigned int PrefetchMode,
220 double DRAMClockChangeLatency,
221 double UrgentLatency,
222 double SREnterPlusExitTime);
/*
 * Forward declaration. Returns void; takes meta-row and PTE-row byte counts
 * and row heights for luma and chroma, ending with a dpte_row_bw pointer.
 * NOTE(review): dpte_row_bw is presumably an output -- confirm against the
 * definition.
 */
223 static void CalculateRowBandwidth(
225 enum source_format_class SourcePixelFormat,
230 unsigned int MetaRowByteLuma,
231 unsigned int MetaRowByteChroma,
232 unsigned int meta_row_height_luma,
233 unsigned int meta_row_height_chroma,
234 unsigned int PixelPTEBytesPerRowLuma,
235 unsigned int PixelPTEBytesPerRowChroma,
236 unsigned int dpte_row_height_luma,
237 unsigned int dpte_row_height_chroma,
239 double *dpte_row_bw);
/*
 * Forward declaration. Returns void; the last four parameters are pointers
 * (two destination-line counts, final_flip_bw, and a per-pipe
 * immediate-flip-supported flag).
 * NOTE(review): those pointers are presumably the outputs -- confirm against
 * the definition.
 */
240 static void CalculateFlipSchedule(
241 struct display_mode_lib *mode_lib,
242 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
243 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
244 double UrgentExtraLatency,
245 double UrgentLatency,
246 unsigned int GPUVMMaxPageTableLevels,
248 unsigned int HostVMMaxNonCachedPageTableLevels,
250 double HostVMMinPageSize,
251 double PDEAndMetaPTEBytesPerFrame,
253 double DPTEBytesPerRow,
254 double BandwidthAvailableForImmediateFlip,
255 unsigned int TotImmediateFlipBytes,
256 enum source_format_class SourcePixelFormat,
262 unsigned int dpte_row_height,
263 unsigned int meta_row_height,
264 unsigned int dpte_row_height_chroma,
265 unsigned int meta_row_height_chroma,
266 double *DestinationLinesToRequestVMInImmediateFlip,
267 double *DestinationLinesToRequestRowInImmediateFlip,
268 double *final_flip_bw,
269 bool *ImmediateFlipSupportedForPipe);
/*
 * Forward declaration: takes the writeback pixel format, scaling ratios, tap
 * count, destination/source dimensions and HTotal; returns a double.
 * NOTE(review): units of the returned delay are not visible here -- confirm
 * against the definition.
 */
270 static double CalculateWriteBackDelay(
271 enum source_format_class WritebackPixelFormat,
272 double WritebackHRatio,
273 double WritebackVRatio,
274 unsigned int WritebackVTaps,
275 long WritebackDestinationWidth,
276 long WritebackDestinationHeight,
277 long WritebackSourceHeight,
278 unsigned int HTotal);
/*
 * Forward declaration. Returns void. CalculatePrefetchSchedule() calls this
 * with its own MaxInterDCNTileRepeaters, the pipe's DCFCLKDeepSleep and
 * InterlaceEnable, and the dynamic-metadata byte/line counts.
 */
279 static void CalculateDynamicMetadataParameters(
280 int MaxInterDCNTileRepeaters,
283 double DCFClkDeepSleep,
287 long DynamicMetadataTransmittedBytes,
288 long DynamicMetadataLinesBeforeActiveRequired,
290 bool ProgressiveToInterlaceUnitInOPP,
/*
 * Forward declaration. Returns void; takes scalar configuration values, a set
 * of array parameters, and trailing pointer parameters
 * (DRAMClockChangeSupport and the watermark values).
 * NOTE(review): the arrays are presumably indexed per plane (up to
 * NumberOfActivePlanes) and the trailing pointers are presumably outputs --
 * confirm against the definition.
 */
295 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
296 struct display_mode_lib *mode_lib,
297 unsigned int PrefetchMode,
298 unsigned int NumberOfActivePlanes,
299 unsigned int MaxLineBufferLines,
300 unsigned int LineBufferSize,
301 unsigned int DPPOutputBufferPixels,
302 double DETBufferSizeInKByte,
303 unsigned int WritebackInterfaceBufferSize,
307 unsigned int dpte_group_bytes[],
308 unsigned int MetaChunkSize,
309 double UrgentLatency,
311 double WritebackLatency,
312 double WritebackChunkSize,
314 double DRAMClockChangeLatency,
316 double SREnterPlusExitTime,
317 double DCFCLKDeepSleep,
318 unsigned int DPPPerPlane[],
321 double DETBufferSizeY[],
322 double DETBufferSizeC[],
323 unsigned int SwathHeightY[],
324 unsigned int SwathHeightC[],
325 unsigned int LBBitPerPixel[],
326 double SwathWidthY[],
327 double SwathWidthC[],
329 double HRatioChroma[],
330 unsigned int vtaps[],
331 unsigned int VTAPsChroma[],
333 double VRatioChroma[],
334 unsigned int HTotal[],
336 unsigned int BlendingAndTiming[],
337 double BytePerPixelDETY[],
338 double BytePerPixelDETC[],
339 double DSTXAfterScaler[],
340 double DSTYAfterScaler[],
341 bool WritebackEnable[],
342 enum source_format_class WritebackPixelFormat[],
343 double WritebackDestinationWidth[],
344 double WritebackDestinationHeight[],
345 double WritebackSourceHeight[],
346 enum clock_change_support *DRAMClockChangeSupport,
347 double *UrgentWatermark,
348 double *WritebackUrgentWatermark,
349 double *DRAMClockChangeWatermark,
350 double *WritebackDRAMClockChangeWatermark,
351 double *StutterExitWatermark,
352 double *StutterEnterPlusExitWatermark,
353 double *MinActiveDRAMClockChangeLatencySupported);
/*
 * Forward declaration. Returns void; takes array parameters and ends with a
 * single double pointer, DCFCLKDeepSleep.
 * NOTE(review): the arrays are presumably per plane and the pointer is
 * presumably the result -- confirm against the definition.
 */
354 static void CalculateDCFCLKDeepSleep(
355 struct display_mode_lib *mode_lib,
356 unsigned int NumberOfActivePlanes,
360 double VRatioChroma[],
361 double SwathWidthY[],
362 double SwathWidthC[],
363 unsigned int DPPPerPlane[],
365 double HRatioChroma[],
367 double PSCL_THROUGHPUT[],
368 double PSCL_THROUGHPUT_CHROMA[],
370 double ReadBandwidthLuma[],
371 double ReadBandwidthChroma[],
373 double *DCFCLKDeepSleep);
/*
 * Forward declaration. Returns void; takes swath, DET-buffer and cursor
 * description values, ending with three burst-factor pointers (cursor, luma,
 * chroma) and a NotEnoughUrgentLatencyHiding flag pointer.
 * NOTE(review): the four pointers are presumably the outputs -- confirm
 * against the definition.
 */
374 static void CalculateUrgentBurstFactor(
375 long swath_width_luma_ub,
376 long swath_width_chroma_ub,
377 unsigned int DETBufferSizeInKByte,
378 unsigned int SwathHeightY,
379 unsigned int SwathHeightC,
381 double UrgentLatency,
382 double CursorBufferSize,
383 unsigned int CursorWidth,
384 unsigned int CursorBPP,
387 double BytePerPixelInDETY,
388 double BytePerPixelInDETC,
389 double DETBufferSizeY,
390 double DETBufferSizeC,
391 double *UrgentBurstFactorCursor,
392 double *UrgentBurstFactorLuma,
393 double *UrgentBurstFactorChroma,
394 bool *NotEnoughUrgentLatencyHiding);
/*
 * Forward declaration. Returns void. Besides scalar settings it takes many
 * table parameters declared as [][2] or [][2][DC__NUM_DPP__MAX], plus plain
 * one-dimensional arrays; DCFCLKState[][2] is the last parameter.
 * NOTE(review): the meaning of the outer index and of the fixed [2] dimension
 * is not visible here, nor is which tables are written -- confirm against the
 * definition.
 */
396 static void UseMinimumDCFCLK(
397 struct display_mode_lib *mode_lib,
398 int MaxInterDCNTileRepeaters,
400 double FinalDRAMClockChangeLatency,
401 double SREnterPlusExitTime,
403 int RoundTripPingLatencyCycles,
405 int PixelChunkSizeInKByte,
408 int GPUVMMaxPageTableLevels,
410 int NumberOfActivePlanes,
411 double HostVMMinPageSize,
412 int HostVMMaxNonCachedPageTableLevels,
413 bool DynamicMetadataVMEnabled,
414 enum immediate_flip_requirement ImmediateFlipRequirement,
415 bool ProgressiveToInterlaceUnitInOPP,
416 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
417 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
418 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
419 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
422 int DynamicMetadataTransmittedBytes[],
423 int DynamicMetadataLinesBeforeActiveRequired[],
425 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
426 double RequiredDISPCLK[][2],
428 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
429 double ProjectedDCFCLKDeepSleep[][2],
430 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
431 double TotalVActivePixelBandwidth[][2],
432 double TotalVActiveCursorBandwidth[][2],
433 double TotalMetaRowBandwidth[][2],
434 double TotalDPTERowBandwidth[][2],
435 unsigned int TotalNumberOfActiveDPP[][2],
436 unsigned int TotalNumberOfDCCActiveDPP[][2],
437 int dpte_group_bytes[],
438 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
439 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
440 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
441 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
446 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
447 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
448 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
449 bool DynamicMetadataEnable[],
450 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
451 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
452 double ReadBandwidthLuma[],
453 double ReadBandwidthChroma[],
454 double DCFCLKPerState[],
455 double DCFCLKState[][2]);
/*
 * Forward declaration. Returns void; every parameter after
 * NumberOfActivePlanes is an array. CursorWidth and CursorBPP carry a fixed
 * second dimension of 2.
 * NOTE(review): the trailing DisplayPipe*DeliveryTime* and
 * CursorRequestDeliveryTime* arrays are presumably per-plane outputs --
 * confirm against the definition.
 */
456 static void CalculatePixelDeliveryTimes(
457 unsigned int NumberOfActivePlanes,
459 double VRatioChroma[],
460 double VRatioPrefetchY[],
461 double VRatioPrefetchC[],
462 unsigned int swath_width_luma_ub[],
463 unsigned int swath_width_chroma_ub[],
464 unsigned int DPPPerPlane[],
466 double HRatioChroma[],
468 double PSCL_THROUGHPUT[],
469 double PSCL_THROUGHPUT_CHROMA[],
472 enum scan_direction_class SourceScan[],
473 unsigned int NumberOfCursors[],
474 unsigned int CursorWidth[][2],
475 unsigned int CursorBPP[][2],
476 unsigned int BlockWidth256BytesY[],
477 unsigned int BlockHeight256BytesY[],
478 unsigned int BlockWidth256BytesC[],
479 unsigned int BlockHeight256BytesC[],
480 double DisplayPipeLineDeliveryTimeLuma[],
481 double DisplayPipeLineDeliveryTimeChroma[],
482 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
483 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
484 double DisplayPipeRequestDeliveryTimeLuma[],
485 double DisplayPipeRequestDeliveryTimeChroma[],
486 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
487 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
488 double CursorRequestDeliveryTime[],
489 double CursorRequestDeliveryTimePrefetch[]);
/*
 * Forward declaration. Returns void; takes int arrays describing meta/PTE row
 * and request geometry, followed by double arrays of row and chunk/group
 * times.
 * NOTE(review): the trailing double arrays (DST_Y_PER_PTE_ROW_NOM_L through
 * time_per_pte_group_flip_chroma) are presumably per-plane outputs -- confirm
 * against the definition.
 */
491 static void CalculateMetaAndPTETimes(
492 int NumberOfActivePlanes,
495 int MinMetaChunkSizeBytes,
498 double VRatioChroma[],
499 double DestinationLinesToRequestRowInVBlank[],
500 double DestinationLinesToRequestRowInImmediateFlip[],
505 enum scan_direction_class SourceScan[],
506 int dpte_row_height[],
507 int dpte_row_height_chroma[],
508 int meta_row_width[],
509 int meta_row_width_chroma[],
510 int meta_row_height[],
511 int meta_row_height_chroma[],
512 int meta_req_width[],
513 int meta_req_width_chroma[],
514 int meta_req_height[],
515 int meta_req_height_chroma[],
516 int dpte_group_bytes[],
517 int PTERequestSizeY[],
518 int PTERequestSizeC[],
519 int PixelPTEReqWidthY[],
520 int PixelPTEReqHeightY[],
521 int PixelPTEReqWidthC[],
522 int PixelPTEReqHeightC[],
523 int dpte_row_width_luma_ub[],
524 int dpte_row_width_chroma_ub[],
525 double DST_Y_PER_PTE_ROW_NOM_L[],
526 double DST_Y_PER_PTE_ROW_NOM_C[],
527 double DST_Y_PER_META_ROW_NOM_L[],
528 double DST_Y_PER_META_ROW_NOM_C[],
529 double TimePerMetaChunkNominal[],
530 double TimePerChromaMetaChunkNominal[],
531 double TimePerMetaChunkVBlank[],
532 double TimePerChromaMetaChunkVBlank[],
533 double TimePerMetaChunkFlip[],
534 double TimePerChromaMetaChunkFlip[],
535 double time_per_pte_group_nom_luma[],
536 double time_per_pte_group_vblank_luma[],
537 double time_per_pte_group_flip_luma[],
538 double time_per_pte_group_nom_chroma[],
539 double time_per_pte_group_vblank_chroma[],
540 double time_per_pte_group_flip_chroma[]);
/*
 * Forward declaration. Returns void; ends with four double arrays
 * (TimePerVMGroupVBlank/Flip, TimePerVMRequestVBlank/Flip).
 * NOTE(review): those four arrays are presumably per-plane outputs -- confirm
 * against the definition.
 */
542 static void CalculateVMGroupAndRequestTimes(
543 unsigned int NumberOfActivePlanes,
545 unsigned int GPUVMMaxPageTableLevels,
546 unsigned int HTotal[],
548 double DestinationLinesToRequestVMInVBlank[],
549 double DestinationLinesToRequestVMInImmediateFlip[],
552 int dpte_row_width_luma_ub[],
553 int dpte_row_width_chroma_ub[],
554 int vm_group_bytes[],
555 unsigned int dpde0_bytes_per_frame_ub_l[],
556 unsigned int dpde0_bytes_per_frame_ub_c[],
557 int meta_pte_bytes_per_frame_ub_l[],
558 int meta_pte_bytes_per_frame_ub_c[],
559 double TimePerVMGroupVBlank[],
560 double TimePerVMGroupFlip[],
561 double TimePerVMRequestVBlank[],
562 double TimePerVMRequestFlip[]);
/*
 * Forward declaration. Returns void; the final two parameters are double
 * pointers, StutterEfficiencyNotIncludingVBlank and StutterEfficiency.
 * NOTE(review): those two pointers are presumably the results -- confirm
 * against the definition.
 */
564 static void CalculateStutterEfficiency(
565 int NumberOfActivePlanes,
566 long ROBBufferSizeInKByte,
567 double TotalDataReadBandwidth,
571 bool SynchronizedVBlank,
573 double DETBufferSizeY[],
575 double BytePerPixelDETY[],
576 double SwathWidthY[],
579 double DCCRateLuma[],
580 double DCCRateChroma[],
585 enum scan_direction_class SourceScan[],
586 int BlockHeight256BytesY[],
587 int BlockWidth256BytesY[],
588 int BlockHeight256BytesC[],
589 int BlockWidth256BytesC[],
590 int DCCYMaxUncompressedBlock[],
591 int DCCCMaxUncompressedBlock[],
594 bool WritebackEnable[],
595 double ReadBandwidthPlaneLuma[],
596 double ReadBandwidthPlaneChroma[],
597 double meta_row_bw[],
598 double dpte_row_bw[],
599 double *StutterEfficiencyNotIncludingVBlank,
600 double *StutterEfficiency);
/*
 * Forward declaration. Returns void; takes array parameters and ends with a
 * ViewportSizeSupportPerPlane array and a single ViewportSizeSupport flag
 * pointer.
 * NOTE(review): the arrays are presumably per plane, and the swath/DET arrays
 * and the flag are presumably outputs -- confirm against the definition.
 */
602 static void CalculateSwathAndDETConfiguration(
604 int NumberOfActivePlanes,
605 long DETBufferSizeInKByte,
606 double MaximumSwathWidthLuma[],
607 double MaximumSwathWidthChroma[],
608 enum scan_direction_class SourceScan[],
609 enum source_format_class SourcePixelFormat[],
610 enum dm_swizzle_mode SurfaceTiling[],
612 int ViewportHeight[],
615 int SurfaceHeightY[],
616 int SurfaceHeightC[],
617 int Read256BytesBlockHeightY[],
618 int Read256BytesBlockHeightC[],
619 int Read256BytesBlockWidthY[],
620 int Read256BytesBlockWidthC[],
621 enum odm_combine_mode ODMCombineEnabled[],
622 int BlendingAndTiming[],
625 double BytePerPixDETY[],
626 double BytePerPixDETC[],
629 double HRatioChroma[],
631 int swath_width_luma_ub[],
632 int swath_width_chroma_ub[],
634 double SwathWidthChroma[],
637 double DETBufferSizeY[],
638 double DETBufferSizeC[],
639 bool ViewportSizeSupportPerPlane[],
640 bool *ViewportSizeSupport);
/*
 * Forward declaration. Returns void; every parameter after
 * NumberOfActivePlanes is an array.
 * NOTE(review): the trailing SwathWidth*, MaximumSwathHeight* and
 * swath_width_*_ub arrays are presumably per-plane outputs -- confirm against
 * the definition.
 */
641 static void CalculateSwathWidth(
643 int NumberOfActivePlanes,
644 enum source_format_class SourcePixelFormat[],
645 enum scan_direction_class SourceScan[],
646 unsigned int ViewportWidth[],
647 unsigned int ViewportHeight[],
648 unsigned int SurfaceWidthY[],
649 unsigned int SurfaceWidthC[],
650 unsigned int SurfaceHeightY[],
651 unsigned int SurfaceHeightC[],
652 enum odm_combine_mode ODMCombineEnabled[],
655 int Read256BytesBlockHeightY[],
656 int Read256BytesBlockHeightC[],
657 int Read256BytesBlockWidthY[],
658 int Read256BytesBlockWidthC[],
659 int BlendingAndTiming[],
660 unsigned int HActive[],
663 double SwathWidthSingleDPPY[],
664 double SwathWidthSingleDPPC[],
665 double SwathWidthY[],
666 double SwathWidthC[],
667 int MaximumSwathHeightY[],
668 int MaximumSwathHeightC[],
669 unsigned int swath_width_luma_ub[],
670 unsigned int swath_width_chroma_ub[]);
/*
 * Forward declaration. Returns a double. Its parameter list is that of
 * CalculateExtraLatencyBytes() with RoundTripPingLatencyCycles added in front.
 * NOTE(review): presumably converts the byte count from
 * CalculateExtraLatencyBytes() into a time -- confirm against the definition.
 */
671 static double CalculateExtraLatency(
672 long RoundTripPingLatencyCycles,
673 long ReorderingBytes,
675 int TotalNumberOfActiveDPP,
676 int PixelChunkSizeInKByte,
677 int TotalNumberOfDCCActiveDPP,
682 int NumberOfActivePlanes,
684 int dpte_group_bytes[],
685 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
686 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
687 double HostVMMinPageSize,
688 int HostVMMaxNonCachedPageTableLevels);
/*
 * Forward declaration. Returns a double; takes reordering bytes, DPP counts,
 * chunk size, the dpte_group_bytes array and host-VM settings.
 * NOTE(review): by name the result is a byte count -- confirm against the
 * definition.
 */
689 static double CalculateExtraLatencyBytes(
690 long ReorderingBytes,
691 int TotalNumberOfActiveDPP,
692 int PixelChunkSizeInKByte,
693 int TotalNumberOfDCCActiveDPP,
697 int NumberOfActivePlanes,
699 int dpte_group_bytes[],
700 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
701 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
702 double HostVMMinPageSize,
703 int HostVMMaxNonCachedPageTableLevels);
/*
 * Forward declaration. Returns a double; takes three urgent-latency values
 * (pixel only, pixel mixed with VM, VM only), an adjustment enable flag, two
 * fabric-clock adjustment terms and a fabric clock.
 * NOTE(review): how the three latencies are combined and when the adjustment
 * applies is not visible here -- confirm against the definition.
 */
704 static double CalculateUrgentLatency(
705 double UrgentLatencyPixelDataOnly,
706 double UrgentLatencyPixelMixedWithVMData,
707 double UrgentLatencyVMDataOnly,
708 bool DoUrgentLatencyAdjustment,
709 double UrgentLatencyAdjustmentFabricClockComponent,
710 double UrgentLatencyAdjustmentFabricClockReference,
711 double FabricClockSingle);
/*
 * Forward declaration. Returns bool; takes a source pixel format and a
 * swizzle mode, followed by eight pointer parameters (bytes per pixel and
 * 256-byte block dimensions for Y and C).
 * NOTE(review): the pointers are presumably outputs and the bool presumably
 * reports success -- confirm against the definition.
 */
713 static bool CalculateBytePerPixelAnd256BBlockSizes(
714 enum source_format_class SourcePixelFormat,
715 enum dm_swizzle_mode SurfaceTiling,
716 unsigned int *BytePerPixelY,
717 unsigned int *BytePerPixelC,
718 double *BytePerPixelDETY,
719 double *BytePerPixelDETC,
720 unsigned int *BlockHeight256BytesY,
721 unsigned int *BlockHeight256BytesC,
722 unsigned int *BlockWidth256BytesY,
723 unsigned int *BlockWidth256BytesC);
/*
 * Non-static entry point. Passes the same mode_lib pointer to four steps, in
 * this order: ModeSupportAndSystemConfiguration,
 * PixelClockAdjustmentForProgressiveToInterlaceUnit, DisplayPipeConfiguration,
 * and the combined DISPCLK/DPPCLK/DCFCLK/prefetch/watermark calculation.
 * NOTE(review): the first two callees are not declared in this file's visible
 * prototypes; presumably they come from an included header -- confirm.
 */
725 void dml30_recalculate(struct display_mode_lib *mode_lib)
727 ModeSupportAndSystemConfiguration(mode_lib);
728 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
729 DisplayPipeConfiguration(mode_lib);
730 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
/*
 * Computes the "dscce" delay for a DSC configuration.
 *
 * Visible steps: rcModelSize is fixed at 8192; the initial transmit delay is
 * round(rcModelSize / 2 / BPP / pixelsPerClock); the slice width is divided by
 * pixelsPerClock; a cycle count (Delay) is built from the intermediate terms;
 * and that count is converted to pixels as Delay * 3 * pixelsPerClock.
 * pixelsPerClock and the extra-cycle term s are selected from pixelFormat.
 */
733 static unsigned int dscceComputeDelay(
736 unsigned int sliceWidth,
737 unsigned int numSlices,
738 enum output_format_class pixelFormat,
739 enum output_encoder_class Output)
741 // valid bpc = source bits per component in the set of {8, 10, 12}
742 // valid bpp = increments of 1/16 of a bit
743 // min = 6/7/8 in N420/N422/444, respectively
744 // max = such that compression is 1:1
745 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
746 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
747 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
750 unsigned int rcModelSize = 8192;
752 // N422/N420 operate at 2 pixels per clock
753 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L,
756 if (pixelFormat == dm_420)
758 // #all other modes operate at 1 pixel per clock
759 else if (pixelFormat == dm_444)
761 else if (pixelFormat == dm_n422)
766 //initial transmit delay as per PPS
767 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
777 //divide by pixel per cycle to compute slice width as seen by DSC
778 w = sliceWidth / pixelsPerClock;
780 //422 mode has an additional cycle of delay
781 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
786 //main calculation for the dscce
787 ix = initalXmitDelay + 45;
792 ax = (a + 2) / 3 + D + 6 + 1;
// integer ceiling of ax / wx (all operands are unsigned int)
793 L = (ax + wx - 1) / wx;
794 if ((ix % w) == 0 && P != 0)
// per-slice term L * wx is counted for every slice after the first
798 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
800 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
801 pixels = Delay * 3 * pixelsPerClock;
/*
 * Accumulates a fixed delay, starting from Delay = 0, for the DSC stages named
 * in the comments below (input deserializer, input cdc fifo, cdc uncertainty,
 * output cdc fifo, output serializer).
 * There is one section for dm_420, one for dm_n422, and a third for the
 * remaining formats.
 * NOTE(review): the per-stage constants and the unit of the result are not
 * visible in this part of the file -- confirm against the full definition.
 */
805 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
807 unsigned int Delay = 0;
809 if (pixelFormat == dm_420) {
814 // dscc - input deserializer
816 // dscc gets pixels every other cycle
818 // dscc - input cdc fifo
820 // dscc gets pixels every other cycle
822 // dscc - cdc uncertainty
824 // dscc - output cdc fifo
826 // dscc gets pixels every other cycle
828 // dscc - cdc uncertainty
830 // dscc - output serializer
834 } else if (pixelFormat == dm_n422) {
839 // dscc - input deserializer
841 // dscc - input cdc fifo
843 // dscc - cdc uncertainty
845 // dscc - output cdc fifo
847 // dscc - cdc uncertainty
849 // dscc - output serializer
859 // dscc - input deserializer
861 // dscc - input cdc fifo
863 // dscc - cdc uncertainty
865 // dscc - output cdc fifo
867 // dscc - output serializer
869 // dscc - cdc uncertainty
878 static bool CalculatePrefetchSchedule(
879 struct display_mode_lib *mode_lib,
880 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
881 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
883 unsigned int DSCDelay,
884 double DPPCLKDelaySubtotalPlusCNVCFormater,
885 double DPPCLKDelaySCL,
886 double DPPCLKDelaySCLLBOnly,
887 double DPPCLKDelayCNVCCursor,
888 double DISPCLKDelaySubtotal,
889 unsigned int DPP_RECOUT_WIDTH,
890 enum output_format_class OutputFormat,
891 unsigned int MaxInterDCNTileRepeaters,
892 unsigned int VStartup,
893 unsigned int MaxVStartup,
894 unsigned int GPUVMPageTableLevels,
897 unsigned int HostVMMaxNonCachedPageTableLevels,
898 double HostVMMinPageSize,
899 bool DynamicMetadataEnable,
900 bool DynamicMetadataVMEnabled,
901 int DynamicMetadataLinesBeforeActiveRequired,
902 unsigned int DynamicMetadataTransmittedBytes,
903 double UrgentLatency,
904 double UrgentExtraLatency,
906 unsigned int PDEAndMetaPTEBytesFrame,
907 unsigned int MetaRowByte,
908 unsigned int PixelPTEBytesPerRow,
909 double PrefetchSourceLinesY,
910 unsigned int SwathWidthY,
912 double VInitPreFillY,
913 unsigned int MaxNumSwathY,
914 double PrefetchSourceLinesC,
915 unsigned int SwathWidthC,
917 double VInitPreFillC,
918 unsigned int MaxNumSwathC,
919 long swath_width_luma_ub,
920 long swath_width_chroma_ub,
921 unsigned int SwathHeightY,
922 unsigned int SwathHeightC,
924 bool ProgressiveToInterlaceUnitInOPP,
925 double *DSTXAfterScaler,
926 double *DSTYAfterScaler,
927 double *DestinationLinesForPrefetch,
928 double *PrefetchBandwidth,
929 double *DestinationLinesToRequestVMInVBlank,
930 double *DestinationLinesToRequestRowInVBlank,
931 double *VRatioPrefetchY,
932 double *VRatioPrefetchC,
933 double *RequiredPrefetchPixDataBWLuma,
934 double *RequiredPrefetchPixDataBWChroma,
935 bool *NotEnoughTimeForDynamicMetadata,
937 double *prefetch_vmrow_bw,
940 unsigned int *VUpdateOffsetPix,
941 double *VUpdateWidthPix,
942 double *VReadyOffsetPix)
944 bool MyError = false;
945 unsigned int DPPCycles = 0, DISPCLKCycles = 0;
946 double DSTTotalPixelsAfterScaler = 0;
947 double LineTime = 0, Tsetup = 0;
948 double dst_y_prefetch_equ = 0;
950 double prefetch_bw_oto = 0;
953 double Tvm_oto_lines = 0;
954 double Tr0_oto_lines = 0;
955 double dst_y_prefetch_oto = 0;
956 double TimeForFetchingMetaPTE = 0;
957 double TimeForFetchingRowInVBlank = 0;
958 double LinesToRequestPrefetchPixelData = 0;
959 double HostVMInefficiencyFactor = 0;
960 unsigned int HostVMDynamicLevelsTrips = 0;
961 double trip_to_mem = 0;
962 double Tvm_trips = 0;
963 double Tr0_trips = 0;
964 double Tvm_trips_rounded = 0;
965 double Tr0_trips_rounded = 0;
967 double Tpre_rounded = 0;
968 double prefetch_bw_equ = 0;
975 if (GPUVMEnable == true && HostVMEnable == true) {
976 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
977 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
979 HostVMInefficiencyFactor = 1;
980 HostVMDynamicLevelsTrips = 0;
983 CalculateDynamicMetadataParameters(
984 MaxInterDCNTileRepeaters,
987 myPipe->DCFCLKDeepSleep,
991 DynamicMetadataTransmittedBytes,
992 DynamicMetadataLinesBeforeActiveRequired,
993 myPipe->InterlaceEnable,
994 ProgressiveToInterlaceUnitInOPP,
1000 LineTime = myPipe->HTotal / myPipe->PixelClock;
1001 trip_to_mem = UrgentLatency;
1002 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
1004 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
1005 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
1007 *Tdmdl = TWait + UrgentExtraLatency;
1010 if (DynamicMetadataEnable == true) {
1011 if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
1012 *NotEnoughTimeForDynamicMetadata = true;
1014 *NotEnoughTimeForDynamicMetadata = false;
1015 dml_print("DML: Not Enough Time for Dynamic Meta!\n");
1016 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1017 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1018 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1019 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1022 *NotEnoughTimeForDynamicMetadata = false;
1025 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1027 if (myPipe->ScalerEnabled)
1028 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1030 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1032 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1034 DISPCLKCycles = DISPCLKDelaySubtotal;
1036 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1039 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK
1042 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1044 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
1045 *DSTYAfterScaler = 1;
1047 *DSTYAfterScaler = 0;
1049 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1050 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1051 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1056 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1057 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1058 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1061 if (GPUVMPageTableLevels >= 3) {
1062 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1065 } else if (!myPipe->DCCEnable)
1068 *Tno_bw = LineTime / 4;
1070 dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
1071 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1073 Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
1074 Tsw_oto = Lsw_oto * LineTime;
1076 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto;
1078 if (GPUVMEnable == true) {
1079 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
1083 Tvm_oto = LineTime / 4.0;
1085 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1087 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
1088 LineTime - Tvm_oto, LineTime / 4);
1090 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1092 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1093 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1094 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1096 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1097 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1099 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1100 dml_print("DML: dst_y_prefetch_equ: %f\n", dst_y_prefetch_equ);
1102 dml_print("DML: LineTime: %f\n", LineTime);
1103 dml_print("DML: VStartup: %d\n", VStartup);
1104 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1105 dml_print("DML: Tsetup: %fus - time from vstartup to vready\n", Tsetup);
1106 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1107 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1108 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1109 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1110 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1111 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1112 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1113 dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1114 dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler);
1116 *PrefetchBandwidth = 0;
1117 *DestinationLinesToRequestVMInVBlank = 0;
1118 *DestinationLinesToRequestRowInVBlank = 0;
1119 *VRatioPrefetchY = 0;
1120 *VRatioPrefetchC = 0;
1121 *RequiredPrefetchPixDataBWLuma = 0;
1122 if (dst_y_prefetch_equ > 1) {
1123 double PrefetchBandwidth1 = 0;
1124 double PrefetchBandwidth2 = 0;
1125 double PrefetchBandwidth3 = 0;
1126 double PrefetchBandwidth4 = 0;
1128 if (Tpre_rounded - *Tno_bw > 0)
1129 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
1130 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1131 + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY
1132 + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1133 / (Tpre_rounded - *Tno_bw);
1135 PrefetchBandwidth1 = 0;
1137 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) {
1138 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw);
1141 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1142 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
1143 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1144 swath_width_luma_ub * BytePerPixelY +
1145 PrefetchSourceLinesC * swath_width_chroma_ub *
1147 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1149 PrefetchBandwidth2 = 0;
1151 if (Tpre_rounded - Tvm_trips_rounded > 0)
1152 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow *
1153 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1154 swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC *
1155 swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded -
1158 PrefetchBandwidth3 = 0;
1160 if (VStartup == MaxVStartup && (PrefetchBandwidth3 > 4 * prefetch_bw_oto) && Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1161 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded);
1164 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1165 PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1166 / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1168 PrefetchBandwidth4 = 0;
1175 if (PrefetchBandwidth1 > 0) {
1176 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
1177 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1186 if (PrefetchBandwidth2 > 0) {
1187 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
1188 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1197 if (PrefetchBandwidth3 > 0) {
1198 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
1199 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1209 prefetch_bw_equ = PrefetchBandwidth1;
1210 } else if (Case2OK) {
1211 prefetch_bw_equ = PrefetchBandwidth2;
1212 } else if (Case3OK) {
1213 prefetch_bw_equ = PrefetchBandwidth3;
1215 prefetch_bw_equ = PrefetchBandwidth4;
1218 dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ);
1220 if (prefetch_bw_equ > 0) {
1221 if (GPUVMEnable == true) {
1222 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1224 Tvm_equ = LineTime / 4;
1227 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1229 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1231 (LineTime - Tvm_equ) / 2,
1234 Tr0_equ = (LineTime - Tvm_equ) / 2;
1239 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1243 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1244 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1245 TimeForFetchingMetaPTE = Tvm_oto;
1246 TimeForFetchingRowInVBlank = Tr0_oto;
1247 *PrefetchBandwidth = prefetch_bw_oto;
1249 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1250 TimeForFetchingMetaPTE = Tvm_equ;
1251 TimeForFetchingRowInVBlank = Tr0_equ;
1252 *PrefetchBandwidth = prefetch_bw_equ;
1255 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1257 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1260 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank
1261 - 2 * *DestinationLinesToRequestRowInVBlank;
1263 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1265 *VRatioPrefetchY = (double) PrefetchSourceLinesY
1266 / LinesToRequestPrefetchPixelData;
1267 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1268 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1269 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1270 *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1271 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1272 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1275 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1276 *VRatioPrefetchY = 0;
1280 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1281 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1283 if ((SwathHeightC > 4)) {
1284 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1285 *VRatioPrefetchC = dml_max(*VRatioPrefetchC,
1286 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1287 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1290 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1291 *VRatioPrefetchC = 0;
1295 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime;
1296 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime;
1299 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1300 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1301 *VRatioPrefetchY = 0;
1302 *VRatioPrefetchC = 0;
1303 *RequiredPrefetchPixDataBWLuma = 0;
1304 *RequiredPrefetchPixDataBWChroma = 0;
1307 dml_print("DML: Tpre: %fus - sum of tim to request meta pte, 2 x data pte + meta data, swaths\n", (double)LinesToRequestPrefetchPixelData * LineTime + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1308 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1309 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1310 dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1311 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime);
1312 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1313 dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n");
1314 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup);
1315 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1319 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1323 double prefetch_vm_bw = 0;
1324 double prefetch_row_bw = 0;
1326 if (PDEAndMetaPTEBytesFrame == 0) {
1328 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1329 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1333 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1335 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1336 prefetch_row_bw = 0;
1337 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1338 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1340 prefetch_row_bw = 0;
1342 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1345 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1349 *PrefetchBandwidth = 0;
1350 TimeForFetchingMetaPTE = 0;
1351 TimeForFetchingRowInVBlank = 0;
1352 *DestinationLinesToRequestVMInVBlank = 0;
1353 *DestinationLinesToRequestRowInVBlank = 0;
1354 *DestinationLinesForPrefetch = 0;
1355 LinesToRequestPrefetchPixelData = 0;
1356 *VRatioPrefetchY = 0;
1357 *VRatioPrefetchC = 0;
1358 *RequiredPrefetchPixDataBWLuma = 0;
1359 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * Return 4 * VCOSpeed divided by dml_floor((4 * VCOSpeed) / Clock, 1).
 *
 * NOTE(review): presumably dml_floor(x, 1) rounds x down to a whole number,
 * which would make the result the nearest such quotient at or above Clock -
 * confirm against dml_floor(). No guard exists for a zero divider.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
/*
 * Return 4 * VCOSpeed divided by dml_ceil((4 * VCOSpeed) / Clock, 1).
 *
 * NOTE(review): presumably dml_ceil(x, 1) rounds x up to a whole number,
 * which would make the result the nearest such quotient at or below Clock -
 * confirm against dml_ceil().
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return vco_x4 / divider;
}
/*
 * CalculateDCCConfiguration - pick a DCC request type for the luma and the
 * chroma plane and report the matching block sizes through the six output
 * pointers.
 *
 * A request type (REQ_256Bytes, REQ_128BytesContiguous or
 * REQ_128BytesNonContiguous) is chosen per plane from the req128_* and
 * segment_order_* flags computed below and is then translated into
 * MaxUncompressedBlock / MaxCompressedBlock / IndependentBlock:
 *     REQ_256Bytes            -> 256 / 256 / 0
 *     REQ_128BytesContiguous  -> 256 / 128 / 128
 *     remaining assignment    -> 256 /  64 / 64
 * The chroma outputs are zeroed when DCCEnabled is not true or
 * BytePerPixelC is 0; the luma outputs are zeroed when DCCEnabled is not
 * true.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are not referenced in the lines visible here.
 *
 * NOTE(review): this listing appears to have lost a number of short lines
 * (braces, bare "else" lines, the tails of some conditions, and several
 * declarations/assignments such as those of DCCEnabled, yuv420 and the
 * horz_div / vert_div variables). The comments describe only what the
 * visible lines show; confirm against the complete file.
 */
1375 static void CalculateDCCConfiguration(
1377 bool DCCProgrammingAssumesScanDirectionUnknown,
1378 enum source_format_class SourcePixelFormat,
1379 unsigned int SurfaceWidthLuma,
1380 unsigned int SurfaceWidthChroma,
1381 unsigned int SurfaceHeightLuma,
1382 unsigned int SurfaceHeightChroma,
1383 double DETBufferSize,
1384 unsigned int RequestHeight256ByteLuma,
1385 unsigned int RequestHeight256ByteChroma,
1386 enum dm_swizzle_mode TilingFormat,
1387 unsigned int BytePerPixelY,
1388 unsigned int BytePerPixelC,
1389 double BytePerPixelDETY,
1390 double BytePerPixelDETC,
1391 enum scan_direction_class ScanOrientation,
1392 unsigned int *MaxUncompressedBlockLuma,
1393 unsigned int *MaxUncompressedBlockChroma,
1394 unsigned int *MaxCompressedBlockLuma,
1395 unsigned int *MaxCompressedBlockChroma,
1396 unsigned int *IndependentBlockLuma,
1397 unsigned int *IndependentBlockChroma)
1405 int req128_horz_wc_l = 0;
1406 int req128_horz_wc_c = 0;
1407 int req128_vert_wc_l = 0;
1408 int req128_vert_wc_c = 0;
1409 int segment_order_horz_contiguous_luma = 0;
1410 int segment_order_horz_contiguous_chroma = 0;
1411 int segment_order_vert_contiguous_luma = 0;
1412 int segment_order_vert_contiguous_chroma = 0;
1414 long full_swath_bytes_horz_wc_l = 0;
1415 long full_swath_bytes_horz_wc_c = 0;
1416 long full_swath_bytes_vert_wc_l = 0;
1417 long full_swath_bytes_vert_wc_c = 0;
1419 long swath_buf_size = 0;
1420 double detile_buf_vp_horz_limit = 0;
1421 double detile_buf_vp_vert_limit = 0;
1423 long MAS_vp_horz_limit = 0;
1424 long MAS_vp_vert_limit = 0;
1425 long max_vp_horz_width = 0;
1426 long max_vp_vert_height = 0;
1427 long eff_surf_width_l = 0;
1428 long eff_surf_width_c = 0;
1429 long eff_surf_height_l = 0;
1430 long eff_surf_height_c = 0;
1434 REQ_128BytesNonContiguous,
1435 REQ_128BytesContiguous,
1439 RequestType RequestLuma;
1440 RequestType RequestChroma;
/* yuv420 is 1 for the dm_420_8, dm_420_10 and dm_420_12 source formats, 0 otherwise. */
1442 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1448 if (BytePerPixelY == 1)
1450 if (BytePerPixelC == 1)
1452 if (BytePerPixelY == 8
1453 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1454 || TilingFormat == dm_sw_64kb_s_x))
1456 if (BytePerPixelC == 8
1457 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1458 || TilingFormat == dm_sw_64kb_s_x))
/*
 * Swath buffer size is derived from half of DETBufferSize; the detile-buffer
 * viewport limits are that size divided by a per-plane term built from the
 * 256-byte request heights. With BytePerPixelC == 0 only luma terms are used.
 */
1461 if (BytePerPixelC == 0) {
1462 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1463 detile_buf_vp_horz_limit = (double) swath_buf_size
1464 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1465 / (1 + horz_div_l));
1466 detile_buf_vp_vert_limit = (double) swath_buf_size
1467 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
/* NOTE(review): presumably the BytePerPixelC != 0 branch - the "else" line is not visible here. */
1469 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1470 detile_buf_vp_horz_limit = (double) swath_buf_size
1471 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1473 + (double) RequestHeight256ByteChroma
1474 * BytePerPixelC / (1 + horz_div_c)
1476 detile_buf_vp_vert_limit = (double) swath_buf_size
1477 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)
1478 + 256.0 / RequestHeight256ByteChroma
1479 / (1 + vert_div_c) / (1 + yuv420));
/* dm_420_10 surfaces get both detile-buffer limits scaled by 1.5. */
1482 if (SourcePixelFormat == dm_420_10) {
1483 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1484 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
/* NOTE(review): presumably dml_floor(x, 16) rounds down to a multiple of 16 - confirm against dml_floor(). */
1487 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1488 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
/*
 * The maximum viewport size is dml_min() of a fixed limit (5760 horizontally;
 * 2880 vertically when BytePerPixelC > 0, else 5760) and the detile-buffer
 * limit computed above.
 */
1490 MAS_vp_horz_limit = 5760;
1491 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1492 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1493 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
/*
 * Effective surface size: the luma dimension clamped to the maximum viewport
 * size; the chroma value is the luma value divided by (1 + yuv420).
 */
1495 (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1496 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1497 eff_surf_height_l = (
1498 SurfaceHeightLuma > max_vp_vert_height ?
1499 max_vp_vert_height : SurfaceHeightLuma);
1500 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
/* Swath sizes in bytes per plane for horizontal (horz) and vertical (vert) access. */
1502 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1503 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1504 if (BytePerPixelC > 0) {
1505 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma
1507 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1509 full_swath_bytes_horz_wc_c = 0;
1510 full_swath_bytes_vert_wc_c = 0;
/*
 * dm_420_10: scale the swath sizes by 2/3 and pass them through
 * dml_ceil(..., 256). The operands are long, so "* 2 / 3" is integer
 * arithmetic and truncates before dml_ceil() sees the value.
 */
1513 if (SourcePixelFormat == dm_420_10) {
1514 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1515 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1516 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1517 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
/*
 * Horizontal access: both req128 flags stay 0 when two luma plus two chroma
 * swaths fit in DETBufferSize; the later branches set the chroma flag, the
 * luma flag or both to 1, using a comparison of luma against 1.5x chroma.
 */
1520 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1521 req128_horz_wc_l = 0;
1522 req128_horz_wc_c = 0;
1523 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c
1524 && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c
1526 req128_horz_wc_l = 0;
1527 req128_horz_wc_c = 1;
1528 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c
1529 && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c
1531 req128_horz_wc_l = 1;
1532 req128_horz_wc_c = 0;
1534 req128_horz_wc_l = 1;
1535 req128_horz_wc_c = 1;
/* Vertical access: same decision using the vertical swath sizes. */
1538 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1539 req128_vert_wc_l = 0;
1540 req128_vert_wc_c = 0;
1541 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c
1542 && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c
1544 req128_vert_wc_l = 0;
1545 req128_vert_wc_c = 1;
1546 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c
1547 && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c
1549 req128_vert_wc_l = 1;
1550 req128_vert_wc_c = 0;
1552 req128_vert_wc_l = 1;
1553 req128_vert_wc_c = 1;
/* The segment_order_* flags depend only on the plane's bytes per pixel and on TilingFormat. */
1556 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1557 segment_order_horz_contiguous_luma = 0;
1559 segment_order_horz_contiguous_luma = 1;
1561 if ((BytePerPixelY == 8
1562 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1563 || TilingFormat == dm_sw_64kb_d_t
1564 || TilingFormat == dm_sw_64kb_r_x))
1565 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1566 segment_order_vert_contiguous_luma = 0;
1568 segment_order_vert_contiguous_luma = 1;
1570 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1571 segment_order_horz_contiguous_chroma = 0;
1573 segment_order_horz_contiguous_chroma = 1;
1575 if ((BytePerPixelC == 8
1576 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1577 || TilingFormat == dm_sw_64kb_d_t
1578 || TilingFormat == dm_sw_64kb_r_x))
1579 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1580 segment_order_vert_contiguous_chroma = 0;
1582 segment_order_vert_contiguous_chroma = 1;
/*
 * Pick the request type per plane. When
 * DCCProgrammingAssumesScanDirectionUnknown is true both the horizontal and
 * the vertical flags are consulted; otherwise only the horizontal flags are
 * used when ScanOrientation != dm_vert.
 */
1585 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1586 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1587 RequestLuma = REQ_256Bytes;
1588 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0)
1589 || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1590 RequestLuma = REQ_128BytesNonContiguous;
1592 RequestLuma = REQ_128BytesContiguous;
1594 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1595 RequestChroma = REQ_256Bytes;
1596 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0)
1597 || (req128_vert_wc_c == 1
1598 && segment_order_vert_contiguous_chroma == 0)) {
1599 RequestChroma = REQ_128BytesNonContiguous;
1601 RequestChroma = REQ_128BytesContiguous;
1603 } else if (ScanOrientation != dm_vert) {
1604 if (req128_horz_wc_l == 0) {
1605 RequestLuma = REQ_256Bytes;
1606 } else if (segment_order_horz_contiguous_luma == 0) {
1607 RequestLuma = REQ_128BytesNonContiguous;
1609 RequestLuma = REQ_128BytesContiguous;
1611 if (req128_horz_wc_c == 0) {
1612 RequestChroma = REQ_256Bytes;
1613 } else if (segment_order_horz_contiguous_chroma == 0) {
1614 RequestChroma = REQ_128BytesNonContiguous;
1616 RequestChroma = REQ_128BytesContiguous;
/* NOTE(review): presumably the remaining (dm_vert) branch - only the vertical flags are used below. */
1619 if (req128_vert_wc_l == 0) {
1620 RequestLuma = REQ_256Bytes;
1621 } else if (segment_order_vert_contiguous_luma == 0) {
1622 RequestLuma = REQ_128BytesNonContiguous;
1624 RequestLuma = REQ_128BytesContiguous;
1626 if (req128_vert_wc_c == 0) {
1627 RequestChroma = REQ_256Bytes;
1628 } else if (segment_order_vert_contiguous_chroma == 0) {
1629 RequestChroma = REQ_128BytesNonContiguous;
1631 RequestChroma = REQ_128BytesContiguous;
/* Translate the luma request type into the luma output block sizes. */
1635 if (RequestLuma == REQ_256Bytes) {
1636 *MaxUncompressedBlockLuma = 256;
1637 *MaxCompressedBlockLuma = 256;
1638 *IndependentBlockLuma = 0;
1639 } else if (RequestLuma == REQ_128BytesContiguous) {
1640 *MaxUncompressedBlockLuma = 256;
1641 *MaxCompressedBlockLuma = 128;
1642 *IndependentBlockLuma = 128;
1644 *MaxUncompressedBlockLuma = 256;
1645 *MaxCompressedBlockLuma = 64;
1646 *IndependentBlockLuma = 64;
/* Same translation for the chroma plane. */
1649 if (RequestChroma == REQ_256Bytes) {
1650 *MaxUncompressedBlockChroma = 256;
1651 *MaxCompressedBlockChroma = 256;
1652 *IndependentBlockChroma = 0;
1653 } else if (RequestChroma == REQ_128BytesContiguous) {
1654 *MaxUncompressedBlockChroma = 256;
1655 *MaxCompressedBlockChroma = 128;
1656 *IndependentBlockChroma = 128;
1658 *MaxUncompressedBlockChroma = 256;
1659 *MaxCompressedBlockChroma = 64;
1660 *IndependentBlockChroma = 64;
/* DCC not enabled, or no chroma plane (BytePerPixelC == 0): report zeros for chroma. */
1663 if (DCCEnabled != true || BytePerPixelC == 0) {
1664 *MaxUncompressedBlockChroma = 0;
1665 *MaxCompressedBlockChroma = 0;
1666 *IndependentBlockChroma = 0;
/* DCC not enabled: report zeros for luma as well. */
1669 if (DCCEnabled != true) {
1670 *MaxUncompressedBlockLuma = 0;
1671 *MaxCompressedBlockLuma = 0;
1672 *IndependentBlockLuma = 0;
/*
 * CalculatePrefetchSourceLines - compute *VInitPreFill and *MaxNumSwath and
 * return *MaxNumSwath * SwathHeight + MaxPartialSwath.
 *
 * *VInitPreFill is dml_floor() of half of (VRatio + vtaps + 1), with an extra
 * Interlace * 0.5 * VRatio term on the path not taken when
 * ProgressiveToInterlaceUnitInOPP is set. The swath counts are then derived
 * from *VInitPreFill and SwathHeight, in one of two ways selected by
 * mode_lib->vba.IgnoreViewportPositioning.
 *
 * SwathHeight is used as a divisor and as a modulus; no check for zero is
 * visible in this function.
 *
 * NOTE(review): this listing appears to have lost several short lines (the
 * VRatio, vtaps and Interlace parameters, braces, bare "else" lines, the
 * dml_print( call line and the "% SwathHeight;" continuations). Confirm
 * against the complete file.
 */
1677 static double CalculatePrefetchSourceLines(
1678 struct display_mode_lib *mode_lib,
1682 bool ProgressiveToInterlaceUnitInOPP,
1683 unsigned int SwathHeight,
1684 unsigned int ViewportYStart,
1685 double *VInitPreFill,
1686 unsigned int *MaxNumSwath)
1688 unsigned int MaxPartialSwath = 0;
1690 if (ProgressiveToInterlaceUnitInOPP)
1691 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1693 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
/* Viewport positioning honoured: one extra swath is added and MaxPartialSwath is kept at 1 or more. */
1695 if (!mode_lib->vba.IgnoreViewportPositioning) {
1697 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1699 if (*VInitPreFill > 1.0)
1700 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1702 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
1704 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
/* NOTE(review): presumably the IgnoreViewportPositioning branch - a warning is printed when ViewportYStart is non-zero. */
1708 if (ViewportYStart != 0)
1710 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1712 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1714 if (*VInitPreFill > 1.0)
1715 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1717 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
1721 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
/*
 * CalculateVMAndRowBytes - compute DCC meta-row and page-table sizes for one
 * surface and return PDEAndMetaPTEBytesFrame.
 *
 * Return value: *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame +
 * ExtraDPDEBytesFrame, multiplied by (1 + 8 * HostVMDynamicLevels) when
 * HostVMEnable is true.
 *
 * Values written through the output pointers include the meta request and
 * meta row geometry, *MetaRowByte, *MacroTileWidth, the PTE request geometry
 * and *PTERequestSize, the dpte row geometry, *PixelPTEBytesPerRow,
 * *PTEBufferSizeNotExceeded, *vm_group_bytes and *dpte_group_bytes.
 *
 * BytePerPixel and MacroTileHeight are used as divisors; no check for zero is
 * visible in this function. SourcePixelFormat is not referenced in the lines
 * visible here.
 *
 * NOTE(review): this listing appears to have lost several short lines (the
 * DCCEnable, GPUVMEnable, HostVMEnable and Pitch parameters, braces, bare
 * "else" lines and some simple assignments). The comments describe only what
 * the visible lines show; confirm against the complete file.
 */
1724 static unsigned int CalculateVMAndRowBytes(
1725 struct display_mode_lib *mode_lib,
1727 unsigned int BlockHeight256Bytes,
1728 unsigned int BlockWidth256Bytes,
1729 enum source_format_class SourcePixelFormat,
1730 unsigned int SurfaceTiling,
1731 unsigned int BytePerPixel,
1732 enum scan_direction_class ScanDirection,
1733 unsigned int SwathWidth,
1734 unsigned int ViewportHeight,
1737 unsigned int HostVMMaxNonCachedPageTableLevels,
1738 unsigned int GPUVMMinPageSize,
1739 unsigned int HostVMMinPageSize,
1740 unsigned int PTEBufferSizeInRequests,
1742 unsigned int DCCMetaPitch,
1743 unsigned int *MacroTileWidth,
1744 unsigned int *MetaRowByte,
1745 unsigned int *PixelPTEBytesPerRow,
1746 bool *PTEBufferSizeNotExceeded,
1747 unsigned int *dpte_row_width_ub,
1748 unsigned int *dpte_row_height,
1749 unsigned int *MetaRequestWidth,
1750 unsigned int *MetaRequestHeight,
1751 unsigned int *meta_row_width,
1752 unsigned int *meta_row_height,
1753 unsigned int *vm_group_bytes,
1754 unsigned int *dpte_group_bytes,
1755 unsigned int *PixelPTEReqWidth,
1756 unsigned int *PixelPTEReqHeight,
1757 unsigned int *PTERequestSize,
1758 unsigned int *DPDE0BytesFrame,
1759 unsigned int *MetaPTEBytesFrame)
1761 unsigned int MPDEBytesFrame = 0;
1762 unsigned int DCCMetaSurfaceBytes = 0;
1763 unsigned int MacroTileSizeBytes = 0;
1764 unsigned int MacroTileHeight = 0;
1765 unsigned int ExtraDPDEBytesFrame = 0;
1766 unsigned int PDEAndMetaPTEBytesFrame = 0;
1767 unsigned int PixelPTEReqHeightPTEs = 0;
1768 unsigned int HostVMDynamicLevels = 0;
1770 double FractionOfPTEReturnDrop;
/*
 * HostVMDynamicLevels stays 0 unless both GPUVM and HostVM are enabled; then
 * it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (not below 0)
 * depending on HostVMMinPageSize relative to 2048 and 1048576.
 */
1772 if (GPUVMEnable == true && HostVMEnable == true) {
1773 if (HostVMMinPageSize < 2048) {
1774 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1775 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1776 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1778 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
/*
 * Meta request size is 8x the 256-byte block dimensions. For a scan direction
 * other than dm_vert the row height is the request height; in the other
 * assignments below the width and height roles are swapped.
 */
1782 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1783 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1784 if (ScanDirection != dm_vert) {
1785 *meta_row_height = *MetaRequestHeight;
1786 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1787 + *MetaRequestWidth;
1788 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1790 *meta_row_height = *MetaRequestWidth;
1791 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1792 + *MetaRequestHeight;
1793 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
/* ViewportHeight is unsigned, so "ViewportHeight - 1" wraps if it is 0. */
1795 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1796 + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1797 if (GPUVMEnable == true) {
1798 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1799 MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
1801 *MetaPTEBytesFrame = 0;
/* DCC not enabled: no meta PTE bytes. */
1805 if (DCCEnable != true) {
1806 *MetaPTEBytesFrame = 0;
/* Macro tile: 256 bytes and one 256-byte block high for dm_sw_linear; 65536 bytes and 16 blocks high in the other assignments. */
1811 if (SurfaceTiling == dm_sw_linear) {
1812 MacroTileSizeBytes = 256;
1813 MacroTileHeight = BlockHeight256Bytes;
1815 MacroTileSizeBytes = 65536;
1816 MacroTileHeight = 16 * BlockHeight256Bytes;
1818 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
/* DPDE0 and extra PDE bytes are only computed with GPUVM enabled and more than one page table level. */
1820 if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
1821 if (ScanDirection != dm_vert) {
1822 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1824 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1826 ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
1828 *DPDE0BytesFrame = 0;
1829 ExtraDPDEBytesFrame = 0;
/* This sum, optionally scaled for HostVM below, is the function's return value. */
1832 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1833 + ExtraDPDEBytesFrame;
1835 if (HostVMEnable == true) {
1836 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
/* PTE request geometry and request size, selected by tiling, macro tile size and GPUVMMinPageSize. */
1839 if (SurfaceTiling == dm_sw_linear) {
1840 PixelPTEReqHeightPTEs = 1;
1841 *PixelPTEReqHeight = 1;
1842 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1843 *PTERequestSize = 64;
1844 FractionOfPTEReturnDrop = 0;
/*
 * NOTE(review): the visible lines only ever assign 256 or 65536 to
 * MacroTileSizeBytes, so this == 4096 branch looks unreachable - confirm
 * against the complete file.
 */
1845 } else if (MacroTileSizeBytes == 4096) {
1846 PixelPTEReqHeightPTEs = 1;
1847 *PixelPTEReqHeight = MacroTileHeight;
1848 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1849 *PTERequestSize = 64;
1850 if (ScanDirection != dm_vert)
1851 FractionOfPTEReturnDrop = 0;
/*
 * NOTE(review): "7 / 8" is integer division and evaluates to 0, so 0 (not
 * 0.875) is stored here. If 0.875 was intended this needs a floating-point
 * operand - confirm with the owners before changing.
 */
1853 FractionOfPTEReturnDrop = 7 / 8;
1854 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1855 PixelPTEReqHeightPTEs = 16;
1856 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1857 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1858 *PTERequestSize = 128;
1859 FractionOfPTEReturnDrop = 0;
1861 PixelPTEReqHeightPTEs = 1;
1862 *PixelPTEReqHeight = MacroTileHeight;
1863 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1864 *PTERequestSize = 64;
1865 FractionOfPTEReturnDrop = 0;
/*
 * dpte row geometry and PTE bytes per row. SwathWidth is unsigned, so the
 * "(SwathWidth - 1)" forms below wrap if SwathWidth is 0.
 */
1868 if (SurfaceTiling == dm_sw_linear) {
1869 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1870 *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1871 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1872 } else if (ScanDirection != dm_vert) {
1873 *dpte_row_height = *PixelPTEReqHeight;
1874 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1875 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1877 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1878 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1879 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
/* The PTE buffer check compares the row's PTE bytes, less the dropped fraction, against 64 bytes per buffered request. */
1881 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1882 <= 64 * PTEBufferSizeInRequests) {
1883 *PTEBufferSizeNotExceeded = true;
1885 *PTEBufferSizeNotExceeded = false;
/* GPUVM not enabled: no PTE bytes per row, and the buffer check is reported as passing. */
1888 if (GPUVMEnable != true) {
1889 *PixelPTEBytesPerRow = 0;
1890 *PTEBufferSizeNotExceeded = true;
1892 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
/* HostVM scales the per-row PTE bytes by the same (1 + 8 * HostVMDynamicLevels) factor as the frame total. */
1894 if (HostVMEnable == true) {
1895 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
/* Group sizes: 512 with HostVM; with GPUVM, 2048 for vm groups and 512 or 2048 for dpte groups; 0 in the remaining assignments. */
1898 if (HostVMEnable == true) {
1899 *vm_group_bytes = 512;
1900 *dpte_group_bytes = 512;
1901 } else if (GPUVMEnable == true) {
1902 *vm_group_bytes = 2048;
1903 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1904 *dpte_group_bytes = 512;
1906 *dpte_group_bytes = 2048;
1909 *vm_group_bytes = 0;
1910 *dpte_group_bytes = 0;
1913 return PDEAndMetaPTEBytesFrame;
1916 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1917 struct display_mode_lib *mode_lib)
1919 struct vba_vars_st *v = &mode_lib->vba;
1921 long ReorderBytes = 0;
1922 unsigned int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1923 double MaxTotalRDBandwidth = 0;
1924 double MaxTotalRDBandwidthNoUrgentBurst = 0;
1925 bool DestinationLineTimesForPrefetchLessThan2 = false;
1926 bool VRatioPrefetchMoreThan4 = false;
1929 v->WritebackDISPCLK = 0.0;
1930 v->DISPCLKWithRamping = 0;
1931 v->DISPCLKWithoutRamping = 0;
1932 v->GlobalDPPCLK = 0.0;
1933 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
1934 v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] = dml_min3(
1935 v->ReturnBusWidth * v->DCFCLK,
1936 v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth,
1937 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
1938 if (v->HostVMEnable != true) {
1939 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
1941 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
1943 /* End DAL custom code */
1945 // DISPCLK and DPPCLK Calculation
1947 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1948 if (v->WritebackEnable[k]) {
1949 v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
1950 dml30_CalculateWriteBackDISPCLK(
1951 v->WritebackPixelFormat[k],
1953 v->WritebackHRatio[k],
1954 v->WritebackVRatio[k],
1955 v->WritebackHTaps[k],
1956 v->WritebackVTaps[k],
1957 v->WritebackSourceWidth[k],
1958 v->WritebackDestinationWidth[k],
1960 v->WritebackLineBufferSize));
1964 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1965 if (v->HRatio[k] > 1) {
1966 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1967 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
1969 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1970 v->MaxDCHUBToPSCLThroughput,
1971 v->MaxPSCLToLBThroughput);
1974 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
1975 * dml_max(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
1976 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
1978 if ((v->htaps[k] > 6 || v->vtaps[k] > 6)
1979 && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
1980 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
1983 if ((v->SourcePixelFormat[k] != dm_420_8
1984 && v->SourcePixelFormat[k] != dm_420_10
1985 && v->SourcePixelFormat[k] != dm_420_12
1986 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
1987 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1988 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
1990 if (v->HRatioChroma[k] > 1) {
1991 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1992 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
1994 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1995 v->MaxDCHUBToPSCLThroughput,
1996 v->MaxPSCLToLBThroughput);
1998 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
1999 * dml_max3(v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2000 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0);
2002 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6)
2003 && v->DPPCLKUsingSingleDPPChroma
2004 < 2 * v->PixelClock[k]) {
2005 v->DPPCLKUsingSingleDPPChroma = 2
2009 v->DPPCLKUsingSingleDPP[k] = dml_max(
2010 v->DPPCLKUsingSingleDPPLuma,
2011 v->DPPCLKUsingSingleDPPChroma);
2015 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2016 if (v->BlendingAndTiming[k] != k)
2018 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2019 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2020 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2021 * (1 + v->DISPCLKRampingMargin / 100));
2022 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2023 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2024 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2025 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2026 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2027 * (1 + v->DISPCLKRampingMargin / 100));
2028 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2029 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2031 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2032 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2033 * (1 + v->DISPCLKRampingMargin / 100));
2034 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2035 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2039 v->DISPCLKWithRamping = dml_max(
2040 v->DISPCLKWithRamping,
2041 v->WritebackDISPCLK);
2042 v->DISPCLKWithoutRamping = dml_max(
2043 v->DISPCLKWithoutRamping,
2044 v->WritebackDISPCLK);
2046 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2047 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2048 v->DISPCLKWithRamping,
2049 v->DISPCLKDPPCLKVCOSpeed);
2050 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2051 v->DISPCLKWithoutRamping,
2052 v->DISPCLKDPPCLKVCOSpeed);
2053 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2054 v->soc.clock_limits[mode_lib->soc.num_states].dispclk_mhz,
2055 v->DISPCLKDPPCLKVCOSpeed);
2056 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity
2057 > v->MaxDispclkRoundedToDFSGranularity) {
2058 v->DISPCLK_calculated =
2059 v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2060 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity
2061 > v->MaxDispclkRoundedToDFSGranularity) {
2062 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2064 v->DISPCLK_calculated =
2065 v->DISPCLKWithRampingRoundedToDFSGranularity;
2067 v->DISPCLK = v->DISPCLK_calculated;
2068 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2070 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2071 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k]
2073 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2074 v->GlobalDPPCLK = dml_max(
2076 v->DPPCLK_calculated[k]);
2078 v->GlobalDPPCLK = RoundToDFSGranularityUp(
2080 v->DISPCLKDPPCLKVCOSpeed);
2081 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2082 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255
2084 v->DPPCLK_calculated[k] * 255.0
2087 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2088 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2091 // Urgent and NB P-State/DRAM Clock Change Watermark
2092 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2093 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2095 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2096 CalculateBytePerPixelAnd256BBlockSizes(
2097 v->SourcePixelFormat[k],
2098 v->SurfaceTiling[k],
2099 &v->BytePerPixelY[k],
2100 &v->BytePerPixelC[k],
2101 &v->BytePerPixelDETY[k],
2102 &v->BytePerPixelDETC[k],
2103 &v->BlockHeight256BytesY[k],
2104 &v->BlockHeight256BytesC[k],
2105 &v->BlockWidth256BytesY[k],
2106 &v->BlockWidth256BytesC[k]);
2109 CalculateSwathWidth(
2111 v->NumberOfActivePlanes,
2112 v->SourcePixelFormat,
2120 v->ODMCombineEnabled,
2123 v->BlockHeight256BytesY,
2124 v->BlockHeight256BytesC,
2125 v->BlockWidth256BytesY,
2126 v->BlockWidth256BytesC,
2127 v->BlendingAndTiming,
2131 v->SwathWidthSingleDPPY,
2132 v->SwathWidthSingleDPPC,
2137 v->swath_width_luma_ub,
2138 v->swath_width_chroma_ub);
2141 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2142 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2143 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k];
2144 DTRACE("read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2148 // DCFCLK Deep Sleep
2149 CalculateDCFCLKDeepSleep(
2151 v->NumberOfActivePlanes,
2162 v->PSCL_THROUGHPUT_LUMA,
2163 v->PSCL_THROUGHPUT_CHROMA,
2165 v->ReadBandwidthPlaneLuma,
2166 v->ReadBandwidthPlaneChroma,
2168 &v->DCFCLKDeepSleep);
2171 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2172 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2173 v->DSCCLK_calculated[k] = 0.0;
2175 if (v->OutputFormat[k] == dm_420)
2176 v->DSCFormatFactor = 2;
2177 else if (v->OutputFormat[k] == dm_444)
2178 v->DSCFormatFactor = 1;
2179 else if (v->OutputFormat[k] == dm_n422)
2180 v->DSCFormatFactor = 2;
2182 v->DSCFormatFactor = 1;
2183 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2184 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12
2185 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2186 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2187 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6
2188 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2190 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3
2191 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2196 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2197 double BPP = v->OutputBppPerState[k][v->VoltageLevel];
2199 if (v->DSCEnabled[k] && BPP != 0) {
2200 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2201 v->DSCDelay[k] = dscceComputeDelay(v->DSCInputBitPerComponent[k],
2203 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2204 v->NumberOfDSCSlices[k],
2207 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2208 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2209 v->DSCDelay[k] = 2 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2211 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2212 v->NumberOfDSCSlices[k] / 2.0,
2215 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2217 v->DSCDelay[k] = 4 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2219 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2220 v->NumberOfDSCSlices[k] / 4.0,
2223 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2225 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2231 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2232 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2233 if (j != k && v->BlendingAndTiming[k] == j
2234 && v->DSCEnabled[j])
2235 v->DSCDelay[k] = v->DSCDelay[j];
2238 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2239 unsigned int PDEAndMetaPTEBytesFrameY = 0;
2240 unsigned int PixelPTEBytesPerRowY = 0;
2241 unsigned int MetaRowByteY = 0;
2242 unsigned int MetaRowByteC = 0;
2243 unsigned int PDEAndMetaPTEBytesFrameC = 0;
2244 unsigned int PixelPTEBytesPerRowC = 0;
2245 bool PTEBufferSizeNotExceededY = 0;
2246 bool PTEBufferSizeNotExceededC = 0;
2249 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2250 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2251 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2252 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2254 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2255 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2258 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2261 v->BlockHeight256BytesC[k],
2262 v->BlockWidth256BytesC[k],
2263 v->SourcePixelFormat[k],
2264 v->SurfaceTiling[k],
2265 v->BytePerPixelC[k],
2268 v->ViewportHeightChroma[k],
2271 v->HostVMMaxNonCachedPageTableLevels,
2272 v->GPUVMMinPageSize,
2273 v->HostVMMinPageSize,
2274 v->PTEBufferSizeInRequestsForChroma,
2276 v->DCCMetaPitchC[k],
2277 &v->MacroTileWidthC[k],
2279 &PixelPTEBytesPerRowC,
2280 &PTEBufferSizeNotExceededC,
2281 &v->dpte_row_width_chroma_ub[k],
2282 &v->dpte_row_height_chroma[k],
2283 &v->meta_req_width_chroma[k],
2284 &v->meta_req_height_chroma[k],
2285 &v->meta_row_width_chroma[k],
2286 &v->meta_row_height_chroma[k],
2289 &v->PixelPTEReqWidthC[k],
2290 &v->PixelPTEReqHeightC[k],
2291 &v->PTERequestSizeC[k],
2292 &v->dpde0_bytes_per_frame_ub_c[k],
2293 &v->meta_pte_bytes_per_frame_ub_c[k]);
2295 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2300 v->ProgressiveToInterlaceUnitInOPP,
2302 v->ViewportYStartC[k],
2303 &v->VInitPreFillC[k],
2304 &v->MaxNumSwathC[k]);
2306 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2307 v->PTEBufferSizeInRequestsForChroma = 0;
2308 PixelPTEBytesPerRowC = 0;
2309 PDEAndMetaPTEBytesFrameC = 0;
2311 v->MaxNumSwathC[k] = 0;
2312 v->PrefetchSourceLinesC[k] = 0;
2315 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2318 v->BlockHeight256BytesY[k],
2319 v->BlockWidth256BytesY[k],
2320 v->SourcePixelFormat[k],
2321 v->SurfaceTiling[k],
2322 v->BytePerPixelY[k],
2325 v->ViewportHeight[k],
2328 v->HostVMMaxNonCachedPageTableLevels,
2329 v->GPUVMMinPageSize,
2330 v->HostVMMinPageSize,
2331 v->PTEBufferSizeInRequestsForLuma,
2333 v->DCCMetaPitchY[k],
2334 &v->MacroTileWidthY[k],
2336 &PixelPTEBytesPerRowY,
2337 &PTEBufferSizeNotExceededY,
2338 &v->dpte_row_width_luma_ub[k],
2339 &v->dpte_row_height[k],
2340 &v->meta_req_width[k],
2341 &v->meta_req_height[k],
2342 &v->meta_row_width[k],
2343 &v->meta_row_height[k],
2344 &v->vm_group_bytes[k],
2345 &v->dpte_group_bytes[k],
2346 &v->PixelPTEReqWidthY[k],
2347 &v->PixelPTEReqHeightY[k],
2348 &v->PTERequestSizeY[k],
2349 &v->dpde0_bytes_per_frame_ub_l[k],
2350 &v->meta_pte_bytes_per_frame_ub_l[k]);
2352 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2357 v->ProgressiveToInterlaceUnitInOPP,
2359 v->ViewportYStartY[k],
2360 &v->VInitPreFillY[k],
2361 &v->MaxNumSwathY[k]);
2362 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2363 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
2364 + PDEAndMetaPTEBytesFrameC;
2365 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2367 CalculateRowBandwidth(
2369 v->SourcePixelFormat[k],
2373 v->HTotal[k] / v->PixelClock[k],
2376 v->meta_row_height[k],
2377 v->meta_row_height_chroma[k],
2378 PixelPTEBytesPerRowY,
2379 PixelPTEBytesPerRowC,
2380 v->dpte_row_height[k],
2381 v->dpte_row_height_chroma[k],
2383 &v->dpte_row_bw[k]);
2386 v->TotalDCCActiveDPP = 0;
2387 v->TotalActiveDPP = 0;
2388 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2389 v->TotalActiveDPP = v->TotalActiveDPP
2390 + v->DPPPerPlane[k];
2391 if (v->DCCEnable[k])
2392 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP
2393 + v->DPPPerPlane[k];
2397 ReorderBytes = v->NumberOfChannels * dml_max3(
2398 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2399 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2400 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2402 v->UrgentExtraLatency = CalculateExtraLatency(
2403 v->RoundTripPingLatencyCycles,
2407 v->PixelChunkSizeInKByte,
2408 v->TotalDCCActiveDPP,
2413 v->NumberOfActivePlanes,
2415 v->dpte_group_bytes,
2416 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2417 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2418 v->HostVMMinPageSize,
2419 v->HostVMMaxNonCachedPageTableLevels);
2421 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2423 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2424 if (v->BlendingAndTiming[k] == k) {
2425 if (v->WritebackEnable[k] == true) {
2426 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency +
2427 CalculateWriteBackDelay(v->WritebackPixelFormat[k],
2428 v->WritebackHRatio[k],
2429 v->WritebackVRatio[k],
2430 v->WritebackVTaps[k],
2431 v->WritebackDestinationWidth[k],
2432 v->WritebackDestinationHeight[k],
2433 v->WritebackSourceHeight[k],
2434 v->HTotal[k]) / v->DISPCLK;
2436 v->WritebackDelay[v->VoltageLevel][k] = 0;
2437 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2438 if (v->BlendingAndTiming[j] == k
2439 && v->WritebackEnable[j] == true) {
2440 v->WritebackDelay[v->VoltageLevel][k] = dml_max(v->WritebackDelay[v->VoltageLevel][k],
2441 v->WritebackLatency + CalculateWriteBackDelay(
2442 v->WritebackPixelFormat[j],
2443 v->WritebackHRatio[j],
2444 v->WritebackVRatio[j],
2445 v->WritebackVTaps[j],
2446 v->WritebackDestinationWidth[j],
2447 v->WritebackDestinationHeight[j],
2448 v->WritebackSourceHeight[j],
2449 v->HTotal[k]) / v->DISPCLK);
2455 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2456 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2457 if (v->BlendingAndTiming[k] == j)
2458 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2460 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2461 v->MaxVStartupLines[k] = v->VTotal[k] - v->VActive[k] - dml_max(1.0, dml_ceil((double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1));
2464 v->MaximumMaxVStartupLines = 0;
2465 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2466 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2468 if (v->DRAMClockChangeLatencyOverride > 0.0) {
2469 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatencyOverride;
2471 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatency;
2473 v->UrgentLatency = CalculateUrgentLatency(v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock);
2476 v->FractionOfUrgentBandwidth = 0.0;
2477 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2479 v->VStartupLines = 13;
2482 MaxTotalRDBandwidth = 0;
2483 MaxTotalRDBandwidthNoUrgentBurst = 0;
2484 DestinationLineTimesForPrefetchLessThan2 = false;
2485 VRatioPrefetchMoreThan4 = false;
2486 TWait = CalculateTWait(
2488 v->FinalDRAMClockChangeLatency,
2490 v->SREnterPlusExitTime);
2492 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2493 Pipe myPipe = { 0 };
2495 myPipe.DPPCLK = v->DPPCLK[k];
2496 myPipe.DISPCLK = v->DISPCLK;
2497 myPipe.PixelClock = v->PixelClock[k];
2498 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2499 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2500 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2501 myPipe.SourceScan = v->SourceScan[k];
2502 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2503 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2504 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2505 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2506 myPipe.InterlaceEnable = v->Interlace[k];
2507 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2508 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2509 myPipe.HTotal = v->HTotal[k];
2510 myPipe.DCCEnable = v->DCCEnable[k];
2511 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
2513 v->ErrorResult[k] = CalculatePrefetchSchedule(
2515 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2516 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2519 v->DPPCLKDelaySubtotal
2520 + v->DPPCLKDelayCNVCFormater,
2522 v->DPPCLKDelaySCLLBOnly,
2523 v->DPPCLKDelayCNVCCursor,
2524 v->DISPCLKDelaySubtotal,
2525 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2527 v->MaxInterDCNTileRepeaters,
2528 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2529 v->MaxVStartupLines[k],
2530 v->GPUVMMaxPageTableLevels,
2533 v->HostVMMaxNonCachedPageTableLevels,
2534 v->HostVMMinPageSize,
2535 v->DynamicMetadataEnable[k],
2536 v->DynamicMetadataVMEnabled,
2537 v->DynamicMetadataLinesBeforeActiveRequired[k],
2538 v->DynamicMetadataTransmittedBytes[k],
2540 v->UrgentExtraLatency,
2542 v->PDEAndMetaPTEBytesFrame[k],
2544 v->PixelPTEBytesPerRow[k],
2545 v->PrefetchSourceLinesY[k],
2547 v->BytePerPixelY[k],
2548 v->VInitPreFillY[k],
2550 v->PrefetchSourceLinesC[k],
2552 v->BytePerPixelC[k],
2553 v->VInitPreFillC[k],
2555 v->swath_width_luma_ub[k],
2556 v->swath_width_chroma_ub[k],
2560 v->ProgressiveToInterlaceUnitInOPP,
2561 &v->DSTXAfterScaler[k],
2562 &v->DSTYAfterScaler[k],
2563 &v->DestinationLinesForPrefetch[k],
2564 &v->PrefetchBandwidth[k],
2565 &v->DestinationLinesToRequestVMInVBlank[k],
2566 &v->DestinationLinesToRequestRowInVBlank[k],
2567 &v->VRatioPrefetchY[k],
2568 &v->VRatioPrefetchC[k],
2569 &v->RequiredPrefetchPixDataBWLuma[k],
2570 &v->RequiredPrefetchPixDataBWChroma[k],
2571 &v->NotEnoughTimeForDynamicMetadata[k],
2573 &v->prefetch_vmrow_bw[k],
2576 &v->VUpdateOffsetPix[k],
2577 &v->VUpdateWidthPix[k],
2578 &v->VReadyOffsetPix[k]);
2579 if (v->BlendingAndTiming[k] == k) {
2580 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2581 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k];
2582 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[k];
2583 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[k] / 4.0, 1);
2584 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2586 int x = v->BlendingAndTiming[k];
2587 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2588 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[x];
2589 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[x];
2590 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[x] / 4.0, 1);
2591 if (!v->MaxVStartupLines[x])
2592 v->MaxVStartupLines[x] = v->MaxVStartupLines[k];
2593 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[x]);
2597 v->NotEnoughUrgentLatencyHiding = false;
2598 v->NotEnoughUrgentLatencyHidingPre = false;
2600 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2601 v->cursor_bw[k] = v->NumberOfCursors[k]
2602 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2604 / (v->HTotal[k] / v->PixelClock[k])
2606 v->cursor_bw_pre[k] = v->NumberOfCursors[k]
2607 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2609 / (v->HTotal[k] / v->PixelClock[k])
2610 * v->VRatioPrefetchY[k];
2612 CalculateUrgentBurstFactor(
2613 v->swath_width_luma_ub[k],
2614 v->swath_width_chroma_ub[k],
2615 v->DETBufferSizeInKByte,
2618 v->HTotal[k] / v->PixelClock[k],
2620 v->CursorBufferSize,
2621 v->CursorWidth[k][0],
2625 v->BytePerPixelDETY[k],
2626 v->BytePerPixelDETC[k],
2627 v->DETBufferSizeY[k],
2628 v->DETBufferSizeC[k],
2629 &v->UrgentBurstFactorCursor[k],
2630 &v->UrgentBurstFactorLuma[k],
2631 &v->UrgentBurstFactorChroma[k],
2632 &v->NoUrgentLatencyHiding[k]);
2634 CalculateUrgentBurstFactor(
2635 v->swath_width_luma_ub[k],
2636 v->swath_width_chroma_ub[k],
2637 v->DETBufferSizeInKByte,
2640 v->HTotal[k] / v->PixelClock[k],
2642 v->CursorBufferSize,
2643 v->CursorWidth[k][0],
2645 v->VRatioPrefetchY[k],
2646 v->VRatioPrefetchC[k],
2647 v->BytePerPixelDETY[k],
2648 v->BytePerPixelDETC[k],
2649 v->DETBufferSizeY[k],
2650 v->DETBufferSizeC[k],
2651 &v->UrgentBurstFactorCursorPre[k],
2652 &v->UrgentBurstFactorLumaPre[k],
2653 &v->UrgentBurstFactorChromaPre[k],
2654 &v->NoUrgentLatencyHidingPre[k]);
2656 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2657 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2658 v->ReadBandwidthPlaneLuma[k] *
2659 v->UrgentBurstFactorLuma[k] +
2660 v->ReadBandwidthPlaneChroma[k] *
2661 v->UrgentBurstFactorChroma[k] +
2663 v->UrgentBurstFactorCursor[k] +
2664 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2665 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2666 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] *
2667 v->UrgentBurstFactorCursorPre[k]);
2669 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2670 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2671 v->ReadBandwidthPlaneLuma[k] +
2672 v->ReadBandwidthPlaneChroma[k] +
2674 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2675 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2677 if (v->DestinationLinesForPrefetch[k] < 2)
2678 DestinationLineTimesForPrefetchLessThan2 = true;
2679 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2680 VRatioPrefetchMoreThan4 = true;
2681 if (v->NoUrgentLatencyHiding[k] == true)
2682 v->NotEnoughUrgentLatencyHiding = true;
2684 if (v->NoUrgentLatencyHidingPre[k] == true)
2685 v->NotEnoughUrgentLatencyHidingPre = true;
2687 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2690 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding == 0 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2691 && !DestinationLineTimesForPrefetchLessThan2)
2692 v->PrefetchModeSupported = true;
2694 v->PrefetchModeSupported = false;
2695 dml_print("DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2696 dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW);
2697 dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not");
2698 dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2701 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2702 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2703 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2704 v->BandwidthAvailableForImmediateFlip =
2705 v->BandwidthAvailableForImmediateFlip
2707 v->ReadBandwidthPlaneLuma[k] * v->UrgentBurstFactorLuma[k]
2708 + v->ReadBandwidthPlaneChroma[k] * v->UrgentBurstFactorChroma[k]
2709 + v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2710 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2711 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2712 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2715 v->TotImmediateFlipBytes = 0;
2716 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2717 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2719 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2720 CalculateFlipSchedule(
2722 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2723 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2724 v->UrgentExtraLatency,
2726 v->GPUVMMaxPageTableLevels,
2728 v->HostVMMaxNonCachedPageTableLevels,
2730 v->HostVMMinPageSize,
2731 v->PDEAndMetaPTEBytesFrame[k],
2733 v->PixelPTEBytesPerRow[k],
2734 v->BandwidthAvailableForImmediateFlip,
2735 v->TotImmediateFlipBytes,
2736 v->SourcePixelFormat[k],
2737 v->HTotal[k] / v->PixelClock[k],
2742 v->dpte_row_height[k],
2743 v->meta_row_height[k],
2744 v->dpte_row_height_chroma[k],
2745 v->meta_row_height_chroma[k],
2746 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2747 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2748 &v->final_flip_bw[k],
2749 &v->ImmediateFlipSupportedForPipe[k]);
2751 v->total_dcn_read_bw_with_flip = 0.0;
2752 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2753 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2754 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3(
2755 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2756 v->DPPPerPlane[k] * v->final_flip_bw[k] +
2757 v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] +
2758 v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k] +
2759 v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2760 v->DPPPerPlane[k] * (v->final_flip_bw[k] +
2761 v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2762 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2763 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2764 v->total_dcn_read_bw_with_flip_no_urgent_burst =
2765 v->total_dcn_read_bw_with_flip_no_urgent_burst +
2766 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2767 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2768 v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2771 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2773 v->ImmediateFlipSupported = true;
2774 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2775 v->ImmediateFlipSupported = false;
2776 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2778 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2779 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2780 v->ImmediateFlipSupported = false;
2784 v->ImmediateFlipSupported = false;
2787 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2788 if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) {
2789 v->PrefetchModeSupported = false;
2790 dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2794 v->VStartupLines = v->VStartupLines + 1;
2795 v->PrefetchAndImmediateFlipSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable && v->ImmediateFlipRequirement != dm_immediate_flip_required) || v->ImmediateFlipSupported)) ? true : false;
2797 } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2798 ASSERT(v->PrefetchModeSupported);
2800 //Watermarks and NB P-State/DRAM Clock Change Support
2802 enum clock_change_support DRAMClockChangeSupport = 0; // dummy
2803 CalculateWatermarksAndDRAMSpeedChangeSupport(
2806 v->NumberOfActivePlanes,
2807 v->MaxLineBufferLines,
2809 v->DPPOutputBufferPixels,
2810 v->DETBufferSizeInKByte,
2811 v->WritebackInterfaceBufferSize,
2815 v->dpte_group_bytes,
2818 v->UrgentExtraLatency,
2819 v->WritebackLatency,
2820 v->WritebackChunkSize,
2822 v->FinalDRAMClockChangeLatency,
2824 v->SREnterPlusExitTime,
2844 v->BlendingAndTiming,
2845 v->BytePerPixelDETY,
2846 v->BytePerPixelDETC,
2850 v->WritebackPixelFormat,
2851 v->WritebackDestinationWidth,
2852 v->WritebackDestinationHeight,
2853 v->WritebackSourceHeight,
2854 &DRAMClockChangeSupport,
2855 &v->UrgentWatermark,
2856 &v->WritebackUrgentWatermark,
2857 &v->DRAMClockChangeWatermark,
2858 &v->WritebackDRAMClockChangeWatermark,
2859 &v->StutterExitWatermark,
2860 &v->StutterEnterPlusExitWatermark,
2861 &v->MinActiveDRAMClockChangeLatencySupported);
2863 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2864 if (v->WritebackEnable[k] == true) {
2865 if (v->BlendingAndTiming[k] == k) {
2866 v->ThisVStartup = v->VStartup[k];
2868 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2869 if (v->BlendingAndTiming[k] == j) {
2870 v->ThisVStartup = v->VStartup[j];
2874 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
2875 v->ThisVStartup * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2877 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2884 //Display Pipeline Delivery Time in Prefetch, Groups
2885 CalculatePixelDeliveryTimes(
2886 v->NumberOfActivePlanes,
2891 v->swath_width_luma_ub,
2892 v->swath_width_chroma_ub,
2897 v->PSCL_THROUGHPUT_LUMA,
2898 v->PSCL_THROUGHPUT_CHROMA,
2905 v->BlockWidth256BytesY,
2906 v->BlockHeight256BytesY,
2907 v->BlockWidth256BytesC,
2908 v->BlockHeight256BytesC,
2909 v->DisplayPipeLineDeliveryTimeLuma,
2910 v->DisplayPipeLineDeliveryTimeChroma,
2911 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2912 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
2913 v->DisplayPipeRequestDeliveryTimeLuma,
2914 v->DisplayPipeRequestDeliveryTimeChroma,
2915 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2916 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
2917 v->CursorRequestDeliveryTime,
2918 v->CursorRequestDeliveryTimePrefetch);
2920 CalculateMetaAndPTETimes(
2921 v->NumberOfActivePlanes,
2924 v->MinMetaChunkSizeBytes,
2928 v->DestinationLinesToRequestRowInVBlank,
2929 v->DestinationLinesToRequestRowInImmediateFlip,
2936 v->dpte_row_height_chroma,
2938 v->meta_row_width_chroma,
2940 v->meta_row_height_chroma,
2942 v->meta_req_width_chroma,
2944 v->meta_req_height_chroma,
2945 v->dpte_group_bytes,
2948 v->PixelPTEReqWidthY,
2949 v->PixelPTEReqHeightY,
2950 v->PixelPTEReqWidthC,
2951 v->PixelPTEReqHeightC,
2952 v->dpte_row_width_luma_ub,
2953 v->dpte_row_width_chroma_ub,
2954 v->DST_Y_PER_PTE_ROW_NOM_L,
2955 v->DST_Y_PER_PTE_ROW_NOM_C,
2956 v->DST_Y_PER_META_ROW_NOM_L,
2957 v->DST_Y_PER_META_ROW_NOM_C,
2958 v->TimePerMetaChunkNominal,
2959 v->TimePerChromaMetaChunkNominal,
2960 v->TimePerMetaChunkVBlank,
2961 v->TimePerChromaMetaChunkVBlank,
2962 v->TimePerMetaChunkFlip,
2963 v->TimePerChromaMetaChunkFlip,
2964 v->time_per_pte_group_nom_luma,
2965 v->time_per_pte_group_vblank_luma,
2966 v->time_per_pte_group_flip_luma,
2967 v->time_per_pte_group_nom_chroma,
2968 v->time_per_pte_group_vblank_chroma,
2969 v->time_per_pte_group_flip_chroma);
2971 CalculateVMGroupAndRequestTimes(
2972 v->NumberOfActivePlanes,
2974 v->GPUVMMaxPageTableLevels,
2977 v->DestinationLinesToRequestVMInVBlank,
2978 v->DestinationLinesToRequestVMInImmediateFlip,
2981 v->dpte_row_width_luma_ub,
2982 v->dpte_row_width_chroma_ub,
2984 v->dpde0_bytes_per_frame_ub_l,
2985 v->dpde0_bytes_per_frame_ub_c,
2986 v->meta_pte_bytes_per_frame_ub_l,
2987 v->meta_pte_bytes_per_frame_ub_c,
2988 v->TimePerVMGroupVBlank,
2989 v->TimePerVMGroupFlip,
2990 v->TimePerVMRequestVBlank,
2991 v->TimePerVMRequestFlip);
2995 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2996 if (PrefetchMode == 0) {
2997 v->AllowDRAMClockChangeDuringVBlank[k] = true;
2998 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2999 v->MinTTUVBlank[k] = dml_max(
3000 v->DRAMClockChangeWatermark,
3002 v->StutterEnterPlusExitWatermark,
3003 v->UrgentWatermark));
3004 } else if (PrefetchMode == 1) {
3005 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3006 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3007 v->MinTTUVBlank[k] = dml_max(
3008 v->StutterEnterPlusExitWatermark,
3009 v->UrgentWatermark);
3011 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3012 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3013 v->MinTTUVBlank[k] = v->UrgentWatermark;
3015 if (!v->DynamicMetadataEnable[k])
3016 v->MinTTUVBlank[k] = v->TCalc
3017 + v->MinTTUVBlank[k];
3020 // DCC Configuration
3022 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3023 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3024 v->SourcePixelFormat[k],
3025 v->SurfaceWidthY[k],
3026 v->SurfaceWidthC[k],
3027 v->SurfaceHeightY[k],
3028 v->SurfaceHeightC[k],
3029 v->DETBufferSizeInKByte * 1024,
3030 v->BlockHeight256BytesY[k],
3031 v->BlockHeight256BytesC[k],
3032 v->SurfaceTiling[k],
3033 v->BytePerPixelY[k],
3034 v->BytePerPixelC[k],
3035 v->BytePerPixelDETY[k],
3036 v->BytePerPixelDETC[k],
3038 &v->DCCYMaxUncompressedBlock[k],
3039 &v->DCCCMaxUncompressedBlock[k],
3040 &v->DCCYMaxCompressedBlock[k],
3041 &v->DCCCMaxCompressedBlock[k],
3042 &v->DCCYIndependentBlock[k],
3043 &v->DCCCIndependentBlock[k]);
3047 //Maximum Bandwidth Used
3048 double TotalWRBandwidth = 0;
3049 double MaxPerPlaneVActiveWRBandwidth = 0;
3050 double WRBandwidth = 0;
3051 double MaxUsedBW = 0;
3052 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3053 if (v->WritebackEnable[k] == true
3054 && v->WritebackPixelFormat[k] == dm_444_32) {
3055 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3056 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3057 } else if (v->WritebackEnable[k] == true) {
3058 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3059 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3061 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3062 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3065 v->TotalDataReadBandwidth = 0;
3066 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3067 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
3068 + v->ReadBandwidthPlaneLuma[k]
3069 + v->ReadBandwidthPlaneChroma[k];
3073 double MaxPerPlaneVActiveRDBandwidth = 0;
3074 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3075 MaxPerPlaneVActiveRDBandwidth = dml_max(MaxPerPlaneVActiveRDBandwidth,
3076 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
3081 MaxUsedBW = MaxTotalRDBandwidth + TotalWRBandwidth;
3085 v->VStartupMargin = 0;
3086 v->FirstMainPlane = true;
3087 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3088 if (v->BlendingAndTiming[k] == k) {
3089 double margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k]
3091 if (v->FirstMainPlane == true) {
3092 v->VStartupMargin = margin;
3093 v->FirstMainPlane = false;
3095 v->VStartupMargin = dml_min(v->VStartupMargin, margin);
3100 // Stutter Efficiency
3101 CalculateStutterEfficiency(
3102 v->NumberOfActivePlanes,
3103 v->ROBBufferSizeInKByte,
3104 v->TotalDataReadBandwidth,
3108 v->SynchronizedVBlank,
3112 v->BytePerPixelDETY,
3123 v->BlockHeight256BytesY,
3124 v->BlockWidth256BytesY,
3125 v->BlockHeight256BytesC,
3126 v->BlockWidth256BytesC,
3127 v->DCCYMaxUncompressedBlock,
3128 v->DCCCMaxUncompressedBlock,
3132 v->ReadBandwidthPlaneLuma,
3133 v->ReadBandwidthPlaneChroma,
3136 &v->StutterEfficiencyNotIncludingVBlank,
3137 &v->StutterEfficiency);
/*
 * DisplayPipeConfiguration - per-plane block sizes followed by the swath/DET
 * configuration for all active planes.
 *
 * mode_lib: display mode library instance; everything is read from and
 *           passed on through mode_lib->vba.
 *
 * Two steps are visible:
 *   1. for every active plane, CalculateBytePerPixelAnd256BBlockSizes() is
 *      called with the plane's pixel format and tiling, and the addresses of
 *      the local Read256BytesBlock{Height,Width}{Y,C} entries for that plane;
 *   2. CalculateSwathAndDETConfiguration() is called once with whole arrays.
 *
 * NOTE(review): this listing does not contain every source line (block
 * braces, the declaration of k and several call arguments are absent), so
 * the argument lists below are incomplete as shown - check the full file
 * before relying on argument positions.
 */
3140 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3142 // Display Pipe Configuration
/* Per-plane scratch arrays, zero-initialised, DC__NUM_DPP__MAX entries each. */
3143 double BytePerPixDETY[DC__NUM_DPP__MAX] = { 0 };
3144 double BytePerPixDETC[DC__NUM_DPP__MAX] = { 0 };
3145 int BytePerPixY[DC__NUM_DPP__MAX] = { 0 };
3146 int BytePerPixC[DC__NUM_DPP__MAX] = { 0 };
3147 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX] = { 0 };
3148 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX] = { 0 };
3149 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX] = { 0 };
3150 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX] = { 0 };
/*
 * dummy1..dummy7 / dummysinglestring: presumably sinks for results of
 * CalculateSwathAndDETConfiguration() that this caller does not use; only
 * &dummysinglestring is visibly passed in this listing - TODO confirm.
 */
3151 double dummy1[DC__NUM_DPP__MAX] = { 0 };
3152 double dummy2[DC__NUM_DPP__MAX] = { 0 };
3153 double dummy3[DC__NUM_DPP__MAX] = { 0 };
3154 double dummy4[DC__NUM_DPP__MAX] = { 0 };
3155 int dummy5[DC__NUM_DPP__MAX] = { 0 };
3156 int dummy6[DC__NUM_DPP__MAX] = { 0 };
3157 bool dummy7[DC__NUM_DPP__MAX] = { 0 };
3158 bool dummysinglestring = 0;
/* Step 1: 256-byte block dimensions for each active plane. */
3161 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
3163 CalculateBytePerPixelAnd256BBlockSizes(
3164 mode_lib->vba.SourcePixelFormat[k],
3165 mode_lib->vba.SurfaceTiling[k],
3170 &Read256BytesBlockHeightY[k],
3171 &Read256BytesBlockHeightC[k],
3172 &Read256BytesBlockWidthY[k],
3173 &Read256BytesBlockWidthC[k]);
/* Step 2: one swath/DET configuration call covering all planes. */
3175 CalculateSwathAndDETConfiguration(
3177 mode_lib->vba.NumberOfActivePlanes,
3178 mode_lib->vba.DETBufferSizeInKByte,
3181 mode_lib->vba.SourceScan,
3182 mode_lib->vba.SourcePixelFormat,
3183 mode_lib->vba.SurfaceTiling,
3184 mode_lib->vba.ViewportWidth,
3185 mode_lib->vba.ViewportHeight,
3186 mode_lib->vba.SurfaceWidthY,
3187 mode_lib->vba.SurfaceWidthC,
3188 mode_lib->vba.SurfaceHeightY,
3189 mode_lib->vba.SurfaceHeightC,
3190 Read256BytesBlockHeightY,
3191 Read256BytesBlockHeightC,
3192 Read256BytesBlockWidthY,
3193 Read256BytesBlockWidthC,
3194 mode_lib->vba.ODMCombineEnabled,
3195 mode_lib->vba.BlendingAndTiming,
3200 mode_lib->vba.HActive,
3201 mode_lib->vba.HRatio,
3202 mode_lib->vba.HRatioChroma,
3203 mode_lib->vba.DPPPerPlane,
3208 mode_lib->vba.SwathHeightY,
3209 mode_lib->vba.SwathHeightC,
3210 mode_lib->vba.DETBufferSizeY,
3211 mode_lib->vba.DETBufferSizeC,
3213 &dummysinglestring);
/*
 * CalculateBytePerPixelAnd256BBlockSizes - map a source pixel format and
 * surface tiling to per-pixel byte counts and 256-byte block dimensions.
 *
 * SourcePixelFormat:         format being fetched.
 * SurfaceTiling:             swizzle mode; dm_sw_linear forces a block
 *                            height of 1.
 * BytePerPixelY/C:           integer byte counts, read below as the divisor
 *                            of the block width computation.
 * BytePerPixelDETY/C:        byte counts as doubles (may be fractional,
 *                            e.g. 4.0 / 3); C is 0 for single-plane formats.
 * BlockHeight256BytesY/C,
 * BlockWidth256BytesY/C:     block height and width, computed so that
 *                            width = 256 / bytes per pixel / height.
 *
 * NOTE(review): this listing does not contain every source line. No
 * assignment to BytePerPixelY / BytePerPixelC and no return statement is
 * visible although the function returns bool and reads both values; the
 * `else` lines introducing the fall-through branches are also absent.
 * Check the full file.
 */
3216 static bool CalculateBytePerPixelAnd256BBlockSizes(
3217 enum source_format_class SourcePixelFormat,
3218 enum dm_swizzle_mode SurfaceTiling,
3219 unsigned int *BytePerPixelY,
3220 unsigned int *BytePerPixelC,
3221 double *BytePerPixelDETY,
3222 double *BytePerPixelDETC,
3223 unsigned int *BlockHeight256BytesY,
3224 unsigned int *BlockHeight256BytesC,
3225 unsigned int *BlockWidth256BytesY,
3226 unsigned int *BlockWidth256BytesC)
/* Part 1: byte counts per pixel, selected by format. */
3228 if (SourcePixelFormat == dm_444_64) {
3229 *BytePerPixelDETY = 8;
3230 *BytePerPixelDETC = 0;
3233 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3234 *BytePerPixelDETY = 4;
3235 *BytePerPixelDETC = 0;
/*
 * NOTE(review): both sides of this || test dm_444_16, so the second
 * comparison is redundant. dm_mono_16 is handled in the block-size section
 * below but has no byte-count branch here - possibly one of the two tests
 * was meant to be a different format; confirm with the HW team.
 */
3238 } else if (SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_16) {
3239 *BytePerPixelDETY = 2;
3240 *BytePerPixelDETC = 0;
3243 } else if (SourcePixelFormat == dm_444_8) {
3244 *BytePerPixelDETY = 1;
3245 *BytePerPixelDETC = 0;
3248 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3249 *BytePerPixelDETY = 4;
3250 *BytePerPixelDETC = 1;
3253 } else if (SourcePixelFormat == dm_420_8) {
3254 *BytePerPixelDETY = 1;
3255 *BytePerPixelDETC = 2;
3258 } else if (SourcePixelFormat == dm_420_12) {
3259 *BytePerPixelDETY = 2;
3260 *BytePerPixelDETC = 4;
/* Presumably the final else: any format not matched above - TODO confirm. */
3264 *BytePerPixelDETY = 4.0 / 3;
3265 *BytePerPixelDETC = 8.0 / 3;
/*
 * Part 2: 256-byte block dimensions. The formats listed in this condition
 * get a luma block only; their chroma height and width are set to 0.
 */
3270 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
3271 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
3272 || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
3273 || SourcePixelFormat == dm_rgbe)) {
3274 if (SurfaceTiling == dm_sw_linear) {
3275 *BlockHeight256BytesY = 1;
3276 } else if (SourcePixelFormat == dm_444_64) {
3277 *BlockHeight256BytesY = 4;
3278 } else if (SourcePixelFormat == dm_444_8) {
3279 *BlockHeight256BytesY = 16;
3281 *BlockHeight256BytesY = 8;
3283 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3284 *BlockHeight256BytesC = 0;
3285 *BlockWidth256BytesC = 0;
/* Remaining formats: both a luma and a chroma block are computed. */
3287 if (SurfaceTiling == dm_sw_linear) {
3288 *BlockHeight256BytesY = 1;
3289 *BlockHeight256BytesC = 1;
3290 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3291 *BlockHeight256BytesY = 8;
3292 *BlockHeight256BytesC = 16;
3293 } else if (SourcePixelFormat == dm_420_8) {
3294 *BlockHeight256BytesY = 16;
3295 *BlockHeight256BytesC = 8;
3297 *BlockHeight256BytesY = 8;
3298 *BlockHeight256BytesC = 8;
3300 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3301 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
/*
 * CalculateTWait - wait time selected by prefetch mode.
 *
 * PrefetchMode 0: the larger of (DRAMClockChangeLatency + UrgentLatency),
 *                 SREnterPlusExitTime and UrgentLatency.
 * PrefetchMode 1: the larger of SREnterPlusExitTime and UrgentLatency.
 * any other mode: UrgentLatency.
 *
 * Returns the selected value, in the same unit as the latency arguments.
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
/*
 * dml30_CalculateWriteBackDISPCLK - DISPCLK value needed for one writeback
 * configuration.
 *
 * Three candidates are computed from the scaling ratio, tap counts, widths,
 * HTotal and line buffer size (DISPCLK_H, DISPCLK_V, DISPCLK_HB); the
 * largest one is returned via dml_max3().
 *
 * WritebackPixelFormat and WritebackVRatio are not referenced in the lines
 * visible here.
 *
 * NOTE(review): PixelClock is used below but is not declared in the visible
 * parameter list; this listing appears to be missing a parameter line (and
 * the function braces). Check the full file.
 */
3322 double dml30_CalculateWriteBackDISPCLK(
3323 enum source_format_class WritebackPixelFormat,
3325 double WritebackHRatio,
3326 double WritebackVRatio,
3327 unsigned int WritebackHTaps,
3328 unsigned int WritebackVTaps,
3329 long WritebackSourceWidth,
3330 long WritebackDestinationWidth,
3331 unsigned int HTotal,
3332 unsigned int WritebackLineBufferSize)
3334 double DISPCLK_H = 0, DISPCLK_V = 0, DISPCLK_HB = 0;
/* Candidate driven by horizontal taps (in groups of 8) and horizontal ratio. */
3336 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
/* Candidate driven by vertical taps and destination width, per HTotal. */
3337 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
/* Candidate that also accounts for the writeback line buffer size. */
3338 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3339 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3342 static double CalculateWriteBackDelay(
3343 enum source_format_class WritebackPixelFormat,
3344 double WritebackHRatio,
3345 double WritebackVRatio,
3346 unsigned int WritebackVTaps,
3347 long WritebackDestinationWidth,
3348 long WritebackDestinationHeight,
3349 long WritebackSourceHeight,
3350 unsigned int HTotal)
3352 double CalculateWriteBackDelay = 0;
3353 double Line_length = 0;
3354 double Output_lines_last_notclamped = 0;
3355 double WritebackVInit = 0;
3357 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3358 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3359 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3360 if (Output_lines_last_notclamped < 0) {
3361 CalculateWriteBackDelay = 0;
3363 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3365 return CalculateWriteBackDelay;
/*
 * CalculateDynamicMetadataParameters - timing values for dynamic metadata.
 *
 * Outputs (all written unconditionally):
 *   Tsetup: (VUpdate offset + VUpdate width + VReady offset, all in pixels)
 *           divided by PixelClock.
 *   Tdmbf:  DynamicMetadataTransmittedBytes / 4 / DISPCLK.
 *   Tdmec:  HTotal / PixelClock.
 *   Tdmsks: half of VBlank * HTotal / PixelClock when
 *           DynamicMetadataLinesBeforeActiveRequired is 0, otherwise that
 *           line count * HTotal / PixelClock; halved again when
 *           InterlaceEnable is 1 and ProgressiveToInterlaceUnitInOPP is
 *           false.
 */
static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
		double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes,
		long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
		double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
{
	const double repeater_delay = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	const double vupdate_width_pix = (14 / DCFClkDeepSleep + 12 / DPPCLK + repeater_delay) * PixelClock;
	const double vready_offset_pix = dml_max(150.0 / DPPCLK,
			repeater_delay + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock;
	const double vupdate_offset_pix = dml_ceil(HTotal / 4.0, 1);
	double tdmsks;

	*Tsetup = (vupdate_offset_pix + vupdate_width_pix + vready_offset_pix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;

	tdmsks = (DynamicMetadataLinesBeforeActiveRequired == 0)
			? VBlank * HTotal / PixelClock / 2.0
			: DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
		tdmsks = tdmsks / 2;
	*Tdmsks = tdmsks;
}
/*
 * CalculateRowBandwidth - meta-row and dpte-row bandwidth for one plane.
 *
 * Writes two outputs:
 *   meta_row_bw: VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime),
 *                plus the matching chroma term (using VRatioChroma) for
 *                dm_420_8, dm_420_10, dm_420_12 and dm_rgbe_alpha.
 *   dpte_row_bw: the same shape of formula using the PixelPTEBytesPerRow and
 *                dpte_row_height values.
 *
 * NOTE(review): this listing does not contain every source line. DCCEnable,
 * VRatio, GPUVMEnable and LineTime are used but not declared in the visible
 * parameter list, and the statements executed when DCCEnable / GPUVMEnable
 * are not true are not visible. Check the full file.
 */
3395 static void CalculateRowBandwidth(
3397 enum source_format_class SourcePixelFormat,
3399 double VRatioChroma,
3402 unsigned int MetaRowByteLuma,
3403 unsigned int MetaRowByteChroma,
3404 unsigned int meta_row_height_luma,
3405 unsigned int meta_row_height_chroma,
3406 unsigned int PixelPTEBytesPerRowLuma,
3407 unsigned int PixelPTEBytesPerRowChroma,
3408 unsigned int dpte_row_height_luma,
3409 unsigned int dpte_row_height_chroma,
3410 double *meta_row_bw,
3411 double *dpte_row_bw)
/* Meta row bandwidth; the formulas apply only when DCCEnable is true. */
3413 if (DCCEnable != true) {
3415 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3416 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3417 + VRatioChroma * MetaRowByteChroma
3418 / (meta_row_height_chroma * LineTime);
/* Presumably the else branch: luma-only term for all other formats. */
3420 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
/* DPTE row bandwidth; the formulas apply only when GPUVMEnable is true. */
3423 if (GPUVMEnable != true) {
3425 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3426 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3427 + VRatioChroma * PixelPTEBytesPerRowChroma
3428 / (dpte_row_height_chroma * LineTime);
/* Presumably the else branch: luma-only term for all other formats. */
3430 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - immediate-flip timing and support check for one
 * pipe.
 *
 * Outputs:
 *   DestinationLinesToRequestVMInImmediateFlip:  time to fetch meta PTE
 *       data, expressed in lines.
 *   DestinationLinesToRequestRowInImmediateFlip: time to fetch row data,
 *       expressed in lines.
 *   final_flip_bw: bandwidth implied by the two line counts above.
 *   ImmediateFlipSupportedForPipe: false when either line count reaches its
 *       limit (32 / 16) or the fetch times exceed min_row_time.
 *
 * NOTE(review): this listing does not contain every source line.
 * GPUVMEnable, HostVMEnable, DCCEnable, Tno_bw, VRatio and LineTime are used
 * but not declared in the visible parameter list, and several `else` lines
 * and short statements are absent. Check the full file.
 */
3434 static void CalculateFlipSchedule(
3435 struct display_mode_lib *mode_lib,
3436 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3437 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3438 double UrgentExtraLatency,
3439 double UrgentLatency,
3440 unsigned int GPUVMMaxPageTableLevels,
3442 unsigned int HostVMMaxNonCachedPageTableLevels,
3444 double HostVMMinPageSize,
3445 double PDEAndMetaPTEBytesPerFrame,
3446 double MetaRowBytes,
3447 double DPTEBytesPerRow,
3448 double BandwidthAvailableForImmediateFlip,
3449 unsigned int TotImmediateFlipBytes,
3450 enum source_format_class SourcePixelFormat,
3453 double VRatioChroma,
3456 unsigned int dpte_row_height,
3457 unsigned int meta_row_height,
3458 unsigned int dpte_row_height_chroma,
3459 unsigned int meta_row_height_chroma,
3460 double *DestinationLinesToRequestVMInImmediateFlip,
3461 double *DestinationLinesToRequestRowInImmediateFlip,
3462 double *final_flip_bw,
3463 bool *ImmediateFlipSupportedForPipe)
3465 double min_row_time = 0.0;
3466 unsigned int HostVMDynamicLevelsTrips = 0;
3467 double TimeForFetchingMetaPTEImmediateFlip = 0;
3468 double TimeForFetchingRowInVBlankImmediateFlip = 0;
3469 double ImmediateFlipBW = 0;
3470 double HostVMInefficiencyFactor = 0;
/*
 * With both GPUVM and HostVM enabled the inefficiency factor is the ratio of
 * the two bandwidth percentages and the trip count is the non-cached page
 * table level count; the factor 1 / trips 0 pair below is presumably the
 * else branch.
 */
3472 if (GPUVMEnable == true && HostVMEnable == true) {
3473 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3474 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3476 HostVMInefficiencyFactor = 1;
3477 HostVMDynamicLevelsTrips = 0;
/*
 * This pipe's part of the available immediate flip bandwidth: its own bytes
 * scaled by BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes.
 */
3480 if (GPUVMEnable == true || DCCEnable == true) {
3481 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
/* Meta PTE fetch time: largest of three lower bounds, or 0 without GPUVM. */
3484 if (GPUVMEnable == true) {
3485 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3486 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0);
3488 TimeForFetchingMetaPTEImmediateFlip = 0;
/* Time converted to lines; presumably rounded up to a quarter line by dml_ceil(). */
3491 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
/* Row fetch time: largest of three lower bounds, or 0 without GPUVM and DCC. */
3492 if ((GPUVMEnable == true || DCCEnable == true)) {
3493 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3494 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4);
3496 TimeForFetchingRowInVBlankImmediateFlip = 0;
3499 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
/*
 * Flip bandwidth. The else-if is only reached with GPUVMEnable false, so it
 * is effectively the DCC-only case.
 */
3501 if (GPUVMEnable == true) {
3502 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3503 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3504 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3505 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
/*
 * min_row_time: shortest row time among the row types in use (dpte rows
 * with GPUVM, meta rows with DCC), including the chroma rows for the
 * formats listed here.
 * NOTE(review): dm_420_12 is not in this list although
 * CalculateRowBandwidth() treats it as a two-plane format - confirm whether
 * the omission is intentional.
 */
3511 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3512 if (GPUVMEnable == true && DCCEnable != true) {
3513 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3514 } else if (GPUVMEnable != true && DCCEnable == true) {
3515 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3517 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio,
3518 dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
3521 if (GPUVMEnable == true && DCCEnable != true) {
3522 min_row_time = dpte_row_height * LineTime / VRatio;
3523 } else if (GPUVMEnable != true && DCCEnable == true) {
3524 min_row_time = meta_row_height * LineTime / VRatio;
3526 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
/*
 * Unsupported when the VM request needs 32 lines or more, the row request
 * needs 16 lines or more, or meta PTE time plus twice the row time does not
 * fit in min_row_time.
 */
3530 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3531 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3532 *ImmediateFlipSupportedForPipe = false;
3534 *ImmediateFlipSupportedForPipe = true;
/*
 * TruncToValidBPP - pick or validate an output bits-per-pixel value against
 * the link bandwidth and the DSC / non-DSC limits of the output format.
 *
 * NOTE(review): this listing does not contain most of the function. Many
 * parameters and locals used below (LinkBitRate, Lanes, PixelClock,
 * DesiredBPP, DSCEnable, MinDSCBPP, NonDSCBPP0..2) are not declared in the
 * visible lines, and the bodies / return statements of most branches are
 * absent. The comments below describe only what is visible; check the full
 * file before changing anything here.
 */
3538 static double TruncToValidBPP(
3546 enum output_encoder_class Output,
3547 enum output_format_class Format,
3548 unsigned int DSCInputBitPerComponent,
3552 enum odm_combine_mode ODMCombine)
3554 double MaxLinkBPP = 0;
3556 double MaxDSCBPP = 0;
/*
 * Per-format limits. The visible lines set MaxDSCBPP to a multiple of
 * DSCInputBitPerComponent (1.5x for dm_420, 3x for dm_444, 2x for dm_n422)
 * minus 1/16.
 */
3561 if (Format == dm_420) {
3566 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3567 } else if (Format == dm_444) {
3572 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3574 if (Output == dm_hdmi) {
3585 if (Format == dm_n422) {
3587 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3591 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
/*
 * Link-limited bpp: LinkBitRate / 10 * 8 * Lanes / PixelClock, reduced by
 * 2.4 percent when DSC is enabled on a dm_dp output.
 */
3595 if (DSCEnable && Output == dm_dp) {
3596 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3598 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
/* ODM combine cases with thresholds of 16 (4to1) and 32 (2to1); bodies not visible. */
3601 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3603 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
/* DesiredBPP == 0: a value is chosen from the limits computed above. */
3608 if (DesiredBPP == 0) {
3610 if (MaxLinkBPP < MinDSCBPP) {
3612 } else if (MaxLinkBPP >= MaxDSCBPP) {
/* Presumably truncates MaxLinkBPP down to a multiple of 1/16 via dml_floor(). */
3615 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3618 if (MaxLinkBPP >= NonDSCBPP2) {
3620 } else if (MaxLinkBPP >= NonDSCBPP1) {
3622 } else if (MaxLinkBPP >= NonDSCBPP0) {
/*
 * Non-zero DesiredBPP: this condition is true when the request is neither
 * one of the three non-DSC values (DSC off) nor within
 * [MinDSCBPP, MaxDSCBPP] (DSC on).
 */
3629 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) ||
3630 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3639 void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3641 struct vba_vars_st *v = &mode_lib->vba;
3642 int MinPrefetchMode = 0;
3643 int MaxPrefetchMode = 2;
3645 unsigned int j, k, m;
3646 bool EnoughWritebackUnits = true;
3647 bool WritebackModeSupport = true;
3648 bool ViewportExceedsSurface = false;
3649 double MaxTotalVActiveRDBandwidth = 0;
3650 long ReorderingBytes = 0;
3651 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX] = { 0 };
3653 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3655 /*Scale Ratio, taps Support Check*/
3657 v->ScaleRatioAndTapsSupport = true;
3658 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3659 if (v->ScalerEnabled[k] == false
3660 && ((v->SourcePixelFormat[k] != dm_444_64
3661 && v->SourcePixelFormat[k] != dm_444_32
3662 && v->SourcePixelFormat[k] != dm_444_16
3663 && v->SourcePixelFormat[k] != dm_mono_16
3664 && v->SourcePixelFormat[k] != dm_mono_8
3665 && v->SourcePixelFormat[k] != dm_rgbe
3666 && v->SourcePixelFormat[k] != dm_rgbe_alpha)
3667 || v->HRatio[k] != 1.0
3668 || v->htaps[k] != 1.0
3669 || v->VRatio[k] != 1.0
3670 || v->vtaps[k] != 1.0)) {
3671 v->ScaleRatioAndTapsSupport = false;
3672 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0
3673 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3674 || (v->htaps[k] > 1.0
3675 && (v->htaps[k] % 2) == 1)
3676 || v->HRatio[k] > v->MaxHSCLRatio
3677 || v->VRatio[k] > v->MaxVSCLRatio
3678 || v->HRatio[k] > v->htaps[k]
3679 || v->VRatio[k] > v->vtaps[k]
3680 || (v->SourcePixelFormat[k] != dm_444_64
3681 && v->SourcePixelFormat[k] != dm_444_32
3682 && v->SourcePixelFormat[k] != dm_444_16
3683 && v->SourcePixelFormat[k] != dm_mono_16
3684 && v->SourcePixelFormat[k] != dm_mono_8
3685 && v->SourcePixelFormat[k] != dm_rgbe
3686 && (v->VTAPsChroma[k] < 1
3687 || v->VTAPsChroma[k] > 8
3688 || v->HTAPsChroma[k] < 1
3689 || v->HTAPsChroma[k] > 8
3690 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3691 || v->HRatioChroma[k] > v->MaxHSCLRatio
3692 || v->VRatioChroma[k] > v->MaxVSCLRatio
3693 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3694 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3695 v->ScaleRatioAndTapsSupport = false;
3698 /*Source Format, Pixel Format and Scan Support Check*/
3700 v->SourceFormatPixelAndScanSupport = true;
3701 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3702 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3703 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x)
3704 && !(v->SourcePixelFormat[k] == dm_444_64))) {
3705 v->SourceFormatPixelAndScanSupport = false;
3708 /*Bandwidth Support Check*/
3710 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3711 CalculateBytePerPixelAnd256BBlockSizes(
3712 v->SourcePixelFormat[k],
3713 v->SurfaceTiling[k],
3714 &v->BytePerPixelY[k],
3715 &v->BytePerPixelC[k],
3716 &v->BytePerPixelInDETY[k],
3717 &v->BytePerPixelInDETC[k],
3718 &v->Read256BlockHeightY[k],
3719 &v->Read256BlockHeightC[k],
3720 &v->Read256BlockWidthY[k],
3721 &v->Read256BlockWidthC[k]);
3723 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3724 if (v->SourceScan[k] != dm_vert) {
3725 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3726 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3728 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3729 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3732 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3733 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3734 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3736 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3737 if (v->WritebackEnable[k] == true
3738 && v->WritebackPixelFormat[k] == dm_444_64) {
3739 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3740 * v->WritebackDestinationHeight[k]
3741 / (v->WritebackSourceHeight[k]
3743 / v->PixelClock[k]) * 8.0;
3744 } else if (v->WritebackEnable[k] == true) {
3745 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3746 * v->WritebackDestinationHeight[k]
3747 / (v->WritebackSourceHeight[k]
3749 / v->PixelClock[k]) * 4.0;
3751 v->WriteBandwidth[k] = 0.0;
3755 /*Writeback Latency support check*/
3757 v->WritebackLatencySupport = true;
3758 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3759 if (v->WritebackEnable[k] == true) {
3760 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave ||
3761 v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
3762 if (v->WriteBandwidth[k]
3763 > 2.0 * v->WritebackInterfaceBufferSize * 1024
3764 / v->WritebackLatency) {
3765 v->WritebackLatencySupport = false;
3768 if (v->WriteBandwidth[k]
3769 > v->WritebackInterfaceBufferSize * 1024
3770 / v->WritebackLatency) {
3771 v->WritebackLatencySupport = false;
3777 /*Writeback Mode Support Check*/
3779 v->TotalNumberOfActiveWriteback = 0;
3780 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3781 if (v->WritebackEnable[k] == true) {
3782 v->TotalNumberOfActiveWriteback =
3783 v->TotalNumberOfActiveWriteback + 1;
3787 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3788 EnoughWritebackUnits = false;
3790 if (!v->WritebackSupportInterleaveAndUsingWholeBufferForASingleStream
3791 && (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave
3792 || v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave)) {
3794 WritebackModeSupport = false;
3796 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave && v->TotalNumberOfActiveWriteback > 1) {
3797 WritebackModeSupport = false;
3800 /*Writeback Scale Ratio and Taps Support Check*/
3802 v->WritebackScaleRatioAndTapsSupport = true;
3803 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3804 if (v->WritebackEnable[k] == true) {
3805 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio
3806 || v->WritebackVRatio[k]
3807 > v->WritebackMaxVSCLRatio
3808 || v->WritebackHRatio[k]
3809 < v->WritebackMinHSCLRatio
3810 || v->WritebackVRatio[k]
3811 < v->WritebackMinVSCLRatio
3812 || v->WritebackHTaps[k]
3813 > v->WritebackMaxHSCLTaps
3814 || v->WritebackVTaps[k]
3815 > v->WritebackMaxVSCLTaps
3816 || v->WritebackHRatio[k]
3817 > v->WritebackHTaps[k]
3818 || v->WritebackVRatio[k]
3819 > v->WritebackVTaps[k]
3820 || (v->WritebackHTaps[k] > 2.0
3821 && ((v->WritebackHTaps[k] % 2)
3823 v->WritebackScaleRatioAndTapsSupport = false;
3825 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3826 v->WritebackScaleRatioAndTapsSupport = false;
3830 /*Maximum DISPCLK/DPPCLK Support check*/
3832 v->WritebackRequiredDISPCLK = 0.0;
3833 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3834 if (v->WritebackEnable[k] == true) {
3835 v->WritebackRequiredDISPCLK = dml_max(v->WritebackRequiredDISPCLK,
3836 dml30_CalculateWriteBackDISPCLK(
3837 v->WritebackPixelFormat[k],
3839 v->WritebackHRatio[k],
3840 v->WritebackVRatio[k],
3841 v->WritebackHTaps[k],
3842 v->WritebackVTaps[k],
3843 v->WritebackSourceWidth[k],
3844 v->WritebackDestinationWidth[k],
3846 v->WritebackLineBufferSize));
3849 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3850 if (v->HRatio[k] > 1.0) {
3851 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3853 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3855 if (v->BytePerPixelC[k] == 0.0) {
3856 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3857 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3858 * dml_max3(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0);
3859 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3860 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3863 if (v->HRatioChroma[k] > 1.0) {
3864 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
3865 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3867 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3869 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3870 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3871 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3872 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3874 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3875 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3876 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3880 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3881 int MaximumSwathWidthSupportLuma = 0;
3882 int MaximumSwathWidthSupportChroma = 0;
3884 if (v->SurfaceTiling[k] == dm_sw_linear) {
3885 MaximumSwathWidthSupportLuma = 8192.0;
3886 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3887 MaximumSwathWidthSupportLuma = 2880.0;
3889 MaximumSwathWidthSupportLuma = 5760.0;
3892 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
3893 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
3895 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
3897 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
3898 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
3899 if (v->BytePerPixelC[k] == 0.0) {
3900 v->MaximumSwathWidthInLineBufferChroma = 0;
3902 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
3903 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
3905 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
3906 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
3909 CalculateSwathAndDETConfiguration(
3911 v->NumberOfActivePlanes,
3912 v->DETBufferSizeInKByte,
3913 v->MaximumSwathWidthLuma,
3914 v->MaximumSwathWidthChroma,
3916 v->SourcePixelFormat,
3924 v->Read256BlockHeightY,
3925 v->Read256BlockHeightC,
3926 v->Read256BlockWidthY,
3927 v->Read256BlockWidthC,
3928 v->odm_combine_dummy,
3929 v->BlendingAndTiming,
3932 v->BytePerPixelInDETY,
3933 v->BytePerPixelInDETC,
3938 v->swath_width_luma_ub,
3939 v->swath_width_chroma_ub,
3946 v->SingleDPPViewportSizeSupportPerPlane,
3947 &v->ViewportSizeSupport[0][0]);
3949 for (i = 0; i < v->soc.num_states; i++) {
3950 for (j = 0; j < 2; j++) {
3951 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
3952 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
3953 v->RequiredDISPCLK[i][j] = 0.0;
3954 v->DISPCLK_DPPCLK_Support[i][j] = true;
3955 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3956 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3957 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3958 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states]
3959 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states])) {
3960 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3962 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3963 * (1 + v->DISPCLKRampingMargin / 100.0);
3964 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states]
3965 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states])) {
3966 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3968 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3969 * (1 + v->DISPCLKRampingMargin / 100.0);
3970 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states]
3971 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states])) {
3972 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3975 if (v->ODMCombinePolicy == dm_odm_combine_policy_none) {
3976 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3977 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3978 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
3979 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3980 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3981 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
3982 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
3983 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3984 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3985 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
3986 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3987 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3988 } else if (v->DSCEnabled[k] && (v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH)) {
3989 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3990 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3992 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3993 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3994 /*420 format workaround*/
3995 if (v->HActive[k] > 4096 && v->OutputFormat[k] == dm_420) {
3996 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3997 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4001 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4002 v->MPCCombine[i][j][k] = false;
4003 v->NoOfDPP[i][j][k] = 4;
4004 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4005 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4006 v->MPCCombine[i][j][k] = false;
4007 v->NoOfDPP[i][j][k] = 2;
4008 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4009 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4010 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity
4011 && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4012 v->MPCCombine[i][j][k] = false;
4013 v->NoOfDPP[i][j][k] = 1;
4014 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4016 v->MPCCombine[i][j][k] = true;
4017 v->NoOfDPP[i][j][k] = 2;
4018 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4020 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4021 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4022 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4023 v->DISPCLK_DPPCLK_Support[i][j] = false;
4026 v->TotalNumberOfActiveDPP[i][j] = 0;
4027 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4028 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4029 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4030 if (v->NoOfDPP[i][j][k] == 1)
4031 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4033 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never) {
4034 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4035 double BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4036 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4037 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4038 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4039 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4040 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4041 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4042 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4043 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4046 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4047 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4048 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4049 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4050 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4051 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4054 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4055 v->RequiredDISPCLK[i][j] = 0.0;
4056 v->DISPCLK_DPPCLK_Support[i][j] = true;
4057 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4058 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4059 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4060 v->MPCCombine[i][j][k] = true;
4061 v->NoOfDPP[i][j][k] = 2;
4062 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4064 v->MPCCombine[i][j][k] = false;
4065 v->NoOfDPP[i][j][k] = 1;
4066 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4068 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4069 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4070 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4072 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4074 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4075 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4076 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4077 v->DISPCLK_DPPCLK_Support[i][j] = false;
4080 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4081 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4082 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4085 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4086 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4087 v->DISPCLK_DPPCLK_Support[i][j] = false;
4092 /*Total Available Pipes Support Check*/
4094 for (i = 0; i < v->soc.num_states; i++) {
4095 for (j = 0; j < 2; j++) {
4096 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4097 v->TotalAvailablePipesSupport[i][j] = true;
4099 v->TotalAvailablePipesSupport[i][j] = false;
4103 /*Display IO and DSC Support Check*/
4105 v->NonsupportedDSCInputBPC = false;
4106 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4107 if (!(v->DSCInputBitPerComponent[k] == 12.0
4108 || v->DSCInputBitPerComponent[k] == 10.0
4109 || v->DSCInputBitPerComponent[k] == 8.0)) {
4110 v->NonsupportedDSCInputBPC = true;
4114 /*Number Of DSC Slices*/
4115 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4116 if (v->BlendingAndTiming[k] == k) {
4117 if (v->PixelClockBackEnd[k] > 3200) {
4118 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4119 } else if (v->PixelClockBackEnd[k] > 1360) {
4120 v->NumberOfDSCSlices[k] = 8;
4121 } else if (v->PixelClockBackEnd[k] > 680) {
4122 v->NumberOfDSCSlices[k] = 4;
4123 } else if (v->PixelClockBackEnd[k] > 340) {
4124 v->NumberOfDSCSlices[k] = 2;
4126 v->NumberOfDSCSlices[k] = 1;
4129 v->NumberOfDSCSlices[k] = 0;
4133 for (i = 0; i < v->soc.num_states; i++) {
4134 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4135 v->RequiresDSC[i][k] = false;
4136 v->RequiresFEC[i][k] = false;
4137 if (v->BlendingAndTiming[k] == k) {
4138 if (v->Output[k] == dm_hdmi) {
4139 v->RequiresDSC[i][k] = false;
4140 v->RequiresFEC[i][k] = false;
4141 v->OutputBppPerState[i][k] = TruncToValidBPP(
4142 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4146 v->PixelClockBackEnd[k],
4147 v->ForcedOutputLinkBPP[k],
4151 v->DSCInputBitPerComponent[k],
4152 v->NumberOfDSCSlices[k],
4153 v->AudioSampleRate[k],
4154 v->AudioSampleLayout[k],
4155 v->ODMCombineEnablePerState[i][k]);
4156 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4157 if (v->DSCEnable[k] == true) {
4158 v->RequiresDSC[i][k] = true;
4159 v->LinkDSCEnable = true;
4160 if (v->Output[k] == dm_dp) {
4161 v->RequiresFEC[i][k] = true;
4163 v->RequiresFEC[i][k] = false;
4166 v->RequiresDSC[i][k] = false;
4167 v->LinkDSCEnable = false;
4168 v->RequiresFEC[i][k] = false;
4171 v->Outbpp = BPP_INVALID;
4172 if (v->PHYCLKPerState[i] >= 270.0) {
4173 v->Outbpp = TruncToValidBPP(
4174 (1.0 - v->Downspreading / 100.0) * 2700,
4175 v->OutputLinkDPLanes[k],
4178 v->PixelClockBackEnd[k],
4179 v->ForcedOutputLinkBPP[k],
4183 v->DSCInputBitPerComponent[k],
4184 v->NumberOfDSCSlices[k],
4185 v->AudioSampleRate[k],
4186 v->AudioSampleLayout[k],
4187 v->ODMCombineEnablePerState[i][k]);
4188 v->OutputBppPerState[i][k] = v->Outbpp;
4189 // TODO: Need some other way to handle this nonsense
4190 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4192 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4193 v->Outbpp = TruncToValidBPP(
4194 (1.0 - v->Downspreading / 100.0) * 5400,
4195 v->OutputLinkDPLanes[k],
4198 v->PixelClockBackEnd[k],
4199 v->ForcedOutputLinkBPP[k],
4203 v->DSCInputBitPerComponent[k],
4204 v->NumberOfDSCSlices[k],
4205 v->AudioSampleRate[k],
4206 v->AudioSampleLayout[k],
4207 v->ODMCombineEnablePerState[i][k]);
4208 v->OutputBppPerState[i][k] = v->Outbpp;
4209 // TODO: Need some other way to handle this nonsense
4210 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4212 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4213 v->Outbpp = TruncToValidBPP(
4214 (1.0 - v->Downspreading / 100.0) * 8100,
4215 v->OutputLinkDPLanes[k],
4218 v->PixelClockBackEnd[k],
4219 v->ForcedOutputLinkBPP[k],
4223 v->DSCInputBitPerComponent[k],
4224 v->NumberOfDSCSlices[k],
4225 v->AudioSampleRate[k],
4226 v->AudioSampleLayout[k],
4227 v->ODMCombineEnablePerState[i][k]);
4228 if (v->Outbpp == BPP_INVALID && v->ForcedOutputLinkBPP[k] == 0) {
4229 //if (v->Outbpp == BPP_INVALID && v->DSCEnabled[k] == dm_dsc_enable_only_if_necessary && v->ForcedOutputLinkBPP[k] == 0) {
4230 v->RequiresDSC[i][k] = true;
4231 v->LinkDSCEnable = true;
4232 if (v->Output[k] == dm_dp) {
4233 v->RequiresFEC[i][k] = true;
4235 v->Outbpp = TruncToValidBPP(
4236 (1.0 - v->Downspreading / 100.0) * 8100,
4237 v->OutputLinkDPLanes[k],
4240 v->PixelClockBackEnd[k],
4241 v->ForcedOutputLinkBPP[k],
4245 v->DSCInputBitPerComponent[k],
4246 v->NumberOfDSCSlices[k],
4247 v->AudioSampleRate[k],
4248 v->AudioSampleLayout[k],
4249 v->ODMCombineEnablePerState[i][k]);
4251 v->OutputBppPerState[i][k] = v->Outbpp;
4252 // TODO: Need some other way to handle this nonsense
4253 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4257 v->OutputBppPerState[i][k] = 0;
4261 for (i = 0; i < v->soc.num_states; i++) {
4262 v->DIOSupport[i] = true;
4263 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4264 if (v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
4265 && (v->OutputBppPerState[i][k] == 0
4266 || (v->OutputFormat[k] == dm_420 && v->Interlace[k] == true && v->ProgressiveToInterlaceUnitInOPP == true))) {
4267 v->DIOSupport[i] = false;
4272 for (i = 0; i < v->soc.num_states; ++i) {
4273 v->ODMCombine4To1SupportCheckOK[i] = true;
4274 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4275 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4276 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) {
4277 v->ODMCombine4To1SupportCheckOK[i] = false;
4282 for (i = 0; i < v->soc.num_states; i++) {
4283 v->DSCCLKRequiredMoreThanSupported[i] = false;
4284 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4285 if (v->BlendingAndTiming[k] == k) {
4286 if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4287 if (v->OutputFormat[k] == dm_420) {
4288 v->DSCFormatFactor = 2;
4289 } else if (v->OutputFormat[k] == dm_444) {
4290 v->DSCFormatFactor = 1;
4291 } else if (v->OutputFormat[k] == dm_n422) {
4292 v->DSCFormatFactor = 2;
4294 v->DSCFormatFactor = 1;
4296 if (v->RequiresDSC[i][k] == true) {
4297 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4298 if (v->PixelClockBackEnd[k] / 12.0 / v->DSCFormatFactor
4299 > (1.0 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * v->MaxDSCCLK[i]) {
4300 v->DSCCLKRequiredMoreThanSupported[i] = true;
4302 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4303 if (v->PixelClockBackEnd[k] / 6.0 / v->DSCFormatFactor
4304 > (1.0 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * v->MaxDSCCLK[i]) {
4305 v->DSCCLKRequiredMoreThanSupported[i] = true;
4308 if (v->PixelClockBackEnd[k] / 3.0 / v->DSCFormatFactor
4309 > (1.0 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * v->MaxDSCCLK[i]) {
4310 v->DSCCLKRequiredMoreThanSupported[i] = true;
4318 for (i = 0; i < v->soc.num_states; i++) {
4319 v->NotEnoughDSCUnits[i] = false;
4320 v->TotalDSCUnitsRequired = 0.0;
4321 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4322 if (v->RequiresDSC[i][k] == true) {
4323 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4324 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4325 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4326 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4328 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4332 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4333 v->NotEnoughDSCUnits[i] = true;
4336 /*DSC Delay per state*/
4338 for (i = 0; i < v->soc.num_states; i++) {
4339 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4340 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4343 v->BPP = v->OutputBppPerState[i][k];
4345 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4346 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4347 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4348 v->DSCInputBitPerComponent[k],
4350 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4351 v->NumberOfDSCSlices[k],
4353 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4354 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4355 v->DSCDelayPerState[i][k] = 2.0
4356 * dscceComputeDelay(
4357 v->DSCInputBitPerComponent[k],
4359 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4360 v->NumberOfDSCSlices[k] / 2,
4362 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4364 v->DSCDelayPerState[i][k] = 4.0
4365 * (dscceComputeDelay(
4366 v->DSCInputBitPerComponent[k],
4368 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4369 v->NumberOfDSCSlices[k] / 4,
4371 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4373 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4375 v->DSCDelayPerState[i][k] = 0.0;
4378 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4379 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4380 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4381 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4387 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4389 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4390 for (j = 0; j <= 1; ++j) {
4391 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4392 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4393 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4394 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4397 CalculateSwathAndDETConfiguration(
4399 v->NumberOfActivePlanes,
4400 v->DETBufferSizeInKByte,
4401 v->MaximumSwathWidthLuma,
4402 v->MaximumSwathWidthChroma,
4404 v->SourcePixelFormat,
4412 v->Read256BlockHeightY,
4413 v->Read256BlockHeightC,
4414 v->Read256BlockWidthY,
4415 v->Read256BlockWidthC,
4416 v->ODMCombineEnableThisState,
4417 v->BlendingAndTiming,
4420 v->BytePerPixelInDETY,
4421 v->BytePerPixelInDETC,
4425 v->NoOfDPPThisState,
4426 v->swath_width_luma_ub_this_state,
4427 v->swath_width_chroma_ub_this_state,
4428 v->SwathWidthYThisState,
4429 v->SwathWidthCThisState,
4430 v->SwathHeightYThisState,
4431 v->SwathHeightCThisState,
4432 v->DETBufferSizeYThisState,
4433 v->DETBufferSizeCThisState,
4435 &v->ViewportSizeSupport[i][j]);
4437 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4438 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4439 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4440 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4441 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4442 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4443 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4444 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4445 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4450 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4451 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4454 for (i = 0; i < v->soc.num_states; i++) {
4455 for (j = 0; j < 2; j++) {
4456 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4457 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4458 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4459 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4460 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4461 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4462 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4463 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4464 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4467 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4468 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4469 if (v->DCCEnable[k] == true) {
4470 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4474 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4475 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
4476 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4478 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
4479 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
4480 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4482 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4483 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4486 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4489 v->Read256BlockHeightC[k],
4490 v->Read256BlockWidthY[k],
4491 v->SourcePixelFormat[k],
4492 v->SurfaceTiling[k],
4493 v->BytePerPixelC[k],
4495 v->SwathWidthCThisState[k],
4496 v->ViewportHeightChroma[k],
4499 v->HostVMMaxNonCachedPageTableLevels,
4500 v->GPUVMMinPageSize,
4501 v->HostVMMinPageSize,
4502 v->PTEBufferSizeInRequestsForChroma,
4505 &v->MacroTileWidthC[k],
4507 &v->DPTEBytesPerRowC,
4508 &v->PTEBufferSizeNotExceededC[i][j][k],
4510 &v->dpte_row_height_chroma[k],
4514 &v->meta_row_height_chroma[k],
4521 &v->dummyinteger11);
4523 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4528 v->ProgressiveToInterlaceUnitInOPP,
4529 v->SwathHeightCThisState[k],
4530 v->ViewportYStartC[k],
4534 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4535 v->PTEBufferSizeInRequestsForChroma = 0;
4536 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4537 v->MetaRowBytesC = 0.0;
4538 v->DPTEBytesPerRowC = 0.0;
4539 v->PrefetchLinesC[i][j][k] = 0.0;
4540 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4542 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4545 v->Read256BlockHeightY[k],
4546 v->Read256BlockWidthY[k],
4547 v->SourcePixelFormat[k],
4548 v->SurfaceTiling[k],
4549 v->BytePerPixelY[k],
4551 v->SwathWidthYThisState[k],
4552 v->ViewportHeight[k],
4555 v->HostVMMaxNonCachedPageTableLevels,
4556 v->GPUVMMinPageSize,
4557 v->HostVMMinPageSize,
4558 v->PTEBufferSizeInRequestsForLuma,
4560 v->DCCMetaPitchY[k],
4561 &v->MacroTileWidthY[k],
4563 &v->DPTEBytesPerRowY,
4564 &v->PTEBufferSizeNotExceededY[i][j][k],
4566 &v->dpte_row_height[k],
4570 &v->meta_row_height[k],
4572 &v->dpte_group_bytes[k],
4578 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4583 v->ProgressiveToInterlaceUnitInOPP,
4584 v->SwathHeightYThisState[k],
4585 v->ViewportYStartY[k],
4588 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4589 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4590 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4592 CalculateRowBandwidth(
4594 v->SourcePixelFormat[k],
4598 v->HTotal[k] / v->PixelClock[k],
4601 v->meta_row_height[k],
4602 v->meta_row_height_chroma[k],
4603 v->DPTEBytesPerRowY,
4604 v->DPTEBytesPerRowC,
4605 v->dpte_row_height[k],
4606 v->dpte_row_height_chroma[k],
4607 &v->meta_row_bandwidth[i][j][k],
4608 &v->dpte_row_bandwidth[i][j][k]);
4610 v->UrgLatency[i] = CalculateUrgentLatency(
4611 v->UrgentLatencyPixelDataOnly,
4612 v->UrgentLatencyPixelMixedWithVMData,
4613 v->UrgentLatencyVMDataOnly,
4614 v->DoUrgentLatencyAdjustment,
4615 v->UrgentLatencyAdjustmentFabricClockComponent,
4616 v->UrgentLatencyAdjustmentFabricClockReference,
4617 v->FabricClockPerState[i]);
4619 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4620 CalculateUrgentBurstFactor(
4621 v->swath_width_luma_ub_this_state[k],
4622 v->swath_width_chroma_ub_this_state[k],
4623 v->DETBufferSizeInKByte,
4624 v->SwathHeightYThisState[k],
4625 v->SwathHeightCThisState[k],
4626 v->HTotal[k] / v->PixelClock[k],
4628 v->CursorBufferSize,
4629 v->CursorWidth[k][0],
4633 v->BytePerPixelInDETY[k],
4634 v->BytePerPixelInDETC[k],
4635 v->DETBufferSizeYThisState[k],
4636 v->DETBufferSizeCThisState[k],
4637 &v->UrgentBurstFactorCursor[k],
4638 &v->UrgentBurstFactorLuma[k],
4639 &v->UrgentBurstFactorChroma[k],
4640 &NotUrgentLatencyHiding[k]);
4643 v->NotUrgentLatencyHiding[i][j] = false;
4644 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4645 if (NotUrgentLatencyHiding[k]) {
4646 v->NotUrgentLatencyHiding[i][j] = true;
4650 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4651 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4652 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4653 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4656 v->TotalVActivePixelBandwidth[i][j] = 0;
4657 v->TotalVActiveCursorBandwidth[i][j] = 0;
4658 v->TotalMetaRowBandwidth[i][j] = 0;
4659 v->TotalDPTERowBandwidth[i][j] = 0;
4660 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4661 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4662 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4663 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4664 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4667 CalculateDCFCLKDeepSleep(
4669 v->NumberOfActivePlanes,
4674 v->SwathWidthYThisState,
4675 v->SwathWidthCThisState,
4676 v->NoOfDPPThisState,
4681 v->PSCL_FACTOR_CHROMA,
4682 v->RequiredDPPCLKThisState,
4683 v->ReadBandwidthLuma,
4684 v->ReadBandwidthChroma,
4686 &v->ProjectedDCFCLKDeepSleep[i][j]);
4690 //Calculate Return BW
4692 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4693 for (j = 0; j <= 1; ++j) {
4694 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4695 if (v->BlendingAndTiming[k] == k) {
4696 if (v->WritebackEnable[k] == true) {
4697 v->WritebackDelayTime[k] = v->WritebackLatency
4698 + CalculateWriteBackDelay(
4699 v->WritebackPixelFormat[k],
4700 v->WritebackHRatio[k],
4701 v->WritebackVRatio[k],
4702 v->WritebackVTaps[k],
4703 v->WritebackDestinationWidth[k],
4704 v->WritebackDestinationHeight[k],
4705 v->WritebackSourceHeight[k],
4706 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4708 v->WritebackDelayTime[k] = 0.0;
4710 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4711 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4712 v->WritebackDelayTime[k] = dml_max(
4713 v->WritebackDelayTime[k],
4715 + CalculateWriteBackDelay(
4716 v->WritebackPixelFormat[m],
4717 v->WritebackHRatio[m],
4718 v->WritebackVRatio[m],
4719 v->WritebackVTaps[m],
4720 v->WritebackDestinationWidth[m],
4721 v->WritebackDestinationHeight[m],
4722 v->WritebackSourceHeight[m],
4723 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4728 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4729 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4730 if (v->BlendingAndTiming[k] == m) {
4731 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4735 v->MaxMaxVStartup[i][j] = 0;
4736 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4737 v->MaximumVStartup[i][j][k] = v->VTotal[k] - v->VActive[k]
4738 - dml_max(1.0, dml_ceil(1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0));
4739 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4744 ReorderingBytes = v->NumberOfChannels
4746 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4747 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4748 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4749 v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
4751 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4752 for (j = 0; j <= 1; ++j) {
4753 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4757 if (v->UseMinimumRequiredDCFCLK == true) {
4760 v->MaxInterDCNTileRepeaters,
4762 v->FinalDRAMClockChangeLatency,
4763 v->SREnterPlusExitTime,
4765 v->RoundTripPingLatencyCycles,
4767 v->PixelChunkSizeInKByte,
4770 v->GPUVMMaxPageTableLevels,
4772 v->NumberOfActivePlanes,
4773 v->HostVMMinPageSize,
4774 v->HostVMMaxNonCachedPageTableLevels,
4775 v->DynamicMetadataVMEnabled,
4776 v->ImmediateFlipRequirement,
4777 v->ProgressiveToInterlaceUnitInOPP,
4778 v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
4779 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4780 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4781 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
4784 v->DynamicMetadataTransmittedBytes,
4785 v->DynamicMetadataLinesBeforeActiveRequired,
4791 v->ProjectedDCFCLKDeepSleep,
4793 v->TotalVActivePixelBandwidth,
4794 v->TotalVActiveCursorBandwidth,
4795 v->TotalMetaRowBandwidth,
4796 v->TotalDPTERowBandwidth,
4797 v->TotalNumberOfActiveDPP,
4798 v->TotalNumberOfDCCActiveDPP,
4799 v->dpte_group_bytes,
4802 v->swath_width_luma_ub_all_states,
4803 v->swath_width_chroma_ub_all_states,
4808 v->PDEAndMetaPTEBytesPerFrame,
4811 v->DynamicMetadataEnable,
4812 v->VActivePixelBandwidth,
4813 v->VActiveCursorBandwidth,
4814 v->ReadBandwidthLuma,
4815 v->ReadBandwidthChroma,
4819 if (v->ClampMinDCFCLK) {
4820 /* Clamp calculated values to actual minimum */
4821 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4822 for (j = 0; j <= 1; ++j) {
4823 if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
4824 v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
4831 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4832 for (j = 0; j <= 1; ++j) {
4833 v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
4834 v->ReturnBusWidth * v->DCFCLKState[i][j],
4835 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth,
4836 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4837 if (v->HostVMEnable != true) {
4838 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
4841 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j]
4842 * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
4847 //Re-ordering Buffer Support Check
4849 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4850 for (j = 0; j <= 1; ++j) {
4851 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4852 > (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4853 v->ROBSupport[i][j] = true;
4855 v->ROBSupport[i][j] = false;
4860 //Vertical Active BW support check
4862 MaxTotalVActiveRDBandwidth = 0;
4863 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4864 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4867 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4868 for (j = 0; j <= 1; ++j) {
4869 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4870 v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4871 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
4873 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4874 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4876 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4883 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4884 for (j = 0; j <= 1; ++j) {
4885 int NextPrefetchModeState = MinPrefetchMode;
4887 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
4889 v->BandwidthWithoutPrefetchSupported[i][j] = true;
4890 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j]
4891 > v->ReturnBWPerState[i][j] || v->NotUrgentLatencyHiding[i][j]) {
4892 v->BandwidthWithoutPrefetchSupported[i][j] = false;
4895 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4896 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4897 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4898 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4899 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4900 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4901 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4902 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4903 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4904 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4905 v->ODMCombineEnabled[k] = v->ODMCombineEnablePerState[i][k];
4908 v->ExtraLatency = CalculateExtraLatency(
4909 v->RoundTripPingLatencyCycles,
4911 v->DCFCLKState[i][j],
4912 v->TotalNumberOfActiveDPP[i][j],
4913 v->PixelChunkSizeInKByte,
4914 v->TotalNumberOfDCCActiveDPP[i][j],
4916 v->ReturnBWPerState[i][j],
4919 v->NumberOfActivePlanes,
4920 v->NoOfDPPThisState,
4921 v->dpte_group_bytes,
4922 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4923 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4924 v->HostVMMinPageSize,
4925 v->HostVMMaxNonCachedPageTableLevels);
4927 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4929 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
4930 v->MaxVStartup = v->NextMaxVStartup;
4932 v->TWait = CalculateTWait(v->PrefetchModePerState[i][j], v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
4934 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4935 Pipe myPipe = { 0 };
4937 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
4938 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
4939 myPipe.PixelClock = v->PixelClock[k];
4940 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
4941 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
4942 myPipe.ScalerEnabled = v->ScalerEnabled[k];
4943 myPipe.SourceScan = v->SourceScan[k];
4944 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
4945 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
4946 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
4947 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
4948 myPipe.InterlaceEnable = v->Interlace[k];
4949 myPipe.NumberOfCursors = v->NumberOfCursors[k];
4950 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
4951 myPipe.HTotal = v->HTotal[k];
4952 myPipe.DCCEnable = v->DCCEnable[k];
4953 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
4955 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
4957 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4958 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4960 v->DSCDelayPerState[i][k],
4961 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
4963 v->DPPCLKDelaySCLLBOnly,
4964 v->DPPCLKDelayCNVCCursor,
4965 v->DISPCLKDelaySubtotal,
4966 v->SwathWidthYThisState[k] / v->HRatio[k],
4968 v->MaxInterDCNTileRepeaters,
4969 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
4970 v->MaximumVStartup[i][j][k],
4971 v->GPUVMMaxPageTableLevels,
4974 v->HostVMMaxNonCachedPageTableLevels,
4975 v->HostVMMinPageSize,
4976 v->DynamicMetadataEnable[k],
4977 v->DynamicMetadataVMEnabled,
4978 v->DynamicMetadataLinesBeforeActiveRequired[k],
4979 v->DynamicMetadataTransmittedBytes[k],
4983 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4984 v->MetaRowBytes[i][j][k],
4985 v->DPTEBytesPerRow[i][j][k],
4986 v->PrefetchLinesY[i][j][k],
4987 v->SwathWidthYThisState[k],
4988 v->BytePerPixelY[k],
4991 v->PrefetchLinesC[i][j][k],
4992 v->SwathWidthCThisState[k],
4993 v->BytePerPixelC[k],
4996 v->swath_width_luma_ub_this_state[k],
4997 v->swath_width_chroma_ub_this_state[k],
4998 v->SwathHeightYThisState[k],
4999 v->SwathHeightCThisState[k],
5001 v->ProgressiveToInterlaceUnitInOPP,
5002 &v->DSTXAfterScaler[k],
5003 &v->DSTYAfterScaler[k],
5004 &v->LineTimesForPrefetch[k],
5006 &v->LinesForMetaPTE[k],
5007 &v->LinesForMetaAndDPTERow[k],
5008 &v->VRatioPreY[i][j][k],
5009 &v->VRatioPreC[i][j][k],
5010 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
5011 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
5012 &v->NoTimeForDynamicMetadata[i][j][k],
5014 &v->prefetch_vmrow_bw[k],
5017 &v->VUpdateOffsetPix[k],
5018 &v->VUpdateWidthPix[k],
5019 &v->VReadyOffsetPix[k]);
5022 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5023 CalculateUrgentBurstFactor(
5024 v->swath_width_luma_ub_this_state[k],
5025 v->swath_width_chroma_ub_this_state[k],
5026 v->DETBufferSizeInKByte,
5027 v->SwathHeightYThisState[k],
5028 v->SwathHeightCThisState[k],
5029 v->HTotal[k] / v->PixelClock[k],
5031 v->CursorBufferSize,
5032 v->CursorWidth[k][0],
5034 v->VRatioPreY[i][j][k],
5035 v->VRatioPreC[i][j][k],
5036 v->BytePerPixelInDETY[k],
5037 v->BytePerPixelInDETC[k],
5038 v->DETBufferSizeYThisState[k],
5039 v->DETBufferSizeCThisState[k],
5040 &v->UrgentBurstFactorCursorPre[k],
5041 &v->UrgentBurstFactorLumaPre[k],
5042 &v->UrgentBurstFactorChroma[k],
5043 &v->NoUrgentLatencyHidingPre[k]);
5046 v->MaximumReadBandwidthWithPrefetch = 0.0;
5047 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5048 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k])
5049 * v->VRatioPreY[i][j][k];
5051 v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch
5053 v->VActivePixelBandwidth[i][j][k],
5054 v->VActiveCursorBandwidth[i][j][k]
5055 + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]),
5056 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5058 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
5059 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5060 * v->UrgentBurstFactorChromaPre[k])
5061 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5064 v->NotEnoughUrgentLatencyHidingPre = false;
5065 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5066 if (v->NoUrgentLatencyHidingPre[k] == true) {
5067 v->NotEnoughUrgentLatencyHidingPre = true;
5071 v->PrefetchSupported[i][j] = true;
5072 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5073 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5074 v->PrefetchSupported[i][j] = false;
5076 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5077 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5078 || v->NoTimeForPrefetch[i][j][k] == true) {
5079 v->PrefetchSupported[i][j] = false;
5083 v->DynamicMetadataSupported[i][j] = true;
5084 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5085 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5086 v->DynamicMetadataSupported[i][j] = false;
5090 v->VRatioInPrefetchSupported[i][j] = true;
5091 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5092 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5093 v->VRatioInPrefetchSupported[i][j] = false;
5096 v->AnyLinesForVMOrRowTooLarge = false;
5097 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5098 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5099 v->AnyLinesForVMOrRowTooLarge = true;
5103 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5104 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5105 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5106 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5108 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5110 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
5111 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5112 * v->UrgentBurstFactorChromaPre[k])
5113 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5115 v->TotImmediateFlipBytes = 0.0;
5116 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5117 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k]
5118 + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k];
5121 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5122 CalculateFlipSchedule(
5124 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
5125 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
5128 v->GPUVMMaxPageTableLevels,
5130 v->HostVMMaxNonCachedPageTableLevels,
5132 v->HostVMMinPageSize,
5133 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5134 v->MetaRowBytes[i][j][k],
5135 v->DPTEBytesPerRow[i][j][k],
5136 v->BandwidthAvailableForImmediateFlip,
5137 v->TotImmediateFlipBytes,
5138 v->SourcePixelFormat[k],
5139 v->HTotal[k] / v->PixelClock[k],
5144 v->dpte_row_height[k],
5145 v->meta_row_height[k],
5146 v->dpte_row_height_chroma[k],
5147 v->meta_row_height_chroma[k],
5148 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5149 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5150 &v->final_flip_bw[k],
5151 &v->ImmediateFlipSupportedForPipe[k]);
5153 v->total_dcn_read_bw_with_flip = 0.0;
5154 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5155 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5157 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5158 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5159 + v->VActiveCursorBandwidth[i][j][k],
5161 * (v->final_flip_bw[k]
5162 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5163 * v->UrgentBurstFactorLumaPre[k]
5164 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5165 * v->UrgentBurstFactorChromaPre[k])
5166 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5168 v->ImmediateFlipSupportedForState[i][j] = true;
5169 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5170 v->ImmediateFlipSupportedForState[i][j] = false;
5172 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5173 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5174 v->ImmediateFlipSupportedForState[i][j] = false;
5178 v->ImmediateFlipSupportedForState[i][j] = false;
5180 if (v->MaxVStartup <= 13 || v->AnyLinesForVMOrRowTooLarge == false) {
5181 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5182 NextPrefetchModeState = NextPrefetchModeState + 1;
5184 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5186 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5187 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement != dm_immediate_flip_required)
5188 || v->ImmediateFlipSupportedForState[i][j] == true))
5189 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5191 CalculateWatermarksAndDRAMSpeedChangeSupport(
5193 v->PrefetchModePerState[i][j],
5194 v->NumberOfActivePlanes,
5195 v->MaxLineBufferLines,
5197 v->DPPOutputBufferPixels,
5198 v->DETBufferSizeInKByte,
5199 v->WritebackInterfaceBufferSize,
5200 v->DCFCLKState[i][j],
5201 v->ReturnBWPerState[i][j],
5203 v->dpte_group_bytes,
5207 v->WritebackLatency,
5208 v->WritebackChunkSize,
5209 v->SOCCLKPerState[i],
5210 v->FinalDRAMClockChangeLatency,
5212 v->SREnterPlusExitTime,
5213 v->ProjectedDCFCLKDeepSleep[i][j],
5214 v->NoOfDPPThisState,
5216 v->RequiredDPPCLKThisState,
5217 v->DETBufferSizeYThisState,
5218 v->DETBufferSizeCThisState,
5219 v->SwathHeightYThisState,
5220 v->SwathHeightCThisState,
5222 v->SwathWidthYThisState,
5223 v->SwathWidthCThisState,
5232 v->BlendingAndTiming,
5233 v->BytePerPixelInDETY,
5234 v->BytePerPixelInDETC,
5238 v->WritebackPixelFormat,
5239 v->WritebackDestinationWidth,
5240 v->WritebackDestinationHeight,
5241 v->WritebackSourceHeight,
5242 &v->DRAMClockChangeSupport[i][j],
5243 &v->UrgentWatermark,
5244 &v->WritebackUrgentWatermark,
5245 &v->DRAMClockChangeWatermark,
5246 &v->WritebackDRAMClockChangeWatermark,
5247 &v->StutterExitWatermark,
5248 &v->StutterEnterPlusExitWatermark,
5249 &v->MinActiveDRAMClockChangeLatencySupported);
5253 /*PTE Buffer Size Check*/
5255 for (i = 0; i < v->soc.num_states; i++) {
5256 for (j = 0; j < 2; j++) {
5257 v->PTEBufferSizeNotExceeded[i][j] = true;
5258 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5259 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5260 v->PTEBufferSizeNotExceeded[i][j] = false;
5265 /*Cursor Support Check*/
5267 v->CursorSupport = true;
5268 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5269 if (v->CursorWidth[k][0] > 0.0) {
5270 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5271 v->CursorSupport = false;
5275 /*Valid Pitch Check*/
5277 v->PitchSupport = true;
5278 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5279 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5280 if (v->DCCEnable[k] == true) {
5281 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5283 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5285 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
5286 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) {
5287 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5288 if (v->DCCEnable[k] == true) {
5289 v->AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]);
5291 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5294 v->AlignedCPitch[k] = v->PitchC[k];
5295 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5297 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k]
5298 || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5299 v->PitchSupport = false;
5303 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5304 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k])
5305 ViewportExceedsSurface = true;
5307 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5308 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
5309 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5310 ViewportExceedsSurface = true;
5314 /*Mode Support, Voltage State and SOC Configuration*/
5316 for (i = v->soc.num_states - 1; i >= 0; i--) {
5317 for (j = 0; j < 2; j++) {
5318 if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
5319 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
5320 && v->NotEnoughDSCUnits[i] == 0 && v->DSCCLKRequiredMoreThanSupported[i] == 0
5321 && v->DTBCLKRequiredMoreThanSupported[i] == 0
5322 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1
5323 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
5324 && v->WritebackLatencySupport == 1 && v->WritebackScaleRatioAndTapsSupport == 1 && v->CursorSupport == 1 && v->PitchSupport == 1
5325 && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1
5326 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1
5327 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0
5328 && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement != dm_immediate_flip_required)
5329 || v->ImmediateFlipSupportedForState[i][j] == true)) {
5330 v->ModeSupport[i][j] = true;
5332 v->ModeSupport[i][j] = false;
5337 unsigned int MaximumMPCCombine = 0;
5338 for (i = v->soc.num_states; i >= 0; i--) {
5339 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5340 v->VoltageLevel = i;
5341 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5342 if (v->ModeSupport[i][1] == true) {
5343 MaximumMPCCombine = 1;
5345 MaximumMPCCombine = 0;
5349 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5350 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5351 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5352 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5354 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5355 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5356 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5357 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5358 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5359 v->maxMpcComb = MaximumMPCCombine;
/*
 * CalculateWatermarksAndDRAMSpeedChangeSupport
 *
 * Computes the urgent, DRAM clock change, writeback and stutter watermarks
 * and classifies the DRAM clock change support of the configuration.
 *
 * Results returned through the pointer parameters:
 *   UrgentWatermark           - UrgentLatency + ExtraLatency
 *   DRAMClockChangeWatermark  - DRAMClockChangeLatency + UrgentWatermark
 *   WritebackUrgentWatermark and WritebackDRAMClockChangeWatermark
 *                             - built from WritebackLatency (the latter also
 *                               adds DRAMClockChangeLatency); one form adds a
 *                               WritebackChunkSize * 1024.0 / 32.0 / SOCCLK term
 *   MinActiveDRAMClockChangeLatencySupported
 *                             - smallest per-plane margin + DRAMClockChangeLatency
 *   DRAMClockChangeSupport    - dm_dram_clock_change_vactive, _vblank or
 *                               _unsupported
 *   StutterExitWatermark and StutterEnterPlusExitWatermark
 *
 * Side effects: the function also writes mode_lib->vba.TotalActiveDPP,
 * TotalDCCActiveDPP, TotalActiveWriteback, LBLatencyHidingSourceLinesY,
 * LBLatencyHidingSourceLinesC, ActiveDRAMClockChangeLatencyMargin[],
 * MinActiveDRAMClockChangeMargin and TotalNumberOfActiveOTG.
 *
 * Array parameters are indexed by plane, 0 .. NumberOfActivePlanes - 1.
 */
5363 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5364 struct display_mode_lib *mode_lib,
5365 unsigned int PrefetchMode,
5366 unsigned int NumberOfActivePlanes,
5367 unsigned int MaxLineBufferLines,
5368 unsigned int LineBufferSize,
5369 unsigned int DPPOutputBufferPixels,
5370 double DETBufferSizeInKByte,
5371 unsigned int WritebackInterfaceBufferSize,
5375 unsigned int dpte_group_bytes[],
5376 unsigned int MetaChunkSize,
5377 double UrgentLatency,
5378 double ExtraLatency,
5379 double WritebackLatency,
5380 double WritebackChunkSize,
5382 double DRAMClockChangeLatency,
5384 double SREnterPlusExitTime,
5385 double DCFCLKDeepSleep,
5386 unsigned int DPPPerPlane[],
5389 double DETBufferSizeY[],
5390 double DETBufferSizeC[],
5391 unsigned int SwathHeightY[],
5392 unsigned int SwathHeightC[],
5393 unsigned int LBBitPerPixel[],
5394 double SwathWidthY[],
5395 double SwathWidthC[],
5397 double HRatioChroma[],
5398 unsigned int vtaps[],
5399 unsigned int VTAPsChroma[],
5401 double VRatioChroma[],
5402 unsigned int HTotal[],
5403 double PixelClock[],
5404 unsigned int BlendingAndTiming[],
5405 double BytePerPixelDETY[],
5406 double BytePerPixelDETC[],
5407 double DSTXAfterScaler[],
5408 double DSTYAfterScaler[],
5409 bool WritebackEnable[],
5410 enum source_format_class WritebackPixelFormat[],
5411 double WritebackDestinationWidth[],
5412 double WritebackDestinationHeight[],
5413 double WritebackSourceHeight[],
5414 enum clock_change_support *DRAMClockChangeSupport,
5415 double *UrgentWatermark,
5416 double *WritebackUrgentWatermark,
5417 double *DRAMClockChangeWatermark,
5418 double *WritebackDRAMClockChangeWatermark,
5419 double *StutterExitWatermark,
5420 double *StutterEnterPlusExitWatermark,
5421 double *MinActiveDRAMClockChangeLatencySupported)
5423 double EffectiveLBLatencyHidingY = 0;
5424 double EffectiveLBLatencyHidingC = 0;
5425 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5426 double LinesInDETC = 0;
5427 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5428 unsigned int LinesInDETCRoundedDownToSwath = 0;
5429 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5430 double FullDETBufferingTimeC = 0;
5431 double ActiveDRAMClockChangeLatencyMarginY = 0;
5432 double ActiveDRAMClockChangeLatencyMarginC = 0;
5433 double WritebackDRAMClockChangeLatencyMargin = 0;
/* NOTE(review): declared double but only ever assigned plane indices (k or j) and compared against them. */
5434 double PlaneWithMinActiveDRAMClockChangeMargin = 0;
5435 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 0;
5436 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5437 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5438 double WritebackDRAMClockChangeLatencyHiding = 0;
/* Sum DPPPerPlane[] over all planes, and separately over the planes with DCCEnable[k] set. */
5441 mode_lib->vba.TotalActiveDPP = 0;
5442 mode_lib->vba.TotalDCCActiveDPP = 0;
5443 for (k = 0; k < NumberOfActivePlanes; ++k) {
5444 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5445 if (DCCEnable[k] == true) {
5446 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
/* The DRAM clock change watermark is layered on top of the urgent watermark. */
5450 *UrgentWatermark = UrgentLatency + ExtraLatency;
5452 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
/* Count the planes that have writeback enabled. */
5454 mode_lib->vba.TotalActiveWriteback = 0;
5455 for (k = 0; k < NumberOfActivePlanes; ++k) {
5456 if (WritebackEnable[k] == true) {
5457 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
/* Writeback watermarks: the plain form is selected when at most one writeback is active; the other form adds a chunk term scaled by SOCCLK. */
5461 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5462 *WritebackUrgentWatermark = WritebackLatency;
5464 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5467 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5468 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5470 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
/* Per-plane DRAM clock change latency margin. */
5473 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Line-buffer source lines: capped at MaxLineBufferLines, then reduced by (taps - 1). */
5475 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5477 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
/* Convert the line counts to time: lines / vertical ratio * line time (HTotal / PixelClock). */
5479 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5481 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
/* Lines that fit in the luma DET buffer, presumably rounded down to a multiple of SwathHeightY by dml_floor, then expressed as time. */
5483 LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5484 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5485 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5486 if (BytePerPixelDETC[k] > 0) {
/* NOTE(review): reads mode_lib->vba.DETBufferSizeC[k] while the luma path above uses the DETBufferSizeY[k] parameter; the DETBufferSizeC[] parameter is not used here - confirm this is intended. */
5487 LinesInDETC = mode_lib->vba.DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5488 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5489 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
/* 999999 acts as a large placeholder value for the chroma buffering time. */
5492 FullDETBufferingTimeC = 999999;
/* Luma margin: latency hidden by line buffer and DET, minus both watermarks and the post-scaler delay expressed in time. */
5495 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
/* With more than one plane, a further (1 - 1/NumberOfActivePlanes) share of one swath time is subtracted. */
5497 if (NumberOfActivePlanes > 1) {
5498 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
/* Planes with chroma data get the same margin computed for chroma; the plane margin is then the smaller of the two. */
5501 if (BytePerPixelDETC[k] > 0) {
5502 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5504 if (NumberOfActivePlanes > 1) {
5505 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5507 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5509 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
/* Writeback can lower the plane margin further. */
5512 if (WritebackEnable[k] == true) {
5514 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
/* The hiding time is halved for the dm_444_64 writeback format. */
5515 if (WritebackPixelFormat[k] == dm_444_64) {
5516 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
/* NOTE(review): both sides of the || test the same enumerator, so the second comparison is redundant - confirm whether a different WritebackConfiguration value was intended. */
5518 if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave || mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
5519 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2;
/* NOTE(review): subtracts mode_lib->vba.WritebackDRAMClockChangeWatermark, not the value written through the WritebackDRAMClockChangeWatermark pointer above - confirm the two always refer to the same storage. */
5521 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark;
5522 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
/* Find the smallest plane margin (999999 is the starting value) and record a plane index for it: k itself when BlendingAndTiming[k] == k, otherwise the j that equals BlendingAndTiming[k]. */
5526 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5527 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5528 for (k = 0; k < NumberOfActivePlanes; ++k) {
5529 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5530 mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5531 if (BlendingAndTiming[k] == k) {
5532 PlaneWithMinActiveDRAMClockChangeMargin = k;
5534 for (j = 0; j < NumberOfActivePlanes; ++j) {
5535 if (BlendingAndTiming[k] == j) {
5536 PlaneWithMinActiveDRAMClockChangeMargin = j;
5543 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
/* Second-smallest margin: skips the plane recorded above and every plane whose BlendingAndTiming[] entry points at it. */
5545 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5546 for (k = 0; k < NumberOfActivePlanes; ++k) {
5547 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5548 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
/* Count the planes whose BlendingAndTiming[] entry is their own index. */
5552 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5553 for (k = 0; k < NumberOfActivePlanes; ++k) {
5554 if (BlendingAndTiming[k] == k) {
5555 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
/*
 * Classification: a positive minimum margin gives vactive support. Otherwise
 * vblank support needs PrefetchMode == 0 together with synchronized vblank,
 * a single active OTG, or a positive second-smallest margin. The remaining
 * assignment marks the change as unsupported.
 */
5559 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
5560 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5561 } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) {
5562 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5564 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
/* Stutter-critical plane: the one with the smallest luma DET buffering time; the <= comparison lets a later plane win ties. */
5567 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5568 for (k = 0; k < NumberOfActivePlanes; ++k) {
5569 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5570 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
5571 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k];
/* Stutter watermarks: exit time + ExtraLatency + 10 / DCFCLKDeepSleep; the enter-plus-exit value is additionally bounded below by the swath transfer time found above. */
5575 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5576 *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane);
/*
 * CalculateDCFCLKDeepSleep
 *
 * Derives the DCFCLK deep sleep value returned through DCFCLKDeepSleep.
 * A per-plane requirement is stored in mode_lib->vba.DCFCLKDeepSleepPerPlane[];
 * the result is the largest of 8.0, (total read bandwidth / ReturnBusWidth)
 * and every per-plane requirement.
 *
 * Array parameters are indexed by plane, 0 .. NumberOfActivePlanes - 1.
 */
5580 static void CalculateDCFCLKDeepSleep(
5581 struct display_mode_lib *mode_lib,
5582 unsigned int NumberOfActivePlanes,
5583 int BytePerPixelY[],
5584 int BytePerPixelC[],
5586 double VRatioChroma[],
5587 double SwathWidthY[],
5588 double SwathWidthC[],
5589 unsigned int DPPPerPlane[],
5591 double HRatioChroma[],
5592 double PixelClock[],
5593 double PSCL_THROUGHPUT[],
5594 double PSCL_THROUGHPUT_CHROMA[],
5596 double ReadBandwidthLuma[],
5597 double ReadBandwidthChroma[],
5599 double *DCFCLKDeepSleep)
5601 double DisplayPipeLineDeliveryTimeLuma = 0;
5602 double DisplayPipeLineDeliveryTimeChroma = 0;
5604 double ReadBandwidth = 0.0;
5606 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5607 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Luma line delivery time: one formula is pixel-clock based (selected by VRatio[k] <= 1), the other is based on PSCL_THROUGHPUT and DPPCLK. */
5609 if (VRatio[k] <= 1) {
5610 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5612 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Chroma line delivery time: zero for planes without chroma bytes, otherwise the same pair of formulas with the chroma inputs. */
5614 if (BytePerPixelC[k] == 0) {
5615 DisplayPipeLineDeliveryTimeChroma = 0;
5617 if (VRatioChroma[k] <= 1) {
5618 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5620 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/* Per-plane requirement: 1.1 * swath bytes / delivery time, divided by 32.0 (larger of luma and chroma) when the plane has chroma, and by 64.0 in the luma-only formula. */
5624 if (BytePerPixelC[k] > 0) {
5625 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(1.1 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 1.1 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5627 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
/* The per-plane value is never allowed below PixelClock[k] / 16. */
5629 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(mode_lib->vba.DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
/* Total read bandwidth over all planes, luma plus chroma. */
5633 for (k = 0; k < NumberOfActivePlanes; ++k) {
5634 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
/* Start from the bandwidth-derived value with a floor of 8.0, then raise it to the largest per-plane requirement. */
5637 *DCFCLKDeepSleep = dml_max(8.0, ReadBandwidth / ReturnBusWidth);
5639 for (k = 0; k < NumberOfActivePlanes; ++k) {
5640 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
/*
 * CalculateUrgentBurstFactor
 *
 * Computes the urgent burst factors for the cursor, luma and chroma of one
 * plane. Each factor is BufferTime / (BufferTime - UrgentLatency), where
 * BufferTime is the time the corresponding buffer can cover.
 *
 * When BufferTime - UrgentLatency <= 0 the factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set to 1; the flag is cleared on entry.
 * Each factor also has an assignment that sets it to 1.
 *
 * The cursor factor is only computed when CursorWidth > 0 and the chroma
 * factor only when BytePerPixelInDETC > 0.
 */
5644 static void CalculateUrgentBurstFactor(
5645 long swath_width_luma_ub,
5646 long swath_width_chroma_ub,
5647 unsigned int DETBufferSizeInKByte,
5648 unsigned int SwathHeightY,
5649 unsigned int SwathHeightC,
5651 double UrgentLatency,
5652 double CursorBufferSize,
5653 unsigned int CursorWidth,
5654 unsigned int CursorBPP,
5657 double BytePerPixelInDETY,
5658 double BytePerPixelInDETC,
5659 double DETBufferSizeY,
5660 double DETBufferSizeC,
5661 double *UrgentBurstFactorCursor,
5662 double *UrgentBurstFactorLuma,
5663 double *UrgentBurstFactorChroma,
5664 bool *NotEnoughUrgentLatencyHiding)
5666 double LinesInDETLuma = 0;
5667 double LinesInDETChroma = 0;
5668 unsigned int LinesInCursorBuffer = 0;
5669 double CursorBufferSizeInTime = 0;
5670 double DETBufferSizeInTimeLuma = 0;
5671 double DETBufferSizeInTimeChroma = 0;
5673 *NotEnoughUrgentLatencyHiding = 0;
5675 if (CursorWidth > 0) {
/* Cursor lines: 2 raised to the floored log2 of (buffer bytes / bytes per cursor line), i.e. presumably the line capacity rounded down to a power of two. */
5676 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
5678 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
5679 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
5680 *NotEnoughUrgentLatencyHiding = 1;
5681 *UrgentBurstFactorCursor = 0;
5683 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
5686 *UrgentBurstFactorCursor = 1;
/* Luma: lines that fit in the DET buffer, passed through dml_floor with SwathHeightY, then converted to time. */
5690 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
5692 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
5693 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
5694 *NotEnoughUrgentLatencyHiding = 1;
5695 *UrgentBurstFactorLuma = 0;
5697 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
5700 *UrgentBurstFactorLuma = 1;
5703 if (BytePerPixelInDETC > 0) {
5704 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
/* NOTE(review): the chroma buffering time is divided by VRatio, the same ratio used for luma above - confirm a chroma-specific ratio is not expected here. */
5706 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
5707 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
5708 *NotEnoughUrgentLatencyHiding = 1;
5709 *UrgentBurstFactorChroma = 0;
5711 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
5714 *UrgentBurstFactorChroma = 1;
5719 static void CalculatePixelDeliveryTimes(
5720 unsigned int NumberOfActivePlanes,
5722 double VRatioChroma[],
5723 double VRatioPrefetchY[],
5724 double VRatioPrefetchC[],
5725 unsigned int swath_width_luma_ub[],
5726 unsigned int swath_width_chroma_ub[],
5727 unsigned int DPPPerPlane[],
5729 double HRatioChroma[],
5730 double PixelClock[],
5731 double PSCL_THROUGHPUT[],
5732 double PSCL_THROUGHPUT_CHROMA[],
5734 int BytePerPixelC[],
5735 enum scan_direction_class SourceScan[],
5736 unsigned int NumberOfCursors[],
5737 unsigned int CursorWidth[][2],
5738 unsigned int CursorBPP[][2],
5739 unsigned int BlockWidth256BytesY[],
5740 unsigned int BlockHeight256BytesY[],
5741 unsigned int BlockWidth256BytesC[],
5742 unsigned int BlockHeight256BytesC[],
5743 double DisplayPipeLineDeliveryTimeLuma[],
5744 double DisplayPipeLineDeliveryTimeChroma[],
5745 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5746 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5747 double DisplayPipeRequestDeliveryTimeLuma[],
5748 double DisplayPipeRequestDeliveryTimeChroma[],
5749 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5750 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5751 double CursorRequestDeliveryTime[],
5752 double CursorRequestDeliveryTimePrefetch[])
5754 double req_per_swath_ub = 0;
5757 for (k = 0; k < NumberOfActivePlanes; ++k) {
5758 if (VRatio[k] <= 1) {
5759 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5761 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5764 if (BytePerPixelC[k] == 0) {
5765 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5767 if (VRatioChroma[k] <= 1) {
5768 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5770 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5774 if (VRatioPrefetchY[k] <= 1) {
5775 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5777 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5780 if (BytePerPixelC[k] == 0) {
5781 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5783 if (VRatioPrefetchC[k] <= 1) {
5784 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5786 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5791 for (k = 0; k < NumberOfActivePlanes; ++k) {
5792 if (SourceScan[k] != dm_vert) {
5793 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5795 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5797 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5798 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5799 if (BytePerPixelC[k] == 0) {
5800 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5801 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5803 if (SourceScan[k] != dm_vert) {
5804 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5806 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5808 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5809 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5813 for (k = 0; k < NumberOfActivePlanes; ++k) {
5814 int cursor_req_per_width = 0;
5815 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5816 if (NumberOfCursors[k] > 0) {
5817 if (VRatio[k] <= 1) {
5818 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5820 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5822 if (VRatioPrefetchY[k] <= 1) {
5823 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5825 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5828 CursorRequestDeliveryTime[k] = 0;
5829 CursorRequestDeliveryTimePrefetch[k] = 0;
/*
 * CalculateMetaAndPTETimes - per-plane meta-chunk and PTE-group timing.
 *
 * For each plane k in [0, NumberOfActivePlanes) this writes:
 *  - DST_Y_PER_PTE_ROW_NOM_L/C and DST_Y_PER_META_ROW_NOM_L/C: the dpte/meta
 *    row height divided by VRatio[k] (luma) or VRatioChroma[k] (chroma);
 *  - TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}: a line count multiplied
 *    by HTotal[k] / PixelClock[k] and divided by the meta chunks per row;
 *  - time_per_pte_group_{nom,vblank,flip}_{luma,chroma}: the same scaling
 *    divided by the dpte groups per row.
 *
 * NOTE(review): not every line of this function is visible in this listing
 * (some parameters, braces and branch keywords are absent). Where two
 * assignments to the same output appear back to back they are presumably
 * the two arms of an if/else -- confirm against the full file.
 */
5834 static void CalculateMetaAndPTETimes(
5835 int NumberOfActivePlanes,
5838 int MinMetaChunkSizeBytes,
5841 double VRatioChroma[],
5842 double DestinationLinesToRequestRowInVBlank[],
5843 double DestinationLinesToRequestRowInImmediateFlip[],
5845 double PixelClock[],
5846 int BytePerPixelY[],
5847 int BytePerPixelC[],
5848 enum scan_direction_class SourceScan[],
5849 int dpte_row_height[],
5850 int dpte_row_height_chroma[],
5851 int meta_row_width[],
5852 int meta_row_width_chroma[],
5853 int meta_row_height[],
5854 int meta_row_height_chroma[],
5855 int meta_req_width[],
5856 int meta_req_width_chroma[],
5857 int meta_req_height[],
5858 int meta_req_height_chroma[],
5859 int dpte_group_bytes[],
5860 int PTERequestSizeY[],
5861 int PTERequestSizeC[],
5862 int PixelPTEReqWidthY[],
5863 int PixelPTEReqHeightY[],
5864 int PixelPTEReqWidthC[],
5865 int PixelPTEReqHeightC[],
5866 int dpte_row_width_luma_ub[],
5867 int dpte_row_width_chroma_ub[],
/* Everything from here to the end of the parameter list is written by this function. */
5868 double DST_Y_PER_PTE_ROW_NOM_L[],
5869 double DST_Y_PER_PTE_ROW_NOM_C[],
5870 double DST_Y_PER_META_ROW_NOM_L[],
5871 double DST_Y_PER_META_ROW_NOM_C[],
5872 double TimePerMetaChunkNominal[],
5873 double TimePerChromaMetaChunkNominal[],
5874 double TimePerMetaChunkVBlank[],
5875 double TimePerChromaMetaChunkVBlank[],
5876 double TimePerMetaChunkFlip[],
5877 double TimePerChromaMetaChunkFlip[],
5878 double time_per_pte_group_nom_luma[],
5879 double time_per_pte_group_vblank_luma[],
5880 double time_per_pte_group_flip_luma[],
5881 double time_per_pte_group_nom_chroma[],
5882 double time_per_pte_group_vblank_chroma[],
5883 double time_per_pte_group_flip_chroma[])
/* Scratch values; each is recomputed per plane inside the loops below. */
5885 unsigned int meta_chunk_width = 0;
5886 unsigned int min_meta_chunk_width = 0;
5887 unsigned int meta_chunk_per_row_int = 0;
5888 unsigned int meta_row_remainder = 0;
5889 unsigned int meta_chunk_threshold = 0;
5890 unsigned int meta_chunks_per_row_ub = 0;
5891 unsigned int meta_chunk_width_chroma = 0;
5892 unsigned int min_meta_chunk_width_chroma = 0;
5893 unsigned int meta_chunk_per_row_int_chroma = 0;
5894 unsigned int meta_row_remainder_chroma = 0;
5895 unsigned int meta_chunk_threshold_chroma = 0;
5896 unsigned int meta_chunks_per_row_ub_chroma = 0;
5897 unsigned int dpte_group_width_luma = 0;
5898 unsigned int dpte_groups_per_row_luma_ub = 0;
5899 unsigned int dpte_group_width_chroma = 0;
5900 unsigned int dpte_groups_per_row_chroma_ub = 0;
/*
 * Pass 1: row heights divided by the vertical scaling ratio. The chroma
 * outputs are set to 0 when BytePerPixelC[k] == 0.
 */
5903 for (k = 0; k < NumberOfActivePlanes; ++k) {
5904 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5905 if (BytePerPixelC[k] == 0) {
5906 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5908 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5910 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5911 if (BytePerPixelC[k] == 0) {
5912 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5914 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
/* Pass 2: meta chunk times, computed for planes with DCCEnable[k] set. */
5918 for (k = 0; k < NumberOfActivePlanes; ++k) {
5919 if (DCCEnable[k] == true) {
/* Chunk widths come from integer arithmetic on the chunk sizes, bytes per pixel and meta row height. */
5920 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5921 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
/* Whole chunks per meta row, and the leftover width. */
5922 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5923 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
/* Threshold uses the request width when the scan is not dm_vert; the height form follows. */
5924 if (SourceScan[k] != dm_vert) {
5925 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5927 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
/* Upper bound on chunks per row: whole chunks + 1 when the remainder is within the threshold; the + 2 form follows. */
5929 if (meta_row_remainder <= meta_chunk_threshold) {
5930 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5932 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
/* Each time is a line count * HTotal / PixelClock, split evenly over the chunks in a row. */
5934 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5935 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5936 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
/* Chroma: zeroed when BytePerPixelC[k] == 0; the statements after that repeat the luma computation with the chroma inputs. */
5937 if (BytePerPixelC[k] == 0) {
5938 TimePerChromaMetaChunkNominal[k] = 0;
5939 TimePerChromaMetaChunkVBlank[k] = 0;
5940 TimePerChromaMetaChunkFlip[k] = 0;
5942 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5943 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
/*
 * Unlike the luma line above, this division is done in double because of
 * the cast; the result is converted back to unsigned int on assignment.
 */
5944 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5945 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5946 if (SourceScan[k] != dm_vert) {
5947 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5949 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5951 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5952 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5954 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5956 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5957 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5958 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
/* All six meta chunk times are zeroed here -- presumably the arm for DCCEnable[k] not set. */
5961 TimePerMetaChunkNominal[k] = 0;
5962 TimePerMetaChunkVBlank[k] = 0;
5963 TimePerMetaChunkFlip[k] = 0;
5964 TimePerChromaMetaChunkNominal[k] = 0;
5965 TimePerChromaMetaChunkVBlank[k] = 0;
5966 TimePerChromaMetaChunkFlip[k] = 0;
/* Pass 3: PTE group times, computed when GPUVMEnable is set. */
5970 for (k = 0; k < NumberOfActivePlanes; ++k) {
5971 if (GPUVMEnable == true) {
/* Group width = (group bytes / PTE request size) * PTE request width, or height in the statement that follows. */
5972 if (SourceScan[k] != dm_vert) {
5973 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5975 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
/* The 1.0 factor forces a floating-point division before dml_ceil is applied. */
5977 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
5978 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5979 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5980 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
/* Chroma group times: zeroed when BytePerPixelC[k] == 0, then the chroma counterpart of the luma computation. */
5981 if (BytePerPixelC[k] == 0) {
5982 time_per_pte_group_nom_chroma[k] = 0;
5983 time_per_pte_group_vblank_chroma[k] = 0;
5984 time_per_pte_group_flip_chroma[k] = 0;
5986 if (SourceScan[k] != dm_vert) {
5987 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5989 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5991 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
5992 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5993 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5994 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
/* All six PTE group times are zeroed here -- presumably the arm for GPUVMEnable not set. */
5997 time_per_pte_group_nom_luma[k] = 0;
5998 time_per_pte_group_vblank_luma[k] = 0;
5999 time_per_pte_group_flip_luma[k] = 0;
6000 time_per_pte_group_nom_chroma[k] = 0;
6001 time_per_pte_group_vblank_chroma[k] = 0;
6002 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes - per-plane VM group and VM request times.
 *
 * For each plane k in [0, NumberOfActivePlanes) this writes
 * TimePerVMGroupVBlank/Flip[k] and TimePerVMRequestVBlank/Flip[k]. Each is
 * DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k] * HTotal[k] /
 * PixelClock[k], divided by a group count or a request count derived from
 * the dpde0 and meta PTE byte counts.
 *
 * NOTE(review): not every line of this function is visible in this listing
 * (some parameters, braces and else keywords are absent); consecutive
 * alternative assignments are presumably if/else arms -- confirm.
 */
6007 static void CalculateVMGroupAndRequestTimes(
6008 unsigned int NumberOfActivePlanes,
6010 unsigned int GPUVMMaxPageTableLevels,
6011 unsigned int HTotal[],
6012 int BytePerPixelC[],
6013 double DestinationLinesToRequestVMInVBlank[],
6014 double DestinationLinesToRequestVMInImmediateFlip[],
6016 double PixelClock[],
6017 int dpte_row_width_luma_ub[],
6018 int dpte_row_width_chroma_ub[],
6019 int vm_group_bytes[],
6020 unsigned int dpde0_bytes_per_frame_ub_l[],
6021 unsigned int dpde0_bytes_per_frame_ub_c[],
6022 int meta_pte_bytes_per_frame_ub_l[],
6023 int meta_pte_bytes_per_frame_ub_c[],
/* The four arrays below are the outputs. */
6024 double TimePerVMGroupVBlank[],
6025 double TimePerVMGroupFlip[],
6026 double TimePerVMRequestVBlank[],
6027 double TimePerVMRequestFlip[])
6029 int num_group_per_lower_vm_stage = 0;
6030 int num_req_per_lower_vm_stage = 0;
6033 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Times are computed only with GPUVM on and either DCC on for the plane or more than one page table level. */
6034 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
/*
 * Group count: sum of dml_ceil(bytes / vm_group_bytes, 1) terms.
 * DCC off: dpde0 bytes only (luma, plus chroma when BytePerPixelC > 0).
 */
6035 if (DCCEnable[k] == false) {
6036 if (BytePerPixelC[k] > 0) {
6037 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k])
6038 / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k])
6039 / (double) (vm_group_bytes[k]), 1);
6041 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k])
6042 / (double) (vm_group_bytes[k]), 1);
/* With a single page table level only the meta PTE bytes are counted. */
6045 if (GPUVMMaxPageTableLevels == 1) {
6046 if (BytePerPixelC[k] > 0) {
6047 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k])
6048 / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k])
6049 / (double) (vm_group_bytes[k]), 1);
6051 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k])
6052 / (double) (vm_group_bytes[k]), 1);
/* Remaining case: dpde0 and meta PTE terms are summed, plus a constant 2 (with chroma) or 1 (without). */
6055 if (BytePerPixelC[k] > 0) {
6056 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6057 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6058 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6059 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6061 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6062 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/*
 * Request count: same case split as the group count, but each byte count
 * is divided by 64 using integer division and no constant is added.
 */
6067 if (DCCEnable[k] == false) {
6068 if (BytePerPixelC[k] > 0) {
6069 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6071 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6074 if (GPUVMMaxPageTableLevels == 1) {
6075 if (BytePerPixelC[k] > 0) {
6076 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64
6077 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6079 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6082 if (BytePerPixelC[k] > 0) {
6083 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6084 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k]
6085 / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6087 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6088 + meta_pte_bytes_per_frame_ub_l[k] / 64;
/* Divide the VBlank / immediate-flip request window evenly over the groups and over the requests. */
6093 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
6094 / num_group_per_lower_vm_stage;
6095 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
6096 / num_group_per_lower_vm_stage;
6097 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
6098 / num_req_per_lower_vm_stage;
6099 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
6100 / num_req_per_lower_vm_stage;
/* With more than two page table levels all four times are halved. */
6102 if (GPUVMMaxPageTableLevels > 2) {
6103 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6104 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6105 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6106 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
/* All four outputs zeroed -- presumably the arm taken when the condition at the top of the loop fails. */
6110 TimePerVMGroupVBlank[k] = 0;
6111 TimePerVMGroupFlip[k] = 0;
6112 TimePerVMRequestVBlank[k] = 0;
6113 TimePerVMRequestFlip[k] = 0;
/*
 * CalculateStutterEfficiency - computes two percentages and stores them in
 * *StutterEfficiencyNotIncludingVBlank and *StutterEfficiency.
 *
 * Visible steps: derive a per-plane full-DET buffering time, take the
 * smallest as StutterPeriod, accumulate read bandwidth (with a capped DCC
 * rate for DCC planes), derive StutterBurstTime, and combine these with the
 * smallest vblank time into the two outputs.
 *
 * NOTE(review): not every line of this function is visible in this listing
 * (some parameters, braces and else keywords are absent); consecutive
 * alternative assignments are presumably if/else arms -- confirm.
 */
6118 static void CalculateStutterEfficiency(
6119 int NumberOfActivePlanes,
6120 long ROBBufferSizeInKByte,
6121 double TotalDataReadBandwidth,
6125 bool SynchronizedVBlank,
6127 double DETBufferSizeY[],
6128 int BytePerPixelY[],
6129 double BytePerPixelDETY[],
6130 double SwathWidthY[],
6133 double DCCRateLuma[],
6134 double DCCRateChroma[],
6137 double PixelClock[],
6139 enum scan_direction_class SourceScan[],
6140 int BlockHeight256BytesY[],
6141 int BlockWidth256BytesY[],
6142 int BlockHeight256BytesC[],
6143 int BlockWidth256BytesC[],
6144 int DCCYMaxUncompressedBlock[],
6145 int DCCCMaxUncompressedBlock[],
6148 bool WritebackEnable[],
6149 double ReadBandwidthPlaneLuma[],
6150 double ReadBandwidthPlaneChroma[],
6151 double meta_row_bw[],
6152 double dpte_row_bw[],
/* The two pointers below are the outputs. */
6153 double *StutterEfficiencyNotIncludingVBlank,
6154 double *StutterEfficiency)
6156 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
6157 double FrameTimeForMinFullDETBufferingTime = 0;
6158 double StutterPeriod = 0;
6159 double AverageReadBandwidth = 0;
6160 double TotalRowReadBandwidth = 0;
6161 double AverageDCCCompressionRate = 0;
6162 double PartOfBurstThatFitsInROB = 0;
6163 double StutterBurstTime = 0;
6164 int TotalActiveWriteback = 0;
6165 double VBlankTime = 0;
6166 double SmallestVBlank = 0;
6167 int BytePerPixelYCriticalPlane = 0;
6168 double SwathWidthYCriticalPlane = 0;
6169 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
6170 double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
6171 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6172 double MaximumEffectiveCompressionLuma = 0;
6173 double MaximumEffectiveCompressionChroma = 0;
/*
 * Per plane: lines held by the luma DET buffer, that count passed through
 * dml_floor with SwathHeightY[k] as granularity (presumably rounding down
 * to a swath multiple -- confirm dml_floor semantics), and the resulting
 * buffering time.
 */
6176 for (k = 0; k < NumberOfActivePlanes; ++k) {
6177 LinesInDETY[k] = DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
6178 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
6179 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
/* Seed the "critical plane" values from plane 0 ... */
6182 StutterPeriod = FullDETBufferingTimeY[0];
6183 FrameTimeForMinFullDETBufferingTime = VTotal[0] * HTotal[0] / PixelClock[0];
6184 BytePerPixelYCriticalPlane = BytePerPixelY[0];
6185 SwathWidthYCriticalPlane = SwathWidthY[0];
6186 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[0]
6187 - (LinesInDETY[0] - LinesInDETYRoundedDownToSwath[0]);
/* ... then replace them whenever a plane has a strictly smaller buffering time. */
6189 for (k = 0; k < NumberOfActivePlanes; ++k) {
6190 if (FullDETBufferingTimeY[k] < StutterPeriod) {
6191 StutterPeriod = FullDETBufferingTimeY[k];
6192 FrameTimeForMinFullDETBufferingTime = VTotal[k] * HTotal[k] / PixelClock[k];
6193 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6194 SwathWidthYCriticalPlane = SwathWidthY[k];
6195 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k]
6196 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]);
/* Accumulate read bandwidth over all planes. */
6200 AverageReadBandwidth = 0;
6201 TotalRowReadBandwidth = 0;
6202 for (k = 0; k < NumberOfActivePlanes; ++k) {
6203 if (DCCEnable[k] == true) {
/*
 * The compression cap is 2 when the 256-byte block dimension along the
 * swath (width for dm_vert, height otherwise) exceeds the swath height, or
 * when the max uncompressed block is below 256; the value 4 follows.
 */
6204 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k])
6205 || (SourceScan[k] != dm_vert
6206 && BlockHeight256BytesY[k] > SwathHeightY[k])
6207 || DCCYMaxUncompressedBlock[k] < 256) {
6208 MaximumEffectiveCompressionLuma = 2;
6210 MaximumEffectiveCompressionLuma = 4;
/* Luma bandwidth is divided by the smaller of the DCC rate and the cap. */
6212 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(DCCRateLuma[k], MaximumEffectiveCompressionLuma);
/* Same treatment for chroma, only when the plane has chroma bandwidth. */
6214 if (ReadBandwidthPlaneChroma[k] > 0) {
6215 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6216 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k])
6217 || DCCCMaxUncompressedBlock[k] < 256) {
6218 MaximumEffectiveCompressionChroma = 2;
6220 MaximumEffectiveCompressionChroma = 4;
6222 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(DCCRateChroma[k], MaximumEffectiveCompressionChroma);
/* Undivided luma + chroma bandwidth -- presumably the arm for planes without DCC. */
6225 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6227 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
/* Ratio of the total data bandwidth passed in to the bandwidth accumulated above. */
6230 AverageDCCCompressionRate = TotalDataReadBandwidth / AverageReadBandwidth;
/* Amount of one stutter period's data limited by ROBBufferSizeInKByte * 1024 * compression rate. */
6231 PartOfBurstThatFitsInROB = dml_min(StutterPeriod * TotalDataReadBandwidth, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
/* Burst time = ROB part over ReturnBW + the rest over DCFCLK * 64 + row traffic over ReturnBW. */
6232 StutterBurstTime = PartOfBurstThatFitsInROB / AverageDCCCompressionRate / ReturnBW + (StutterPeriod * TotalDataReadBandwidth
6233 - PartOfBurstThatFitsInROB) / (DCFCLK * 64) + StutterPeriod * TotalRowReadBandwidth / ReturnBW;
/* Lower bound: time to transfer the remaining swath lines of the critical plane at ReturnBW. */
6234 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
/* Count planes with writeback enabled. */
6236 TotalActiveWriteback = 0;
6237 for (k = 0; k < NumberOfActivePlanes; ++k) {
6238 if (WritebackEnable[k] == true) {
6239 TotalActiveWriteback = TotalActiveWriteback + 1;
/* With no active writeback the first output is a percentage of the stutter period; the 0 assignment follows. */
6243 if (TotalActiveWriteback == 0) {
6244 *StutterEfficiencyNotIncludingVBlank = (1
6245 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100;
6247 *StutterEfficiencyNotIncludingVBlank = 0;
/* Seed the smallest vblank from plane 0 when vblanks are synchronized or there is a single plane. */
6250 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6251 SmallestVBlank = (VTotal[0] - VActive[0]) * HTotal[0] / PixelClock[0];
/* The test inside this loop is the same plane-independent condition as the one just above. */
6255 for (k = 0; k < NumberOfActivePlanes; ++k) {
6256 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6257 VBlankTime = (VTotal[k] - VActive[k]) * HTotal[k] / PixelClock[k];
6261 SmallestVBlank = dml_min(SmallestVBlank, VBlankTime);
/* Second output: the first percentage applied to the frame time outside SmallestVBlank, plus SmallestVBlank itself, relative to the frame time. */
6264 *StutterEfficiency = (*StutterEfficiencyNotIncludingVBlank / 100.0 * (FrameTimeForMinFullDETBufferingTime - SmallestVBlank) + SmallestVBlank) / FrameTimeForMinFullDETBufferingTime * 100;
/*
 * CalculateSwathAndDETConfiguration - chooses per-plane swath heights and
 * the luma/chroma split of the DET buffer, and reports viewport support.
 *
 * Calls CalculateSwathWidth() first, then for each plane k in
 * [0, NumberOfActivePlanes) writes SwathHeightY/C[k], DETBufferSizeY/C[k]
 * and ViewportSizeSupportPerPlane[k]. *ViewportSizeSupport starts as true
 * and is cleared if any plane fails the check at the end of the loop.
 *
 * NOTE(review): not every line of this function is visible in this listing
 * (some parameters, call arguments, braces and else keywords are absent);
 * consecutive alternative assignments are presumably if/else arms --
 * confirm against the full file.
 */
6267 static void CalculateSwathAndDETConfiguration(
6268 bool ForceSingleDPP,
6269 int NumberOfActivePlanes,
6270 long DETBufferSizeInKByte,
6271 double MaximumSwathWidthLuma[],
6272 double MaximumSwathWidthChroma[],
6273 enum scan_direction_class SourceScan[],
6274 enum source_format_class SourcePixelFormat[],
6275 enum dm_swizzle_mode SurfaceTiling[],
6276 int ViewportWidth[],
6277 int ViewportHeight[],
6278 int SurfaceWidthY[],
6279 int SurfaceWidthC[],
6280 int SurfaceHeightY[],
6281 int SurfaceHeightC[],
6282 int Read256BytesBlockHeightY[],
6283 int Read256BytesBlockHeightC[],
6284 int Read256BytesBlockWidthY[],
6285 int Read256BytesBlockWidthC[],
6286 enum odm_combine_mode ODMCombineEnabled[],
6287 int BlendingAndTiming[],
6290 double BytePerPixDETY[],
6291 double BytePerPixDETC[],
6294 double HRatioChroma[],
6296 int swath_width_luma_ub[],
6297 int swath_width_chroma_ub[],
6298 double SwathWidth[],
6299 double SwathWidthChroma[],
6302 double DETBufferSizeY[],
6303 double DETBufferSizeC[],
6304 bool ViewportSizeSupportPerPlane[],
6305 bool *ViewportSizeSupport)
6307 int MaximumSwathHeightY[DC__NUM_DPP__MAX] = { 0 };
6308 int MaximumSwathHeightC[DC__NUM_DPP__MAX] = { 0 };
6309 int MinimumSwathHeightY = 0;
6310 int MinimumSwathHeightC = 0;
6311 long RoundedUpMaxSwathSizeBytesY = 0;
6312 long RoundedUpMaxSwathSizeBytesC = 0;
6313 long RoundedUpMinSwathSizeBytesY = 0;
6314 long RoundedUpMinSwathSizeBytesC = 0;
6315 long RoundedUpSwathSizeBytesY = 0;
6316 long RoundedUpSwathSizeBytesC = 0;
6317 double SwathWidthSingleDPP[DC__NUM_DPP__MAX] = { 0 };
6318 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX] = { 0 };
/* Fills the local single-DPP widths and maximum swath heights, and the caller's swath_width_*_ub arrays. */
6321 CalculateSwathWidth(
6323 NumberOfActivePlanes,
6335 Read256BytesBlockHeightY,
6336 Read256BytesBlockHeightC,
6337 Read256BytesBlockWidthY,
6338 Read256BytesBlockWidthC,
6343 SwathWidthSingleDPP,
6344 SwathWidthSingleDPPChroma,
6347 MaximumSwathHeightY,
6348 MaximumSwathHeightC,
6349 swath_width_luma_ub,
6350 swath_width_chroma_ub);
6352 *ViewportSizeSupport = true;
6353 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Step 1: pick the minimum swath heights (the maximum, or half of it)
 * from the pixel format, tiling mode and scan direction.
 */
6354 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32
6355 || SourcePixelFormat[k] == dm_444_16
6356 || SourcePixelFormat[k] == dm_mono_16
6357 || SourcePixelFormat[k] == dm_mono_8
6358 || SourcePixelFormat[k] == dm_rgbe)) {
6359 if (SurfaceTiling[k] == dm_sw_linear
6360 || (SourcePixelFormat[k] == dm_444_64
6361 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6362 && SourceScan[k] != dm_vert)) {
6363 MinimumSwathHeightY = MaximumSwathHeightY[k];
/*
 * NOTE(review): dm_444_8 is not among the formats accepted by the
 * enclosing format test above, so this arm looks unreachable as listed --
 * confirm against the full file before relying on it.
 */
6364 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6365 MinimumSwathHeightY = MaximumSwathHeightY[k];
6367 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6369 MinimumSwathHeightC = MaximumSwathHeightC[k];
/* Formats not matched by the test above: linear tiling keeps both maxima; the other arms halve Y, C or both. */
6371 if (SurfaceTiling[k] == dm_sw_linear) {
6372 MinimumSwathHeightY = MaximumSwathHeightY[k];
6373 MinimumSwathHeightC = MaximumSwathHeightC[k];
6374 } else if (SourcePixelFormat[k] == dm_rgbe_alpha
6375 && SourceScan[k] == dm_vert) {
6376 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6377 MinimumSwathHeightC = MaximumSwathHeightC[k];
6378 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6379 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6380 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6381 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6382 MinimumSwathHeightY = MaximumSwathHeightY[k];
6383 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6385 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6386 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
/*
 * Step 2: swath sizes in bytes (width upper bound * DET bytes per pixel *
 * swath height) for the max and min heights. The double products are
 * converted to long on assignment. For dm_420_10 the sizes go through
 * dml_ceil with granularity 256.
 */
6390 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6391 * MaximumSwathHeightY[k];
6392 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6393 * MinimumSwathHeightY;
6394 if (SourcePixelFormat[k] == dm_420_10) {
6395 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6396 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6398 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6399 * MaximumSwathHeightC[k];
6400 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6401 * MinimumSwathHeightC;
6402 if (SourcePixelFormat[k] == dm_420_10) {
6403 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6404 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
/*
 * Step 3: choose the swath heights. Each test compares a luma + chroma
 * swath size against half the DET buffer in bytes: max/max first, then
 * min-Y/max-C (when Y >= 1.5 * C), then max-Y/min-C (when Y < 1.5 * C);
 * the min/min assignments follow.
 */
6407 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6408 <= DETBufferSizeInKByte * 1024 / 2) {
6409 SwathHeightY[k] = MaximumSwathHeightY[k];
6410 SwathHeightC[k] = MaximumSwathHeightC[k];
6411 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6412 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6413 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6414 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6415 <= DETBufferSizeInKByte * 1024 / 2) {
6416 SwathHeightY[k] = MinimumSwathHeightY;
6417 SwathHeightC[k] = MaximumSwathHeightC[k];
6418 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6419 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6420 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6421 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6422 <= DETBufferSizeInKByte * 1024 / 2) {
6423 SwathHeightY[k] = MaximumSwathHeightY[k];
6424 SwathHeightC[k] = MinimumSwathHeightC;
6425 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6426 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6428 SwathHeightY[k] = MinimumSwathHeightY;
6429 SwathHeightC[k] = MinimumSwathHeightC;
6430 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6431 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
/*
 * Step 4: split the DET buffer. All of it goes to luma when there is no
 * chroma swath; half each when Y <= 1.5 * C; the 2/3 : 1/3 split follows.
 */
6434 if (SwathHeightC[k] == 0) {
6435 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024;
6436 DETBufferSizeC[k] = 0;
6437 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6438 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 / 2;
6439 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 2;
6441 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 * 2 / 3;
6442 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 3;
/*
 * Step 5: the plane is unsupported when even the minimum swaths exceed
 * half the DET buffer, or a swath width exceeds its maximum (chroma is
 * only checked when SwathHeightC[k] > 0).
 */
6445 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6446 > DETBufferSizeInKByte * 1024 / 2
6447 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6448 || (SwathHeightC[k] > 0
6449 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6450 *ViewportSizeSupport = false;
6451 ViewportSizeSupportPerPlane[k] = false;
6453 ViewportSizeSupportPerPlane[k] = true;
/*
 * CalculateSwathWidth - per-plane swath widths and maximum swath heights.
 *
 * For each plane k in [0, NumberOfActivePlanes) this writes
 * SwathWidthSingleDPPY/C[k], SwathWidthY/C[k], MaximumSwathHeightY/C[k] and
 * swath_width_luma_ub/chroma_ub[k].
 *
 * NOTE(review): not every line of this function is visible in this listing
 * (some parameters, braces and else keywords are absent); consecutive
 * alternative assignments are presumably if/else arms -- confirm.
 */
6458 static void CalculateSwathWidth(
6459 bool ForceSingleDPP,
6460 int NumberOfActivePlanes,
6461 enum source_format_class SourcePixelFormat[],
6462 enum scan_direction_class SourceScan[],
6463 unsigned int ViewportWidth[],
6464 unsigned int ViewportHeight[],
6465 unsigned int SurfaceWidthY[],
6466 unsigned int SurfaceWidthC[],
6467 unsigned int SurfaceHeightY[],
6468 unsigned int SurfaceHeightC[],
6469 enum odm_combine_mode ODMCombineEnabled[],
6472 int Read256BytesBlockHeightY[],
6473 int Read256BytesBlockHeightC[],
6474 int Read256BytesBlockWidthY[],
6475 int Read256BytesBlockWidthC[],
6476 int BlendingAndTiming[],
6477 unsigned int HActive[],
/* The arrays from here to the end of the parameter list are written by this function. */
6480 double SwathWidthSingleDPPY[],
6481 double SwathWidthSingleDPPC[],
6482 double SwathWidthY[],
6483 double SwathWidthC[],
6484 int MaximumSwathHeightY[],
6485 int MaximumSwathHeightC[],
6486 unsigned int swath_width_luma_ub[],
6487 unsigned int swath_width_chroma_ub[])
6490 long surface_width_ub_l;
6491 long surface_height_ub_l;
6492 long surface_width_ub_c;
6493 long surface_height_ub_c;
6495 for (k = 0; k < NumberOfActivePlanes; ++k) {
6496 enum odm_combine_mode MainPlaneODMCombine = 0;
/* Surface dimensions passed through dml_ceil with the 256-byte block dimension as granularity. */
6497 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6498 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6499 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6500 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
/* The single-DPP swath width is the viewport width when the scan is not dm_vert; the viewport height assignment follows. */
6502 if (SourceScan[k] != dm_vert) {
6503 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6505 SwathWidthSingleDPPY[k] = ViewportHeight[k];
/* Use the ODM combine mode of plane j where BlendingAndTiming[k] == j; the plane's own mode is the starting value. */
6508 MainPlaneODMCombine = ODMCombineEnabled[k];
6509 for (j = 0; j < NumberOfActivePlanes; ++j) {
6510 if (BlendingAndTiming[k] == j) {
6511 MainPlaneODMCombine = ODMCombineEnabled[j];
/* ODM 4:1 / 2:1 cap the width at HActive / 4 or / 2 scaled by HRatio; two DPPs per plane halve it. */
6515 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6516 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6517 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6518 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6519 } else if (DPPPerPlane[k] == 2) {
6520 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6522 SwathWidthY[k] = SwathWidthSingleDPPY[k];
/* Chroma widths are half the luma widths for the dm_420_* formats; the equal-width assignments follow. */
6525 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6526 SwathWidthC[k] = SwathWidthY[k] / 2;
6527 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6529 SwathWidthC[k] = SwathWidthY[k];
6530 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
/* ForceSingleDPP overrides the widths chosen above with the single-DPP widths. */
6533 if (ForceSingleDPP == true) {
6534 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6535 SwathWidthC[k] = SwathWidthSingleDPPC[k];
/* These four assignments repeat, with identical expressions, the ones at the top of the loop body. */
6538 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6539 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6540 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6541 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
/*
 * Scan not dm_vert: the max swath height is the block height, and the
 * width upper bound is dml_ceil(width - 1, block width) + block width,
 * limited to the surface width bound. Chroma is computed only when
 * BytePerPixC[k] > 0; the 0 assignment follows.
 */
6543 if (SourceScan[k] != dm_vert) {
6544 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6545 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6546 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6547 Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6548 if (BytePerPixC[k] > 0) {
6549 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6550 Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6552 swath_width_chroma_ub[k] = 0;
/* Same computation with block width and height swapped and the surface height bounds as limits -- presumably the dm_vert arm. */
6555 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6556 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6557 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6558 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6559 if (BytePerPixC[k] > 0) {
6560 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6561 Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6563 swath_width_chroma_ub[k] = 0;
/*
 * CalculateExtraLatency - returns
 *   (RoundTripPingLatencyCycles + 32) / DCFCLK + ExtraLatencyBytes / ReturnBW
 * where ExtraLatencyBytes is obtained from CalculateExtraLatencyBytes().
 *
 * NOTE(review): some parameters and call arguments (including the
 * declarations of DCFCLK and ReturnBW) are not visible in this listing.
 */
6569 static double CalculateExtraLatency(
6570 long RoundTripPingLatencyCycles,
6571 long ReorderingBytes,
6573 int TotalNumberOfActiveDPP,
6574 int PixelChunkSizeInKByte,
6575 int TotalNumberOfDCCActiveDPP,
6580 int NumberOfActivePlanes,
6582 int dpte_group_bytes[],
6583 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6584 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6585 double HostVMMinPageSize,
6586 int HostVMMaxNonCachedPageTableLevels)
6588 double ExtraLatencyBytes = 0;
/* The byte count is delegated to the helper; the arguments are forwarded from this function's parameters. */
6589 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6591 TotalNumberOfActiveDPP,
6592 PixelChunkSizeInKByte,
6593 TotalNumberOfDCCActiveDPP,
6597 NumberOfActivePlanes,
6600 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6601 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6603 HostVMMaxNonCachedPageTableLevels);
/* Ping latency plus a constant 32, divided by DCFCLK; the extra bytes are divided by ReturnBW. */
6605 return (RoundTripPingLatencyCycles + 32) / DCFCLK + ExtraLatencyBytes / ReturnBW;
/*
 * CalculateExtraLatencyBytes - accumulates a byte count in 'ret':
 * ReorderingBytes, plus the pixel and meta chunk sizes (given in KBytes,
 * multiplied by 1024.0) weighted by DPP counts, plus -- when GPUVMEnable
 * is set -- a per-plane dpte_group_bytes term scaled by the host VM
 * level count and inefficiency factor.
 *
 * NOTE(review): the declaration of 'ret', the return statement, some
 * parameters and some else keywords are not visible in this listing;
 * presumably 'ret' is what the function returns -- confirm.
 */
6608 static double CalculateExtraLatencyBytes(
6609 long ReorderingBytes,
6610 int TotalNumberOfActiveDPP,
6611 int PixelChunkSizeInKByte,
6612 int TotalNumberOfDCCActiveDPP,
6616 int NumberOfActivePlanes,
6618 int dpte_group_bytes[],
6619 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6620 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6621 double HostVMMinPageSize,
6622 int HostVMMaxNonCachedPageTableLevels)
6625 double HostVMInefficiencyFactor = 0;
6626 int HostVMDynamicLevels = 0;
/* With both GPUVM and HostVM on, the factor is the ratio of the two percentage parameters. */
6629 if (GPUVMEnable == true && HostVMEnable == true) {
6630 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
/*
 * Level count by minimum page size: below 2048 use the maximum as is;
 * from 2048 up to (not including) 1048576 use maximum - 1; the
 * maximum - 2 form follows. Both reduced forms are clamped at 0.
 */
6631 if (HostVMMinPageSize < 2048) {
6632 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
6633 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
6634 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
6636 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
/* Neutral values (factor 1, zero levels) -- presumably the arm taken when GPUVM and HostVM are not both on. */
6639 HostVMInefficiencyFactor = 1;
6640 HostVMDynamicLevels = 0;
/* Base amount: reordering bytes plus chunk sizes converted from KBytes. */
6643 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
/* With GPUVM on, add for each plane: DPP count * dpte group bytes * (1 + 8 * levels) * factor. */
6645 if (GPUVMEnable == true) {
6646 for (k = 0; k < NumberOfActivePlanes; ++k) {
6647 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * CalculateUrgentLatency - pick the worst of the three urgent latency figures
 * (pixel data only, pixel mixed with VM data, VM data only) and optionally
 * apply a fabric-clock dependent adjustment.
 *
 * When DoUrgentLatencyAdjustment is true the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1),
 * which is positive when FabricClock is below the reference clock and
 * negative when it is above it (for a positive component).
 */
6654 static double CalculateUrgentLatency(
6655 double UrgentLatencyPixelDataOnly,
6656 double UrgentLatencyPixelMixedWithVMData,
6657 double UrgentLatencyVMDataOnly,
6658 bool DoUrgentLatencyAdjustment,
6659 double UrgentLatencyAdjustmentFabricClockComponent,
6660 double UrgentLatencyAdjustmentFabricClockReference,
/* Start from the largest of the three latency inputs. */
6665 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
6666 if (DoUrgentLatencyAdjustment == true) {
6667 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
/*
 * UseMinimumDCFCLK - for every voltage state i (0 .. mode_lib->soc.num_states - 1)
 * and every value of the second index j (0 and 1), estimate the lowest DCFCLK
 * that still satisfies the average and the peak (prefetch) bandwidth
 * requirements, and store it in DCFCLKState[i][j].
 *
 * The stored value is
 *   min(DCFCLKPerState[i],
 *       1.05 * (1 + PercentMarginOverMinimumRequiredDCFCLK / 100)
 *            * max(DCFCLKRequiredForAverageBandwidth,
 *                  DCFCLKRequiredForPeakBandwidth))
 * so it never exceeds the clock of the state itself.
 *
 * DCFCLKState is the only array parameter written here; the remaining
 * parameters are inputs.
 *
 * NOTE(review): i, j and k are unsigned int while NumberOfActivePlanes is a
 * signed int. The two loops written as "k <= NumberOfActivePlanes - 1" compare
 * k against -1 converted to unsigned if NumberOfActivePlanes is 0, so they
 * would not stop within bounds. Presumably callers always pass at least one
 * active plane - confirm, or switch those loops to "k < NumberOfActivePlanes"
 * like the other loops in this function.
 */
6673 static void UseMinimumDCFCLK(
6674 struct display_mode_lib *mode_lib,
6675 int MaxInterDCNTileRepeaters,
6676 int MaxPrefetchMode,
6677 double FinalDRAMClockChangeLatency,
6678 double SREnterPlusExitTime,
6680 int RoundTripPingLatencyCycles,
6681 int ReorderingBytes,
6682 int PixelChunkSizeInKByte,
6685 int GPUVMMaxPageTableLevels,
6687 int NumberOfActivePlanes,
6688 double HostVMMinPageSize,
6689 int HostVMMaxNonCachedPageTableLevels,
6690 bool DynamicMetadataVMEnabled,
6691 enum immediate_flip_requirement ImmediateFlipRequirement,
6692 bool ProgressiveToInterlaceUnitInOPP,
6693 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
6694 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6695 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6696 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
6699 int DynamicMetadataTransmittedBytes[],
6700 int DynamicMetadataLinesBeforeActiveRequired[],
6702 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
6703 double RequiredDISPCLK[][2],
6704 double UrgLatency[],
6705 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
6706 double ProjectedDCFCLKDeepSleep[][2],
6707 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
6708 double TotalVActivePixelBandwidth[][2],
6709 double TotalVActiveCursorBandwidth[][2],
6710 double TotalMetaRowBandwidth[][2],
6711 double TotalDPTERowBandwidth[][2],
6712 unsigned int TotalNumberOfActiveDPP[][2],
6713 unsigned int TotalNumberOfDCCActiveDPP[][2],
6714 int dpte_group_bytes[],
6715 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
6716 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
6717 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
6718 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
6719 int BytePerPixelY[],
6720 int BytePerPixelC[],
6722 double PixelClock[],
6723 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
6724 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
6725 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
6726 bool DynamicMetadataEnable[],
6727 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
6728 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
6729 double ReadBandwidthLuma[],
6730 double ReadBandwidthChroma[],
6731 double DCFCLKPerState[],
6732 double DCFCLKState[][2])
6734 double NormalEfficiency = 0;
6735 double PTEEfficiency = 0;
6736 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
6737 unsigned int i, j, k;
/*
 * NormalEfficiency: fraction (percentage / 100) of ideal bandwidth assumed
 * for pixel traffic; the "mixed with VM data" percentage is used when HostVM
 * is enabled, the "pixel data only" percentage otherwise.
 */
6739 NormalEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
6740 : PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
/*
 * PTEEfficiency: ratio of the "VM data only" percentage to the "pixel mixed
 * with VM data" percentage when HostVM is enabled, 1.0 otherwise.
 */
6741 PTEEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
6742 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
6743 for (i = 0; i < mode_lib->soc.num_states; ++i) {
6744 for (j = 0; j <= 1; ++j) {
6745 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
6746 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX] = { 0 };
6747 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX] = { 0 };
6748 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX] = { 0 };
6749 double MinimumTWait = 0;
6750 double NonDPTEBandwidth = 0;
6751 double DPTEBandwidth = 0;
6752 double DCFCLKRequiredForAverageBandwidth = 0;
6753 double ExtraLatencyBytes = 0;
6754 double ExtraLatencyCycles = 0;
6755 double DCFCLKRequiredForPeakBandwidth = 0;
6756 int NoOfDPPState[DC__NUM_DPP__MAX] = { 0 };
6757 double MinimumTvmPlus2Tr0 = 0;
/*
 * Sum, over all planes, of the DPTE row bytes of every DPP divided by
 * 15.75 line times (HTotal / PixelClock is the duration of one line).
 */
6759 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6760 for (k = 0; k < NumberOfActivePlanes; ++k) {
6761 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6762 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
/*
 * Copy the per-plane DPP counts of this (i, j) combination into a flat int
 * array so it can be passed to CalculateExtraLatencyBytes().
 */
6765 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6766 NoOfDPPState[k] = NoOfDPP[i][j][k];
6769 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
6770 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
/*
 * With HostVM enabled or an immediate flip required, the prefetch/flip DPTE
 * row bandwidth computed above is used; otherwise the regular total.
 */
6771 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
6772 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
/*
 * Average requirement: the largest of the projected deep-sleep DCFCLK, the
 * total bandwidth limited by the allowed SDP port percentage, and the total
 * bandwidth derated by the PTE and normal efficiencies.
 */
6773 DCFCLKRequiredForAverageBandwidth = dml_max3(ProjectedDCFCLKDeepSleep[i][j],
6774 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth / (MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6775 (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / ReturnBusWidth);
6777 ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, TotalNumberOfDCCActiveDPP[i][j],
6778 MetaChunkSize, GPUVMEnable, HostVMEnable, NumberOfActivePlanes, NoOfDPPState, dpte_group_bytes,
6779 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6780 HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
/* Extra latency expressed in DCFCLK cycles rather than in time. */
6781 ExtraLatencyCycles = RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
/* Per-plane peak (prefetch) requirement. */
6782 for (k = 0; k < NumberOfActivePlanes; ++k) {
6783 double DCFCLKCyclesRequiredInPrefetch = { 0 };
6784 double ExpectedPrefetchBWAcceleration = { 0 };
6785 double PrefetchTime = { 0 };
/* Cycles needed to return the luma and chroma prefetch pixel data. */
6787 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
6788 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
/*
 * Total prefetch cycles: twice the extra latency per DPP, the PDE/meta PTE
 * bytes (counted only when there are more than two GPUVM page-table levels),
 * two DPTE rows, two meta rows and the pixel data cycles from above.
 */
6789 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
6790 / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * DPTEBytesPerRow[i][j][k] / PTEEfficiency
6791 / NormalEfficiency / ReturnBusWidth + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
6792 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
6793 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k]) / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
/*
 * Additional latency charged only when GPUVM, dynamic metadata for this
 * plane and dynamic metadata through VM are all enabled.
 */
6794 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
6795 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
/*
 * Time left for prefetch: (MaximumVStartup - 1) lines minus the wait time,
 * the page-table walk latency and the dynamic metadata VM latency.
 */
6796 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait - UrgLatency[i] * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels
6797 : GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
6799 if (PrefetchTime > 0) {
6800 double ExpectedVRatioPrefetch = { 0 };
6801 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
/*
 * The requirement is scaled up when the expected prefetch ratio exceeds 1,
 * and scaled up again when it exceeds 4.
 */
6802 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6803 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6804 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
6805 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6806 + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / ReturnBusWidth;
/*
 * NOTE(review): appears to be the fallback when no positive prefetch time is
 * available - the plane then asks for the full clock of the state; confirm.
 */
6809 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
/*
 * Planes with dynamic metadata must also fit the extra latency into the time
 * that remains after the metadata pipeline delays.
 */
6811 if (DynamicMetadataEnable[k] == true) {
6812 double TsetupPipe = { 0 };
6813 double TdmbfPipe = { 0 };
6814 double TdmsksPipe = { 0 };
6815 double TdmecPipe = { 0 };
6816 double AllowedTimeForUrgentExtraLatency = { 0 };
6818 CalculateDynamicMetadataParameters(
6819 MaxInterDCNTileRepeaters,
6820 RequiredDPPCLK[i][j][k],
6821 RequiredDISPCLK[i][j],
6822 ProjectedDCFCLKDeepSleep[i][j],
6825 VTotal[k] - VActive[k],
6826 DynamicMetadataTransmittedBytes[k],
6827 DynamicMetadataLinesBeforeActiveRequired[k],
6829 ProgressiveToInterlaceUnitInOPP,
6834 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TsetupPipe
6835 - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6836 if (AllowedTimeForUrgentExtraLatency > 0) {
6837 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
6838 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
/*
 * NOTE(review): appears to be the fallback when no time is left for the
 * extra latency - confirm.
 */
6840 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
/* The peak requirement of the configuration is the sum over all planes. */
6844 DCFCLKRequiredForPeakBandwidth = 0;
6845 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6846 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
/*
 * Minimum page-table walk time expressed as a multiple of the urgent
 * latency; the multiplier is zero when GPUVM is disabled.
 */
6848 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? (HostVMEnable == true ?
6849 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
/*
 * Final per-plane check: if the available time does not exceed the minimum
 * walk time plus a quarter of the prefetch line time, the full state clock is
 * required; the max3 below raises the requirement so the extra latency fits
 * into the remaining time.
 */
6850 for (k = 0; k < NumberOfActivePlanes; ++k) {
6851 double MaximumTvmPlus2Tr0PlusTsw = { 0 };
6852 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
6853 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
6854 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
6856 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
6857 / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
6858 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
/*
 * Apply a fixed 5% factor plus the configured percentage margin to the larger
 * of the two requirements, capped at the clock of the state.
 */
6861 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
6862 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
6867 #endif /* CONFIG_DRM_AMD_DC_DCN3_0 */