2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
26 #ifdef CONFIG_DRM_AMD_DC_DCN3_1
29 #include "../display_mode_lib.h"
30 #include "display_mode_vba_31.h"
31 #include "../dml_inline_defs.h"
35 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
37 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
38 * ways. Unless there is something clearly wrong with it the code should
39 * remain as-is as it provides us with a guarantee from HW that it is correct.
43 #define BPP_BLENDED_PIPE 0xffffffff
44 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
45 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
47 // For DML-C changes that haven't been propagated to VBA yet
48 //#define __DML_VBA_ALLOW_DELTA__
50 // Move these to ip parameters/constants
52 // The vstartup value at which DML starts checking whether the mode can be supported
53 #define __DML_VBA_MIN_VSTARTUP__ 9
55 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
56 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
58 // fudge factor for min dcfclk calculation
59 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
65 double DCFCLKDeepSleep;
66 unsigned int DPPPerPlane;
68 enum scan_direction_class SourceScan;
69 unsigned int BlockWidth256BytesY;
70 unsigned int BlockHeight256BytesY;
71 unsigned int BlockWidth256BytesC;
72 unsigned int BlockHeight256BytesC;
73 unsigned int InterlaceEnable;
74 unsigned int NumberOfCursors;
77 unsigned int DCCEnable;
78 bool ODMCombineIsEnabled;
79 enum source_format_class SourcePixelFormat;
82 bool ProgressiveToInterlaceUnitInOPP;
86 #define BPP_BLENDED_PIPE 0xffffffff
88 static bool CalculateBytePerPixelAnd256BBlockSizes(
89 enum source_format_class SourcePixelFormat,
90 enum dm_swizzle_mode SurfaceTiling,
91 unsigned int *BytePerPixelY,
92 unsigned int *BytePerPixelC,
93 double *BytePerPixelDETY,
94 double *BytePerPixelDETC,
95 unsigned int *BlockHeight256BytesY,
96 unsigned int *BlockHeight256BytesC,
97 unsigned int *BlockWidth256BytesY,
98 unsigned int *BlockWidth256BytesC);
99 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
100 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
101 static unsigned int dscceComputeDelay(
104 unsigned int sliceWidth,
105 unsigned int numSlices,
106 enum output_format_class pixelFormat,
107 enum output_encoder_class Output);
108 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
109 static bool CalculatePrefetchSchedule(
110 struct display_mode_lib *mode_lib,
111 double HostVMInefficiencyFactor,
113 unsigned int DSCDelay,
114 double DPPCLKDelaySubtotalPlusCNVCFormater,
115 double DPPCLKDelaySCL,
116 double DPPCLKDelaySCLLBOnly,
117 double DPPCLKDelayCNVCCursor,
118 double DISPCLKDelaySubtotal,
119 unsigned int DPP_RECOUT_WIDTH,
120 enum output_format_class OutputFormat,
121 unsigned int MaxInterDCNTileRepeaters,
122 unsigned int VStartup,
123 unsigned int MaxVStartup,
124 unsigned int GPUVMPageTableLevels,
127 unsigned int HostVMMaxNonCachedPageTableLevels,
128 double HostVMMinPageSize,
129 bool DynamicMetadataEnable,
130 bool DynamicMetadataVMEnabled,
131 int DynamicMetadataLinesBeforeActiveRequired,
132 unsigned int DynamicMetadataTransmittedBytes,
133 double UrgentLatency,
134 double UrgentExtraLatency,
136 unsigned int PDEAndMetaPTEBytesFrame,
137 unsigned int MetaRowByte,
138 unsigned int PixelPTEBytesPerRow,
139 double PrefetchSourceLinesY,
140 unsigned int SwathWidthY,
141 double VInitPreFillY,
142 unsigned int MaxNumSwathY,
143 double PrefetchSourceLinesC,
144 unsigned int SwathWidthC,
145 double VInitPreFillC,
146 unsigned int MaxNumSwathC,
147 int swath_width_luma_ub,
148 int swath_width_chroma_ub,
149 unsigned int SwathHeightY,
150 unsigned int SwathHeightC,
152 double *DSTXAfterScaler,
153 double *DSTYAfterScaler,
154 double *DestinationLinesForPrefetch,
155 double *PrefetchBandwidth,
156 double *DestinationLinesToRequestVMInVBlank,
157 double *DestinationLinesToRequestRowInVBlank,
158 double *VRatioPrefetchY,
159 double *VRatioPrefetchC,
160 double *RequiredPrefetchPixDataBWLuma,
161 double *RequiredPrefetchPixDataBWChroma,
162 bool *NotEnoughTimeForDynamicMetadata,
164 double *prefetch_vmrow_bw,
168 int *VUpdateOffsetPix,
169 double *VUpdateWidthPix,
170 double *VReadyOffsetPix);
171 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
172 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
173 static void CalculateDCCConfiguration(
175 bool DCCProgrammingAssumesScanDirectionUnknown,
176 enum source_format_class SourcePixelFormat,
177 unsigned int SurfaceWidthLuma,
178 unsigned int SurfaceWidthChroma,
179 unsigned int SurfaceHeightLuma,
180 unsigned int SurfaceHeightChroma,
181 double DETBufferSize,
182 unsigned int RequestHeight256ByteLuma,
183 unsigned int RequestHeight256ByteChroma,
184 enum dm_swizzle_mode TilingFormat,
185 unsigned int BytePerPixelY,
186 unsigned int BytePerPixelC,
187 double BytePerPixelDETY,
188 double BytePerPixelDETC,
189 enum scan_direction_class ScanOrientation,
190 unsigned int *MaxUncompressedBlockLuma,
191 unsigned int *MaxUncompressedBlockChroma,
192 unsigned int *MaxCompressedBlockLuma,
193 unsigned int *MaxCompressedBlockChroma,
194 unsigned int *IndependentBlockLuma,
195 unsigned int *IndependentBlockChroma);
196 static double CalculatePrefetchSourceLines(
197 struct display_mode_lib *mode_lib,
201 bool ProgressiveToInterlaceUnitInOPP,
202 unsigned int SwathHeight,
203 unsigned int ViewportYStart,
204 double *VInitPreFill,
205 unsigned int *MaxNumSwath);
206 static unsigned int CalculateVMAndRowBytes(
207 struct display_mode_lib *mode_lib,
209 unsigned int BlockHeight256Bytes,
210 unsigned int BlockWidth256Bytes,
211 enum source_format_class SourcePixelFormat,
212 unsigned int SurfaceTiling,
213 unsigned int BytePerPixel,
214 enum scan_direction_class ScanDirection,
215 unsigned int SwathWidth,
216 unsigned int ViewportHeight,
219 unsigned int HostVMMaxNonCachedPageTableLevels,
220 unsigned int GPUVMMinPageSize,
221 unsigned int HostVMMinPageSize,
222 unsigned int PTEBufferSizeInRequests,
224 unsigned int DCCMetaPitch,
225 unsigned int *MacroTileWidth,
226 unsigned int *MetaRowByte,
227 unsigned int *PixelPTEBytesPerRow,
228 bool *PTEBufferSizeNotExceeded,
229 int *dpte_row_width_ub,
230 unsigned int *dpte_row_height,
231 unsigned int *MetaRequestWidth,
232 unsigned int *MetaRequestHeight,
233 unsigned int *meta_row_width,
234 unsigned int *meta_row_height,
236 unsigned int *dpte_group_bytes,
237 unsigned int *PixelPTEReqWidth,
238 unsigned int *PixelPTEReqHeight,
239 unsigned int *PTERequestSize,
240 int *DPDE0BytesFrame,
241 int *MetaPTEBytesFrame);
242 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
243 static void CalculateRowBandwidth(
245 enum source_format_class SourcePixelFormat,
250 unsigned int MetaRowByteLuma,
251 unsigned int MetaRowByteChroma,
252 unsigned int meta_row_height_luma,
253 unsigned int meta_row_height_chroma,
254 unsigned int PixelPTEBytesPerRowLuma,
255 unsigned int PixelPTEBytesPerRowChroma,
256 unsigned int dpte_row_height_luma,
257 unsigned int dpte_row_height_chroma,
259 double *dpte_row_bw);
261 static void CalculateFlipSchedule(
262 struct display_mode_lib *mode_lib,
263 double HostVMInefficiencyFactor,
264 double UrgentExtraLatency,
265 double UrgentLatency,
266 unsigned int GPUVMMaxPageTableLevels,
268 unsigned int HostVMMaxNonCachedPageTableLevels,
270 double HostVMMinPageSize,
271 double PDEAndMetaPTEBytesPerFrame,
273 double DPTEBytesPerRow,
274 double BandwidthAvailableForImmediateFlip,
275 unsigned int TotImmediateFlipBytes,
276 enum source_format_class SourcePixelFormat,
282 unsigned int dpte_row_height,
283 unsigned int meta_row_height,
284 unsigned int dpte_row_height_chroma,
285 unsigned int meta_row_height_chroma,
286 double *DestinationLinesToRequestVMInImmediateFlip,
287 double *DestinationLinesToRequestRowInImmediateFlip,
288 double *final_flip_bw,
289 bool *ImmediateFlipSupportedForPipe);
290 static double CalculateWriteBackDelay(
291 enum source_format_class WritebackPixelFormat,
292 double WritebackHRatio,
293 double WritebackVRatio,
294 unsigned int WritebackVTaps,
295 int WritebackDestinationWidth,
296 int WritebackDestinationHeight,
297 int WritebackSourceHeight,
298 unsigned int HTotal);
300 static void CalculateVupdateAndDynamicMetadataParameters(
301 int MaxInterDCNTileRepeaters,
304 double DCFClkDeepSleep,
308 int DynamicMetadataTransmittedBytes,
309 int DynamicMetadataLinesBeforeActiveRequired,
311 bool ProgressiveToInterlaceUnitInOPP,
316 int *VUpdateOffsetPix,
317 double *VUpdateWidthPix,
318 double *VReadyOffsetPix);
320 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
321 struct display_mode_lib *mode_lib,
322 unsigned int PrefetchMode,
323 unsigned int NumberOfActivePlanes,
324 unsigned int MaxLineBufferLines,
325 unsigned int LineBufferSize,
326 unsigned int WritebackInterfaceBufferSize,
329 bool SynchronizedVBlank,
330 unsigned int dpte_group_bytes[],
331 unsigned int MetaChunkSize,
332 double UrgentLatency,
334 double WritebackLatency,
335 double WritebackChunkSize,
337 double DRAMClockChangeLatency,
339 double SREnterPlusExitTime,
341 double SREnterPlusExitZ8Time,
342 double DCFCLKDeepSleep,
343 unsigned int DETBufferSizeY[],
344 unsigned int DETBufferSizeC[],
345 unsigned int SwathHeightY[],
346 unsigned int SwathHeightC[],
347 unsigned int LBBitPerPixel[],
348 double SwathWidthY[],
349 double SwathWidthC[],
351 double HRatioChroma[],
352 unsigned int vtaps[],
353 unsigned int VTAPsChroma[],
355 double VRatioChroma[],
356 unsigned int HTotal[],
358 unsigned int BlendingAndTiming[],
359 unsigned int DPPPerPlane[],
360 double BytePerPixelDETY[],
361 double BytePerPixelDETC[],
362 double DSTXAfterScaler[],
363 double DSTYAfterScaler[],
364 bool WritebackEnable[],
365 enum source_format_class WritebackPixelFormat[],
366 double WritebackDestinationWidth[],
367 double WritebackDestinationHeight[],
368 double WritebackSourceHeight[],
369 bool UnboundedRequestEnabled,
370 int unsigned CompressedBufferSizeInkByte,
371 enum clock_change_support *DRAMClockChangeSupport,
372 double *UrgentWatermark,
373 double *WritebackUrgentWatermark,
374 double *DRAMClockChangeWatermark,
375 double *WritebackDRAMClockChangeWatermark,
376 double *StutterExitWatermark,
377 double *StutterEnterPlusExitWatermark,
378 double *Z8StutterExitWatermark,
379 double *Z8StutterEnterPlusExitWatermark,
380 double *MinActiveDRAMClockChangeLatencySupported);
382 static void CalculateDCFCLKDeepSleep(
383 struct display_mode_lib *mode_lib,
384 unsigned int NumberOfActivePlanes,
388 double VRatioChroma[],
389 double SwathWidthY[],
390 double SwathWidthC[],
391 unsigned int DPPPerPlane[],
393 double HRatioChroma[],
395 double PSCL_THROUGHPUT[],
396 double PSCL_THROUGHPUT_CHROMA[],
398 double ReadBandwidthLuma[],
399 double ReadBandwidthChroma[],
401 double *DCFCLKDeepSleep);
403 static void CalculateUrgentBurstFactor(
404 int swath_width_luma_ub,
405 int swath_width_chroma_ub,
406 unsigned int SwathHeightY,
407 unsigned int SwathHeightC,
409 double UrgentLatency,
410 double CursorBufferSize,
411 unsigned int CursorWidth,
412 unsigned int CursorBPP,
415 double BytePerPixelInDETY,
416 double BytePerPixelInDETC,
417 double DETBufferSizeY,
418 double DETBufferSizeC,
419 double *UrgentBurstFactorCursor,
420 double *UrgentBurstFactorLuma,
421 double *UrgentBurstFactorChroma,
422 bool *NotEnoughUrgentLatencyHiding);
424 static void UseMinimumDCFCLK(
425 struct display_mode_lib *mode_lib,
426 int MaxInterDCNTileRepeaters,
428 double FinalDRAMClockChangeLatency,
429 double SREnterPlusExitTime,
431 int RoundTripPingLatencyCycles,
433 int PixelChunkSizeInKByte,
436 int GPUVMMaxPageTableLevels,
438 int NumberOfActivePlanes,
439 double HostVMMinPageSize,
440 int HostVMMaxNonCachedPageTableLevels,
441 bool DynamicMetadataVMEnabled,
442 enum immediate_flip_requirement ImmediateFlipRequirement,
443 bool ProgressiveToInterlaceUnitInOPP,
444 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
445 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
448 int DynamicMetadataTransmittedBytes[],
449 int DynamicMetadataLinesBeforeActiveRequired[],
451 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
452 double RequiredDISPCLK[][2],
454 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
455 double ProjectedDCFCLKDeepSleep[][2],
456 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
457 double TotalVActivePixelBandwidth[][2],
458 double TotalVActiveCursorBandwidth[][2],
459 double TotalMetaRowBandwidth[][2],
460 double TotalDPTERowBandwidth[][2],
461 unsigned int TotalNumberOfActiveDPP[][2],
462 unsigned int TotalNumberOfDCCActiveDPP[][2],
463 int dpte_group_bytes[],
464 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
465 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
466 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
467 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
472 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
473 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
474 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
475 bool DynamicMetadataEnable[],
476 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
477 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
478 double ReadBandwidthLuma[],
479 double ReadBandwidthChroma[],
480 double DCFCLKPerState[],
481 double DCFCLKState[][2]);
483 static void CalculatePixelDeliveryTimes(
484 unsigned int NumberOfActivePlanes,
486 double VRatioChroma[],
487 double VRatioPrefetchY[],
488 double VRatioPrefetchC[],
489 unsigned int swath_width_luma_ub[],
490 unsigned int swath_width_chroma_ub[],
491 unsigned int DPPPerPlane[],
493 double HRatioChroma[],
495 double PSCL_THROUGHPUT[],
496 double PSCL_THROUGHPUT_CHROMA[],
499 enum scan_direction_class SourceScan[],
500 unsigned int NumberOfCursors[],
501 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
502 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
503 unsigned int BlockWidth256BytesY[],
504 unsigned int BlockHeight256BytesY[],
505 unsigned int BlockWidth256BytesC[],
506 unsigned int BlockHeight256BytesC[],
507 double DisplayPipeLineDeliveryTimeLuma[],
508 double DisplayPipeLineDeliveryTimeChroma[],
509 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
510 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
511 double DisplayPipeRequestDeliveryTimeLuma[],
512 double DisplayPipeRequestDeliveryTimeChroma[],
513 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
514 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
515 double CursorRequestDeliveryTime[],
516 double CursorRequestDeliveryTimePrefetch[]);
518 static void CalculateMetaAndPTETimes(
519 int NumberOfActivePlanes,
522 int MinMetaChunkSizeBytes,
525 double VRatioChroma[],
526 double DestinationLinesToRequestRowInVBlank[],
527 double DestinationLinesToRequestRowInImmediateFlip[],
532 enum scan_direction_class SourceScan[],
533 int dpte_row_height[],
534 int dpte_row_height_chroma[],
535 int meta_row_width[],
536 int meta_row_width_chroma[],
537 int meta_row_height[],
538 int meta_row_height_chroma[],
539 int meta_req_width[],
540 int meta_req_width_chroma[],
541 int meta_req_height[],
542 int meta_req_height_chroma[],
543 int dpte_group_bytes[],
544 int PTERequestSizeY[],
545 int PTERequestSizeC[],
546 int PixelPTEReqWidthY[],
547 int PixelPTEReqHeightY[],
548 int PixelPTEReqWidthC[],
549 int PixelPTEReqHeightC[],
550 int dpte_row_width_luma_ub[],
551 int dpte_row_width_chroma_ub[],
552 double DST_Y_PER_PTE_ROW_NOM_L[],
553 double DST_Y_PER_PTE_ROW_NOM_C[],
554 double DST_Y_PER_META_ROW_NOM_L[],
555 double DST_Y_PER_META_ROW_NOM_C[],
556 double TimePerMetaChunkNominal[],
557 double TimePerChromaMetaChunkNominal[],
558 double TimePerMetaChunkVBlank[],
559 double TimePerChromaMetaChunkVBlank[],
560 double TimePerMetaChunkFlip[],
561 double TimePerChromaMetaChunkFlip[],
562 double time_per_pte_group_nom_luma[],
563 double time_per_pte_group_vblank_luma[],
564 double time_per_pte_group_flip_luma[],
565 double time_per_pte_group_nom_chroma[],
566 double time_per_pte_group_vblank_chroma[],
567 double time_per_pte_group_flip_chroma[]);
569 static void CalculateVMGroupAndRequestTimes(
570 unsigned int NumberOfActivePlanes,
572 unsigned int GPUVMMaxPageTableLevels,
573 unsigned int HTotal[],
575 double DestinationLinesToRequestVMInVBlank[],
576 double DestinationLinesToRequestVMInImmediateFlip[],
579 int dpte_row_width_luma_ub[],
580 int dpte_row_width_chroma_ub[],
581 int vm_group_bytes[],
582 unsigned int dpde0_bytes_per_frame_ub_l[],
583 unsigned int dpde0_bytes_per_frame_ub_c[],
584 int meta_pte_bytes_per_frame_ub_l[],
585 int meta_pte_bytes_per_frame_ub_c[],
586 double TimePerVMGroupVBlank[],
587 double TimePerVMGroupFlip[],
588 double TimePerVMRequestVBlank[],
589 double TimePerVMRequestFlip[]);
591 static void CalculateStutterEfficiency(
592 struct display_mode_lib *mode_lib,
593 int CompressedBufferSizeInkByte,
594 bool UnboundedRequestEnabled,
595 int ConfigReturnBufferSizeInKByte,
596 int MetaFIFOSizeInKEntries,
597 int ZeroSizeBufferEntries,
598 int NumberOfActivePlanes,
599 int ROBBufferSizeInKByte,
600 double TotalDataReadBandwidth,
603 double COMPBUF_RESERVED_SPACE_64B,
604 double COMPBUF_RESERVED_SPACE_ZS,
607 bool SynchronizedVBlank,
608 double Z8StutterEnterPlusExitWatermark,
609 double StutterEnterPlusExitWatermark,
610 bool ProgressiveToInterlaceUnitInOPP,
612 double MinTTUVBlank[],
614 unsigned int DETBufferSizeY[],
616 double BytePerPixelDETY[],
617 double SwathWidthY[],
620 double NetDCCRateLuma[],
621 double NetDCCRateChroma[],
622 double DCCFractionOfZeroSizeRequestsLuma[],
623 double DCCFractionOfZeroSizeRequestsChroma[],
628 enum scan_direction_class SourceScan[],
629 int BlockHeight256BytesY[],
630 int BlockWidth256BytesY[],
631 int BlockHeight256BytesC[],
632 int BlockWidth256BytesC[],
633 int DCCYMaxUncompressedBlock[],
634 int DCCCMaxUncompressedBlock[],
637 bool WritebackEnable[],
638 double ReadBandwidthPlaneLuma[],
639 double ReadBandwidthPlaneChroma[],
640 double meta_row_bw[],
641 double dpte_row_bw[],
642 double *StutterEfficiencyNotIncludingVBlank,
643 double *StutterEfficiency,
644 int *NumberOfStutterBurstsPerFrame,
645 double *Z8StutterEfficiencyNotIncludingVBlank,
646 double *Z8StutterEfficiency,
647 int *Z8NumberOfStutterBurstsPerFrame,
648 double *StutterPeriod);
650 static void CalculateSwathAndDETConfiguration(
652 int NumberOfActivePlanes,
653 unsigned int DETBufferSizeInKByte,
654 double MaximumSwathWidthLuma[],
655 double MaximumSwathWidthChroma[],
656 enum scan_direction_class SourceScan[],
657 enum source_format_class SourcePixelFormat[],
658 enum dm_swizzle_mode SurfaceTiling[],
660 int ViewportHeight[],
663 int SurfaceHeightY[],
664 int SurfaceHeightC[],
665 int Read256BytesBlockHeightY[],
666 int Read256BytesBlockHeightC[],
667 int Read256BytesBlockWidthY[],
668 int Read256BytesBlockWidthC[],
669 enum odm_combine_mode ODMCombineEnabled[],
670 int BlendingAndTiming[],
673 double BytePerPixDETY[],
674 double BytePerPixDETC[],
677 double HRatioChroma[],
679 int swath_width_luma_ub[],
680 int swath_width_chroma_ub[],
682 double SwathWidthChroma[],
685 unsigned int DETBufferSizeY[],
686 unsigned int DETBufferSizeC[],
687 bool ViewportSizeSupportPerPlane[],
688 bool *ViewportSizeSupport);
689 static void CalculateSwathWidth(
691 int NumberOfActivePlanes,
692 enum source_format_class SourcePixelFormat[],
693 enum scan_direction_class SourceScan[],
695 int ViewportHeight[],
698 int SurfaceHeightY[],
699 int SurfaceHeightC[],
700 enum odm_combine_mode ODMCombineEnabled[],
703 int Read256BytesBlockHeightY[],
704 int Read256BytesBlockHeightC[],
705 int Read256BytesBlockWidthY[],
706 int Read256BytesBlockWidthC[],
707 int BlendingAndTiming[],
711 double SwathWidthSingleDPPY[],
712 double SwathWidthSingleDPPC[],
713 double SwathWidthY[],
714 double SwathWidthC[],
715 int MaximumSwathHeightY[],
716 int MaximumSwathHeightC[],
717 int swath_width_luma_ub[],
718 int swath_width_chroma_ub[]);
720 static double CalculateExtraLatency(
721 int RoundTripPingLatencyCycles,
724 int TotalNumberOfActiveDPP,
725 int PixelChunkSizeInKByte,
726 int TotalNumberOfDCCActiveDPP,
731 int NumberOfActivePlanes,
733 int dpte_group_bytes[],
734 double HostVMInefficiencyFactor,
735 double HostVMMinPageSize,
736 int HostVMMaxNonCachedPageTableLevels);
738 static double CalculateExtraLatencyBytes(
740 int TotalNumberOfActiveDPP,
741 int PixelChunkSizeInKByte,
742 int TotalNumberOfDCCActiveDPP,
746 int NumberOfActivePlanes,
748 int dpte_group_bytes[],
749 double HostVMInefficiencyFactor,
750 double HostVMMinPageSize,
751 int HostVMMaxNonCachedPageTableLevels);
753 static double CalculateUrgentLatency(
754 double UrgentLatencyPixelDataOnly,
755 double UrgentLatencyPixelMixedWithVMData,
756 double UrgentLatencyVMDataOnly,
757 bool DoUrgentLatencyAdjustment,
758 double UrgentLatencyAdjustmentFabricClockComponent,
759 double UrgentLatencyAdjustmentFabricClockReference,
760 double FabricClockSingle);
762 static void CalculateUnboundedRequestAndCompressedBufferSize(
763 unsigned int DETBufferSizeInKByte,
764 int ConfigReturnBufferSizeInKByte,
765 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
769 int CompressedBufferSegmentSizeInkByteFinal,
770 enum output_encoder_class *Output,
771 bool *UnboundedRequestEnabled,
772 int *CompressedBufferSizeInkByte);
774 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
// dml31_recalculate: non-static routine that runs the calculation stages on mode_lib.
// The stages are called in this fixed order, each with the same mode_lib pointer:
//   1. ModeSupportAndSystemConfiguration
//   2. PixelClockAdjustmentForProgressiveToInterlaceUnit
//   3. DisplayPipeConfiguration
//   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
// Nothing is returned; any results are produced by the called stages.
776 void dml31_recalculate(struct display_mode_lib *mode_lib)
778 ModeSupportAndSystemConfiguration(mode_lib);
779 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
780 DisplayPipeConfiguration(mode_lib);
// The trace message below is guarded by __DML_VBA_DEBUG__.
781 #ifdef __DML_VBA_DEBUG__
782 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
784 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
// dscceComputeDelay: computes a DSC encoder delay and converts it to a pixel count
// (pixels = Delay * 3 * pixelsPerClock at the end of the visible code).
// NOTE(review): the return statement and the bodies of the format branches are not
// visible in this excerpt; presumably `pixels` is the returned value - confirm.
// BPP is used in the initial transmit delay below; its parameter declaration (and that
// of bpc, described in the comments) is not visible in this excerpt.
787 static unsigned int dscceComputeDelay(
790 unsigned int sliceWidth,
791 unsigned int numSlices,
792 enum output_format_class pixelFormat,
793 enum output_encoder_class Output)
795 // valid bpc = source bits per component in the set of {8, 10, 12}
796 // valid bpp = increments of 1/16 of a bit
797 // min = 6/7/8 in N420/N422/444, respectively
798 // max = such that compression is 1:1
799 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
800 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
801 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
// Rate-control model size; halved and divided by BPP and pixelsPerClock to get the
// initial transmit delay below.
804 unsigned int rcModelSize = 8192;
806 // N422/N420 operate at 2 pixels per clock
// pixelsPerClock starts at 0; the per-format branches below are expected to assign it
// before it is used as a divisor (assignments not visible in this excerpt).
807 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
809 if (pixelFormat == dm_420)
811 else if (pixelFormat == dm_444)
813 else if (pixelFormat == dm_n422)
815 // #all other modes operate at 1 pixel per clock
819 //initial transmit delay as per PPS
820 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
830 //divide by pixel per cycle to compute slice width as seen by DSC
831 w = sliceWidth / pixelsPerClock;
833 //422 mode has an additional cycle of delay
// NOTE(review): the condition lists dm_420, dm_444 and dm_n422; the values assigned in
// each branch are not visible here - confirm which branch adds the extra cycle.
834 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
839 //main calculation for the dscce
840 ix = initalXmitDelay + 45;
845 ax = (a + 2) / 3 + D + 6 + 1;
// Integer ceiling of ax / wx.
846 L = (ax + wx - 1) / wx;
847 if ((ix % w) == 0 && P != 0)
// Total delay in DSC cycles: L * wx for each slice after the first, plus ax, s,
// lstall and a constant 22.
851 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
853 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
854 pixels = Delay * 3 * pixelsPerClock;
// dscComputeDelay: accumulates a delay value in `Delay`, starting from 0, with a
// separate sequence of contributions per pixel format (dm_420, dm_n422, and a remaining
// case). Each contribution is labelled by the stage comments below.
// NOTE(review): the statements that add to Delay and the return are not visible in
// this excerpt; presumably Delay is returned - confirm.
858 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
860 unsigned int Delay = 0;
// 4:2:0 path.
862 if (pixelFormat == dm_420) {
867 // dscc - input deserializer
869 // dscc gets pixels every other cycle
871 // dscc - input cdc fifo
873 // dscc gets pixels every other cycle
875 // dscc - cdc uncertainty
877 // dscc - output cdc fifo
879 // dscc gets pixels every other cycle
881 // dscc - cdc uncertainty
883 // dscc - output serializer
// Native 4:2:2 path.
887 } else if (pixelFormat == dm_n422) {
892 // dscc - input deserializer
894 // dscc - input cdc fifo
896 // dscc - cdc uncertainty
898 // dscc - output cdc fifo
900 // dscc - cdc uncertainty
902 // dscc - output serializer
// Remaining formats (presumably the final else branch; its opening line is not visible
// in this excerpt).
911 // dscc - input deserializer
913 // dscc - input cdc fifo
915 // dscc - cdc uncertainty
917 // dscc - output cdc fifo
919 // dscc - output serializer
921 // dscc - cdc uncertainty
930 static bool CalculatePrefetchSchedule(
931 struct display_mode_lib *mode_lib,
932 double HostVMInefficiencyFactor,
934 unsigned int DSCDelay,
935 double DPPCLKDelaySubtotalPlusCNVCFormater,
936 double DPPCLKDelaySCL,
937 double DPPCLKDelaySCLLBOnly,
938 double DPPCLKDelayCNVCCursor,
939 double DISPCLKDelaySubtotal,
940 unsigned int DPP_RECOUT_WIDTH,
941 enum output_format_class OutputFormat,
942 unsigned int MaxInterDCNTileRepeaters,
943 unsigned int VStartup,
944 unsigned int MaxVStartup,
945 unsigned int GPUVMPageTableLevels,
948 unsigned int HostVMMaxNonCachedPageTableLevels,
949 double HostVMMinPageSize,
950 bool DynamicMetadataEnable,
951 bool DynamicMetadataVMEnabled,
952 int DynamicMetadataLinesBeforeActiveRequired,
953 unsigned int DynamicMetadataTransmittedBytes,
954 double UrgentLatency,
955 double UrgentExtraLatency,
957 unsigned int PDEAndMetaPTEBytesFrame,
958 unsigned int MetaRowByte,
959 unsigned int PixelPTEBytesPerRow,
960 double PrefetchSourceLinesY,
961 unsigned int SwathWidthY,
962 double VInitPreFillY,
963 unsigned int MaxNumSwathY,
964 double PrefetchSourceLinesC,
965 unsigned int SwathWidthC,
966 double VInitPreFillC,
967 unsigned int MaxNumSwathC,
968 int swath_width_luma_ub,
969 int swath_width_chroma_ub,
970 unsigned int SwathHeightY,
971 unsigned int SwathHeightC,
973 double *DSTXAfterScaler,
974 double *DSTYAfterScaler,
975 double *DestinationLinesForPrefetch,
976 double *PrefetchBandwidth,
977 double *DestinationLinesToRequestVMInVBlank,
978 double *DestinationLinesToRequestRowInVBlank,
979 double *VRatioPrefetchY,
980 double *VRatioPrefetchC,
981 double *RequiredPrefetchPixDataBWLuma,
982 double *RequiredPrefetchPixDataBWChroma,
983 bool *NotEnoughTimeForDynamicMetadata,
985 double *prefetch_vmrow_bw,
989 int *VUpdateOffsetPix,
990 double *VUpdateWidthPix,
991 double *VReadyOffsetPix)
993 bool MyError = false;
994 unsigned int DPPCycles, DISPCLKCycles;
995 double DSTTotalPixelsAfterScaler;
997 double dst_y_prefetch_equ;
999 double prefetch_bw_oto;
1002 double Tvm_oto_lines;
1003 double Tr0_oto_lines;
1004 double dst_y_prefetch_oto;
1005 double TimeForFetchingMetaPTE = 0;
1006 double TimeForFetchingRowInVBlank = 0;
1007 double LinesToRequestPrefetchPixelData = 0;
1008 unsigned int HostVMDynamicLevelsTrips;
1012 double Tvm_trips_rounded;
1013 double Tr0_trips_rounded;
1015 double Tpre_rounded;
1016 double prefetch_bw_equ;
1022 double prefetch_sw_bytes;
1025 int max_vratio_pre = 4;
1027 double Tsw_est1 = 0;
1028 double Tsw_est3 = 0;
1030 if (GPUVMEnable == true && HostVMEnable == true) {
1031 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
1033 HostVMDynamicLevelsTrips = 0;
1035 #ifdef __DML_VBA_DEBUG__
1036 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
1038 CalculateVupdateAndDynamicMetadataParameters(
1039 MaxInterDCNTileRepeaters,
1042 myPipe->DCFCLKDeepSleep,
1046 DynamicMetadataTransmittedBytes,
1047 DynamicMetadataLinesBeforeActiveRequired,
1048 myPipe->InterlaceEnable,
1049 myPipe->ProgressiveToInterlaceUnitInOPP,
1058 LineTime = myPipe->HTotal / myPipe->PixelClock;
1059 trip_to_mem = UrgentLatency;
1060 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
1062 #ifdef __DML_VBA_ALLOW_DELTA__
1063 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
1065 if (DynamicMetadataVMEnabled == true) {
1067 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
1069 *Tdmdl = TWait + UrgentExtraLatency;
1072 #ifdef __DML_VBA_ALLOW_DELTA__
1073 if (DynamicMetadataEnable == false) {
1078 if (DynamicMetadataEnable == true) {
1079 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
1080 *NotEnoughTimeForDynamicMetadata = true;
1081 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
1082 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
1083 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
1084 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
1085 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
1087 *NotEnoughTimeForDynamicMetadata = false;
1090 *NotEnoughTimeForDynamicMetadata = false;
1093 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1095 if (myPipe->ScalerEnabled)
1096 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1098 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1100 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1102 DISPCLKCycles = DISPCLKDelaySubtotal;
1104 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1107 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1109 #ifdef __DML_VBA_DEBUG__
1110 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1111 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1112 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1113 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1114 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1115 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1116 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1117 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1120 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1122 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1123 *DSTYAfterScaler = 1;
1125 *DSTYAfterScaler = 0;
1127 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1128 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1129 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1131 #ifdef __DML_VBA_DEBUG__
1132 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1137 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1138 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1139 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1141 #ifdef __DML_VBA_ALLOW_DELTA__
1142 if (!myPipe->DCCEnable) {
1144 Tr0_trips_rounded = 0.0;
1150 Tvm_trips_rounded = 0.0;
1154 if (GPUVMPageTableLevels >= 3) {
1155 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1159 } else if (!myPipe->DCCEnable) {
1162 *Tno_bw = LineTime / 4;
1165 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1166 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1168 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1170 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1171 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
1173 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
1174 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1175 Tsw_oto = Lsw_oto * LineTime;
1177 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
1179 #ifdef __DML_VBA_DEBUG__
1180 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1181 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1182 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1183 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1184 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1185 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1188 if (GPUVMEnable == true)
1189 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1191 Tvm_oto = LineTime / 4.0;
1193 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1194 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1198 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1201 #ifdef __DML_VBA_DEBUG__
1202 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1203 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1204 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1205 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1206 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1207 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1208 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1209 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1210 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1213 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1214 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1215 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1216 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1217 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1218 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1220 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1222 if (prefetch_sw_bytes < dep_bytes)
1223 prefetch_sw_bytes = 2 * dep_bytes;
1225 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1226 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1227 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1228 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1229 dml_print("DML: LineTime: %f\n", LineTime);
1230 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1232 dml_print("DML: LineTime: %f\n", LineTime);
1233 dml_print("DML: VStartup: %d\n", VStartup);
1234 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1235 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1236 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1237 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1238 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1239 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1240 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1241 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1242 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1243 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1244 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1246 *PrefetchBandwidth = 0;
1247 *DestinationLinesToRequestVMInVBlank = 0;
1248 *DestinationLinesToRequestRowInVBlank = 0;
1249 *VRatioPrefetchY = 0;
1250 *VRatioPrefetchC = 0;
1251 *RequiredPrefetchPixDataBWLuma = 0;
1252 if (dst_y_prefetch_equ > 1) {
1253 double PrefetchBandwidth1;
1254 double PrefetchBandwidth2;
1255 double PrefetchBandwidth3;
1256 double PrefetchBandwidth4;
1258 if (Tpre_rounded - *Tno_bw > 0) {
1259 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1260 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1261 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1263 PrefetchBandwidth1 = 0;
1266 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1267 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1268 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1271 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1272 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1274 PrefetchBandwidth2 = 0;
1276 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1277 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1278 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1279 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1281 PrefetchBandwidth3 = 0;
1284 #ifdef __DML_VBA_DEBUG__
1285 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1286 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1287 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1289 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1290 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1291 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1294 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1295 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1297 PrefetchBandwidth4 = 0;
1304 if (PrefetchBandwidth1 > 0) {
1305 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1306 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1315 if (PrefetchBandwidth2 > 0) {
1316 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1317 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1326 if (PrefetchBandwidth3 > 0) {
1327 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1328 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1338 prefetch_bw_equ = PrefetchBandwidth1;
1339 } else if (Case2OK) {
1340 prefetch_bw_equ = PrefetchBandwidth2;
1341 } else if (Case3OK) {
1342 prefetch_bw_equ = PrefetchBandwidth3;
1344 prefetch_bw_equ = PrefetchBandwidth4;
1347 #ifdef __DML_VBA_DEBUG__
1348 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1349 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1350 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1351 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1354 if (prefetch_bw_equ > 0) {
1355 if (GPUVMEnable == true) {
1356 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1358 Tvm_equ = LineTime / 4;
1361 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1363 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1365 (LineTime - Tvm_equ) / 2,
1368 Tr0_equ = (LineTime - Tvm_equ) / 2;
1373 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1377 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1378 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1379 TimeForFetchingMetaPTE = Tvm_oto;
1380 TimeForFetchingRowInVBlank = Tr0_oto;
1381 *PrefetchBandwidth = prefetch_bw_oto;
1383 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1384 TimeForFetchingMetaPTE = Tvm_equ;
1385 TimeForFetchingRowInVBlank = Tr0_equ;
1386 *PrefetchBandwidth = prefetch_bw_equ;
1389 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1391 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1393 #ifdef __DML_VBA_ALLOW_DELTA__
1394 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1395 // See note above dated 5/30/2018
1396 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1397 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1399 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1402 #ifdef __DML_VBA_DEBUG__
1403 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1404 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1405 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1406 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1407 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1408 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1409 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1412 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1414 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1415 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1416 #ifdef __DML_VBA_DEBUG__
1417 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1418 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1419 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1421 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1422 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1423 *VRatioPrefetchY = dml_max(
1424 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1425 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1426 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1429 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1430 *VRatioPrefetchY = 0;
1432 #ifdef __DML_VBA_DEBUG__
1433 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1434 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1435 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1439 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1440 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1442 #ifdef __DML_VBA_DEBUG__
1443 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1444 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1445 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1447 if ((SwathHeightC > 4)) {
1448 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1449 *VRatioPrefetchC = dml_max(
1451 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1452 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1455 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1456 *VRatioPrefetchC = 0;
1458 #ifdef __DML_VBA_DEBUG__
1459 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1460 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1461 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1465 #ifdef __DML_VBA_DEBUG__
1466 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1467 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1468 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1471 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1473 #ifdef __DML_VBA_DEBUG__
1474 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1477 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1481 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1482 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1483 *VRatioPrefetchY = 0;
1484 *VRatioPrefetchC = 0;
1485 *RequiredPrefetchPixDataBWLuma = 0;
1486 *RequiredPrefetchPixDataBWChroma = 0;
1490 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1491 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1492 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1493 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1495 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1496 (double) LinesToRequestPrefetchPixelData * LineTime);
1497 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1498 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1499 (double) myPipe->HTotal)) * LineTime);
1500 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1501 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1502 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1503 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1504 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1508 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1512 double prefetch_vm_bw;
1513 double prefetch_row_bw;
1515 if (PDEAndMetaPTEBytesFrame == 0) {
1517 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1518 #ifdef __DML_VBA_DEBUG__
1519 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1520 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1521 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1522 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1524 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1525 #ifdef __DML_VBA_DEBUG__
1526 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1531 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1534 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1535 prefetch_row_bw = 0;
1536 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1537 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1539 #ifdef __DML_VBA_DEBUG__
1540 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1541 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1542 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1543 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1546 prefetch_row_bw = 0;
1548 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1551 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1555 *PrefetchBandwidth = 0;
1556 TimeForFetchingMetaPTE = 0;
1557 TimeForFetchingRowInVBlank = 0;
1558 *DestinationLinesToRequestVMInVBlank = 0;
1559 *DestinationLinesToRequestRowInVBlank = 0;
1560 *DestinationLinesForPrefetch = 0;
1561 LinesToRequestPrefetchPixelData = 0;
1562 *VRatioPrefetchY = 0;
1563 *VRatioPrefetchC = 0;
1564 *RequiredPrefetchPixDataBWLuma = 0;
1565 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * RoundToDFSGranularityUp - snap Clock onto the grid (4 * VCOSpeed) / divider.
 *
 * The divider is the quotient (4 * VCOSpeed) / Clock passed through
 * dml_floor() with a granularity argument of 1 (taken here to round the
 * quotient down to a whole number), so the returned clock is expected to be
 * at or above the requested one.
 *
 * No guard is applied: a divider of zero (for instance when Clock exceeds
 * 4 * VCOSpeed) is divided by as-is.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
/*
 * RoundToDFSGranularityDown - snap Clock onto the grid (4 * VCOSpeed) / divider.
 *
 * The divider is the quotient (4 * VCOSpeed) / Clock passed through
 * dml_ceil() with a granularity argument of 1 (taken here to round the
 * quotient up to a whole number), so the returned clock is expected to be
 * at or below the requested one.
 *
 * No guard is applied against Clock being zero.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return vco_x4 / divider;
}
/*
 * CalculateDCCConfiguration - pick a request type for the luma and the chroma
 * plane and report the matching block sizes through the six output pointers.
 *
 * Steps, in the order the code performs them:
 *   1. derive viewport limits from DETBufferSize and the 256-byte request
 *      heights, cap them with fixed maxima, and clamp the luma surface size
 *      to the result;
 *   2. compute the size of a full swath per plane for the horizontal and the
 *      vertical case and decide, per case, whether each plane is flagged
 *      (req128 = 1) because the swaths do not fit in DETBufferSize;
 *   3. derive per-plane "segment order contiguous" flags from the bytes per
 *      pixel and TilingFormat;
 *   4. combine 2 and 3 into RequestLuma / RequestChroma, honouring
 *      DCCProgrammingAssumesScanDirectionUnknown and ScanOrientation;
 *   5. translate the request type into MaxUncompressedBlock, MaxCompressedBlock
 *      and IndependentBlock values, then zero them when DCC is off (and the
 *      chroma ones also when BytePerPixelC == 0).
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are not referenced by any statement shown below.
 *
 * NOTE(review): DCCEnabled, yuv420, horz_div_l, horz_div_c, vert_div_l,
 * vert_div_c and swath_buf_size are used below without a declaration in the
 * lines shown here, and several branch bodies / else lines are not visible
 * either. Confirm against the complete source before relying on the notes
 * marked "presumably".
 */
1581 static void CalculateDCCConfiguration(
1583 bool DCCProgrammingAssumesScanDirectionUnknown,
1584 enum source_format_class SourcePixelFormat,
1585 unsigned int SurfaceWidthLuma,
1586 unsigned int SurfaceWidthChroma,
1587 unsigned int SurfaceHeightLuma,
1588 unsigned int SurfaceHeightChroma,
1589 double DETBufferSize,
1590 unsigned int RequestHeight256ByteLuma,
1591 unsigned int RequestHeight256ByteChroma,
1592 enum dm_swizzle_mode TilingFormat,
1593 unsigned int BytePerPixelY,
1594 unsigned int BytePerPixelC,
1595 double BytePerPixelDETY,
1596 double BytePerPixelDETC,
1597 enum scan_direction_class ScanOrientation,
1598 unsigned int *MaxUncompressedBlockLuma,
1599 unsigned int *MaxUncompressedBlockChroma,
1600 unsigned int *MaxCompressedBlockLuma,
1601 unsigned int *MaxCompressedBlockChroma,
1602 unsigned int *IndependentBlockLuma,
1603 unsigned int *IndependentBlockChroma)
1612 double detile_buf_vp_horz_limit;
1613 double detile_buf_vp_vert_limit;
1615 int MAS_vp_horz_limit;
1616 int MAS_vp_vert_limit;
1617 int max_vp_horz_width;
1618 int max_vp_vert_height;
1619 int eff_surf_width_l;
1620 int eff_surf_width_c;
1621 int eff_surf_height_l;
1622 int eff_surf_height_c;
1624 int full_swath_bytes_horz_wc_l;
1625 int full_swath_bytes_horz_wc_c;
1626 int full_swath_bytes_vert_wc_l;
1627 int full_swath_bytes_vert_wc_c;
1628 int req128_horz_wc_l;
1629 int req128_horz_wc_c;
1630 int req128_vert_wc_l;
1631 int req128_vert_wc_c;
1632 int segment_order_horz_contiguous_luma;
1633 int segment_order_horz_contiguous_chroma;
1634 int segment_order_vert_contiguous_luma;
1635 int segment_order_vert_contiguous_chroma;
/*
 * Enumerators of the RequestType used for RequestLuma / RequestChroma; the
 * lines opening and closing the type declaration are not visible here.
 * REQ_NA is not assigned anywhere in the statements shown.
 */
1638 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1640 RequestType RequestLuma;
1641 RequestType RequestChroma;
/*
 * yuv420 is 1 for the three 4:2:0 source formats and 0 otherwise; it is used
 * below as the divisor (1 + yuv420) for chroma terms.
 */
1643 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
/*
 * NOTE(review): the statements controlled by the next four conditions are not
 * visible. Presumably they adjust the horz_div / vert_div values that feed
 * the (1 + div) divisors below - confirm.
 */
1649 if (BytePerPixelY == 1)
1651 if (BytePerPixelC == 1)
1653 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1655 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
/*
 * Detile-buffer viewport limits. With no chroma plane the swath buffer is
 * DETBufferSize / 2 - 2 * 256 and only luma terms appear in the divisor. The
 * second group (2 * 2 * 256 subtracted, luma plus chroma terms) is presumably
 * the else branch for surfaces that do have a chroma plane.
 */
1658 if (BytePerPixelC == 0) {
1659 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1660 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1661 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1663 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1664 detile_buf_vp_horz_limit = (double) swath_buf_size
1665 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1666 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1667 detile_buf_vp_vert_limit = (double) swath_buf_size
1668 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
/* dm_420_10 gets both limits scaled by 1.5. */
1671 if (SourcePixelFormat == dm_420_10) {
1672 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1673 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
/* Each limit is reduced by one and passed through dml_floor() with 16. */
1676 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1677 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
/*
 * Fixed caps: horizontally 3840 for dm_rgbe_alpha and 5760 otherwise;
 * vertically 2880 when a chroma plane exists and 5760 otherwise. The usable
 * viewport size is the smaller of the cap and the detile-buffer limit.
 */
1679 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1680 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1681 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1682 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
/*
 * Effective luma size is the surface size clamped to the usable viewport
 * size. Effective chroma size is derived from the luma value by dividing by
 * (1 + yuv420); the chroma surface parameters are not consulted.
 */
1683 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1684 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1685 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1686 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
/*
 * Full-swath sizes per plane: effective width * request height * bytes per
 * pixel for the horizontal case, effective height * 256 / request height for
 * the vertical case. The chroma values are set to 0 in what is presumably the
 * else branch of the BytePerPixelC > 0 test.
 */
1688 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1689 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1690 if (BytePerPixelC > 0) {
1691 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1692 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1694 full_swath_bytes_horz_wc_c = 0;
1695 full_swath_bytes_vert_wc_c = 0;
/*
 * dm_420_10: scale each size by 2 / 3 and pass it through dml_ceil() with
 * 256. The operands are int, so the "* 2 / 3" is evaluated in integer
 * arithmetic and the division truncates before dml_ceil() sees the value.
 */
1698 if (SourcePixelFormat == dm_420_10) {
1699 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1700 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1701 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1702 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
/*
 * Horizontal case. If two full swaths of both planes fit in DETBufferSize
 * neither plane is flagged. Otherwise a single plane is flagged when counting
 * that plane once instead of twice is enough to fit: chroma when luma is
 * below 1.5 x chroma, luma when luma is at least 1.5 x chroma. The trailing
 * pair of assignments (both flags 1) is presumably the final else.
 */
1705 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1706 req128_horz_wc_l = 0;
1707 req128_horz_wc_c = 0;
1708 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1709 req128_horz_wc_l = 0;
1710 req128_horz_wc_c = 1;
1711 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1712 req128_horz_wc_l = 1;
1713 req128_horz_wc_c = 0;
1715 req128_horz_wc_l = 1;
1716 req128_horz_wc_c = 1;
/* Vertical case: same decision ladder applied to the vertical swath sizes. */
1719 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1720 req128_vert_wc_l = 0;
1721 req128_vert_wc_c = 0;
1722 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1723 req128_vert_wc_l = 0;
1724 req128_vert_wc_c = 1;
1725 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1726 req128_vert_wc_l = 1;
1727 req128_vert_wc_c = 0;
1729 req128_vert_wc_l = 1;
1730 req128_vert_wc_c = 1;
/*
 * Segment-order flags. Each test below sets the flag to 0 for the listed
 * bytes-per-pixel / TilingFormat combinations; the "= 1" assignment that
 * follows each one is presumably the else branch (else lines not visible).
 */
1733 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1734 segment_order_horz_contiguous_luma = 0;
1736 segment_order_horz_contiguous_luma = 1;
1738 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1739 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1740 segment_order_vert_contiguous_luma = 0;
1742 segment_order_vert_contiguous_luma = 1;
1744 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1745 segment_order_horz_contiguous_chroma = 0;
1747 segment_order_horz_contiguous_chroma = 1;
1749 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1750 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1751 segment_order_vert_contiguous_chroma = 0;
1753 segment_order_vert_contiguous_chroma = 1;
/*
 * Request type selection.
 *  - Scan direction unknown: REQ_256Bytes only if neither the horizontal nor
 *    the vertical flag is set; REQ_128BytesNonContiguous if a set flag
 *    coincides with a segment-order flag of 0 in the same direction; the
 *    remaining assignment (REQ_128BytesContiguous) is presumably the else.
 *  - ScanOrientation != dm_vert: the same choice using the horizontal values
 *    only.
 *  - The last group uses the vertical values only and is presumably the
 *    branch for the remaining orientation.
 */
1756 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1757 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1758 RequestLuma = REQ_256Bytes;
1759 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1760 RequestLuma = REQ_128BytesNonContiguous;
1762 RequestLuma = REQ_128BytesContiguous;
1764 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1765 RequestChroma = REQ_256Bytes;
1766 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1767 RequestChroma = REQ_128BytesNonContiguous;
1769 RequestChroma = REQ_128BytesContiguous;
1771 } else if (ScanOrientation != dm_vert) {
1772 if (req128_horz_wc_l == 0) {
1773 RequestLuma = REQ_256Bytes;
1774 } else if (segment_order_horz_contiguous_luma == 0) {
1775 RequestLuma = REQ_128BytesNonContiguous;
1777 RequestLuma = REQ_128BytesContiguous;
1779 if (req128_horz_wc_c == 0) {
1780 RequestChroma = REQ_256Bytes;
1781 } else if (segment_order_horz_contiguous_chroma == 0) {
1782 RequestChroma = REQ_128BytesNonContiguous;
1784 RequestChroma = REQ_128BytesContiguous;
1787 if (req128_vert_wc_l == 0) {
1788 RequestLuma = REQ_256Bytes;
1789 } else if (segment_order_vert_contiguous_luma == 0) {
1790 RequestLuma = REQ_128BytesNonContiguous;
1792 RequestLuma = REQ_128BytesContiguous;
1794 if (req128_vert_wc_c == 0) {
1795 RequestChroma = REQ_256Bytes;
1796 } else if (segment_order_vert_contiguous_chroma == 0) {
1797 RequestChroma = REQ_128BytesNonContiguous;
1799 RequestChroma = REQ_128BytesContiguous;
/*
 * Luma outputs as (uncompressed, compressed, independent):
 * REQ_256Bytes -> 256 / 256 / 0, REQ_128BytesContiguous -> 256 / 128 / 128,
 * and 256 / 64 / 64 in the last group (presumably the else branch).
 */
1803 if (RequestLuma == REQ_256Bytes) {
1804 *MaxUncompressedBlockLuma = 256;
1805 *MaxCompressedBlockLuma = 256;
1806 *IndependentBlockLuma = 0;
1807 } else if (RequestLuma == REQ_128BytesContiguous) {
1808 *MaxUncompressedBlockLuma = 256;
1809 *MaxCompressedBlockLuma = 128;
1810 *IndependentBlockLuma = 128;
1812 *MaxUncompressedBlockLuma = 256;
1813 *MaxCompressedBlockLuma = 64;
1814 *IndependentBlockLuma = 64;
/* Chroma outputs follow the same mapping, keyed on RequestChroma. */
1817 if (RequestChroma == REQ_256Bytes) {
1818 *MaxUncompressedBlockChroma = 256;
1819 *MaxCompressedBlockChroma = 256;
1820 *IndependentBlockChroma = 0;
1821 } else if (RequestChroma == REQ_128BytesContiguous) {
1822 *MaxUncompressedBlockChroma = 256;
1823 *MaxCompressedBlockChroma = 128;
1824 *IndependentBlockChroma = 128;
1826 *MaxUncompressedBlockChroma = 256;
1827 *MaxCompressedBlockChroma = 64;
1828 *IndependentBlockChroma = 64;
/*
 * Final overrides: chroma outputs are zeroed when DCC is not enabled or the
 * surface has no chroma plane; luma outputs are zeroed when DCC is not
 * enabled.
 */
1831 if (DCCEnabled != true || BytePerPixelC == 0) {
1832 *MaxUncompressedBlockChroma = 0;
1833 *MaxCompressedBlockChroma = 0;
1834 *IndependentBlockChroma = 0;
1837 if (DCCEnabled != true) {
1838 *MaxUncompressedBlockLuma = 0;
1839 *MaxCompressedBlockLuma = 0;
1840 *IndependentBlockLuma = 0;
/*
 * CalculatePrefetchSourceLines - compute the number of source lines to
 * prefetch for one plane.
 *
 * Outputs:
 *   *VInitPreFill - result of the dml_floor() expression built from VRatio,
 *                   vtaps and (in the second form) Interlace.
 *   *MaxNumSwath  - swath count derived from *VInitPreFill and SwathHeight.
 *
 * Returns *MaxNumSwath * SwathHeight + MaxPartialSwath.
 *
 * SwathHeight is used as a divisor and as a modulo operand with no check in
 * this function, so it has to be non-zero on entry.
 *
 * NOTE(review): VRatio, vtaps and Interlace are read below but are not
 * declared in the parameter list as shown, and the else lines separating the
 * alternative statements are not visible. Confirm against the complete
 * source before relying on the notes marked "presumably".
 */
1844 static double CalculatePrefetchSourceLines(
1845 struct display_mode_lib *mode_lib,
1849 bool ProgressiveToInterlaceUnitInOPP,
1850 unsigned int SwathHeight,
1851 unsigned int ViewportYStart,
1852 double *VInitPreFill,
1853 unsigned int *MaxNumSwath)
1855 struct vba_vars_st *v = &mode_lib->vba;
1856 unsigned int MaxPartialSwath;
/*
 * With ProgressiveToInterlaceUnitInOPP set the prefill is
 * dml_floor((VRatio + vtaps + 1) / 2, 1). The second assignment, presumably
 * the else branch, adds Interlace * 0.5 * VRatio inside the numerator.
 */
1858 if (ProgressiveToInterlaceUnitInOPP)
1859 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1861 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
/*
 * Viewport position honoured (IgnoreViewportPositioning clear): the swath
 * count is dml_ceil((prefill - 1) / SwathHeight, 1) + 1 and the partial swath
 * is (prefill - 2) modulo SwathHeight. The cast to unsigned int binds before
 * the % operator. The "+ SwathHeight" variant is presumably the else branch
 * for prefill <= 1, keeping the operand non-negative before the cast.
 * MaxPartialSwath is then raised to at least 1 through dml_max().
 */
1863 if (!v->IgnoreViewportPositioning) {
1865 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1867 if (*VInitPreFill > 1.0)
1868 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1870 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1871 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
/*
 * Presumably the branch taken when IgnoreViewportPositioning is set: a
 * warning is printed if the caller passed a non-zero ViewportYStart, the
 * swath count is dml_ceil(prefill / SwathHeight, 1) and the partial swath is
 * (prefill - 1) modulo SwathHeight. No minimum of 1 is applied in the
 * statements shown for this path.
 */
1875 if (ViewportYStart != 0)
1876 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1878 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1880 if (*VInitPreFill > 1.0)
1881 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1883 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
/* Debug-only dump of the inputs and results (built with __DML_VBA_DEBUG__). */
1886 #ifdef __DML_VBA_DEBUG__
1887 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1888 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1889 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1890 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1891 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1892 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1893 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1894 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1895 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
/* Whole swaths plus the partial swath; the unsigned sum is returned as double. */
1897 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
/*
 * CalculateVMAndRowBytes() - page-table and DCC-meta byte accounting for one plane.
 *
 * Inputs describe the plane (256-byte block dimensions, tiling, bytes per
 * pixel, scan direction, swath width, viewport height, pitches) and the VM
 * configuration (GPUVM/HostVM enables, page sizes, page-table levels and the
 * PTE buffer size in requests).
 *
 * Outputs, written through the pointer parameters:
 *   MetaRequestWidth/Height, meta_row_width/height, MetaRowByte -
 *       DCC meta request and meta row geometry;
 *   MetaPTEBytesFrame, DPDE0BytesFrame - per-frame meta PTE and DPDE0 bytes;
 *   MacroTileWidth - macro tile width derived from the macro tile size;
 *   PixelPTEReqWidth/Height, PTERequestSize - pixel PTE request geometry;
 *   dpte_row_height, dpte_row_width_ub, PixelPTEBytesPerRow -
 *       dpte row geometry and PTE bytes per row;
 *   PTEBufferSizeNotExceeded - result of the PTE buffer size check;
 *   vm_group_bytes, dpte_group_bytes - VM and dpte group sizes.
 *
 * Return: PDEAndMetaPTEBytesFrame, i.e. MetaPTEBytesFrame + MPDEBytesFrame +
 * DPDE0BytesFrame + ExtraDPDEBytesFrame, multiplied by
 * (1 + 8 * HostVMDynamicLevels) when HostVMEnable is set.
 */
1900 static unsigned int CalculateVMAndRowBytes(
1901 struct display_mode_lib *mode_lib,
1903 unsigned int BlockHeight256Bytes,
1904 unsigned int BlockWidth256Bytes,
1905 enum source_format_class SourcePixelFormat,
1906 unsigned int SurfaceTiling,
1907 unsigned int BytePerPixel,
1908 enum scan_direction_class ScanDirection,
1909 unsigned int SwathWidth,
1910 unsigned int ViewportHeight,
1913 unsigned int HostVMMaxNonCachedPageTableLevels,
1914 unsigned int GPUVMMinPageSize,
1915 unsigned int HostVMMinPageSize,
1916 unsigned int PTEBufferSizeInRequests,
1918 unsigned int DCCMetaPitch,
1919 unsigned int *MacroTileWidth,
1920 unsigned int *MetaRowByte,
1921 unsigned int *PixelPTEBytesPerRow,
1922 bool *PTEBufferSizeNotExceeded,
1923 int *dpte_row_width_ub,
1924 unsigned int *dpte_row_height,
1925 unsigned int *MetaRequestWidth,
1926 unsigned int *MetaRequestHeight,
1927 unsigned int *meta_row_width,
1928 unsigned int *meta_row_height,
1929 int *vm_group_bytes,
1930 unsigned int *dpte_group_bytes,
1931 unsigned int *PixelPTEReqWidth,
1932 unsigned int *PixelPTEReqHeight,
1933 unsigned int *PTERequestSize,
1934 int *DPDE0BytesFrame,
1935 int *MetaPTEBytesFrame)
1937 struct vba_vars_st *v = &mode_lib->vba;
1938 unsigned int MPDEBytesFrame;
1939 unsigned int DCCMetaSurfaceBytes;
1940 unsigned int MacroTileSizeBytes;
1941 unsigned int MacroTileHeight;
1942 unsigned int ExtraDPDEBytesFrame;
1943 unsigned int PDEAndMetaPTEBytesFrame;
1944 unsigned int PixelPTEReqHeightPTEs = 0;
1945 unsigned int HostVMDynamicLevels = 0;
1946 double FractionOfPTEReturnDrop;
// HostVMDynamicLevels is only raised above its initial 0 when both GPUVM and
// HostVM are enabled; it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1
// or 2 depending on the HostVMMinPageSize range, clamped at 0 by dml_max().
1948 if (GPUVMEnable == true && HostVMEnable == true) {
1949 if (HostVMMinPageSize < 2048) {
1950 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1951 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1952 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1954 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
// DCC meta request geometry: 8x the 256-byte block dimensions.
1958 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1959 *MetaRequestWidth = 8 * BlockWidth256Bytes;
// The meta row uses the request height/width directly for non-vertical scan
// and with their roles swapped for vertical scan.
1960 if (ScanDirection != dm_vert) {
1961 *meta_row_height = *MetaRequestHeight;
1962 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1963 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1965 *meta_row_height = *MetaRequestWidth;
1966 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1967 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1969 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
// Meta PTE / MPDE bytes per frame are derived from the meta surface size and
// the GPUVM page-table depth when GPUVM is enabled.
1970 if (GPUVMEnable == true) {
1971 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1972 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1974 *MetaPTEBytesFrame = 0;
// With DCC disabled the meta PTE byte count is forced to 0.
1978 if (DCCEnable != true) {
1979 *MetaPTEBytesFrame = 0;
// Macro tile: 256 bytes / one 256-byte block high for linear surfaces; the
// other assignment is 65536 bytes / 16 blocks high.
1984 if (SurfaceTiling == dm_sw_linear) {
1985 MacroTileSizeBytes = 256;
1986 MacroTileHeight = BlockHeight256Bytes;
1988 MacroTileSizeBytes = 65536;
1989 MacroTileHeight = 16 * BlockHeight256Bytes;
1991 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
// DPDE0 / extra DPDE bytes are only non-zero with GPUVM enabled and more than
// one page-table level; the surface extent uses ViewportHeight for
// non-vertical scan and SwathWidth for vertical scan.
1993 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1994 if (ScanDirection != dm_vert) {
1995 *DPDE0BytesFrame = 64
1997 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
2001 *DPDE0BytesFrame = 64
2003 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
2007 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
2009 *DPDE0BytesFrame = 0;
2010 ExtraDPDEBytesFrame = 0;
// Total per-frame page-directory and meta-PTE bytes; this is what the function
// returns (after the HostVM scaling below).
2013 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2015 #ifdef __DML_VBA_DEBUG__
2016 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2017 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2018 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2019 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2020 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
// HostVM scales the total by (1 + 8 * HostVMDynamicLevels).
2023 if (HostVMEnable == true) {
2024 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2026 #ifdef __DML_VBA_DEBUG__
2027 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
// Pixel PTE request geometry, selected by tiling mode, macro tile size and
// GPUVMMinPageSize. FractionOfPTEReturnDrop feeds the buffer check further down.
2030 if (SurfaceTiling == dm_sw_linear) {
2031 PixelPTEReqHeightPTEs = 1;
2032 *PixelPTEReqHeight = 1;
2033 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
2034 *PTERequestSize = 64;
2035 FractionOfPTEReturnDrop = 0;
// NOTE(review): the assignments to MacroTileSizeBytes above are 256 and 65536,
// so this 4096 case does not look reachable today - confirm.
2036 } else if (MacroTileSizeBytes == 4096) {
2037 PixelPTEReqHeightPTEs = 1;
2038 *PixelPTEReqHeight = MacroTileHeight;
2039 *PixelPTEReqWidth = 8 * *MacroTileWidth;
2040 *PTERequestSize = 64;
2041 if (ScanDirection != dm_vert)
2042 FractionOfPTEReturnDrop = 0;
// Written as a floating-point quotient: the integer expression 7 / 8
// truncates to 0 in C, which would make this case identical to the one above.
2044 FractionOfPTEReturnDrop = 7.0 / 8.0;
2045 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
2046 PixelPTEReqHeightPTEs = 16;
2047 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2048 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2049 *PTERequestSize = 128;
2050 FractionOfPTEReturnDrop = 0;
2052 PixelPTEReqHeightPTEs = 1;
2053 *PixelPTEReqHeight = MacroTileHeight;
2054 *PixelPTEReqWidth = 8 * *MacroTileWidth;
2055 *PTERequestSize = 64;
2056 FractionOfPTEReturnDrop = 0;
// dpte row height, row width upper bound and PTE bytes per row. For linear
// surfaces the row height is a power of two capped at 128 (presumably the
// largest one not exceeding PTEBufferSizeInRequests * PixelPTEReqWidth / Pitch,
// assuming dml_log2() is a base-2 logarithm). For tiled surfaces the PTE
// request width/height are used, swapped for vertical scan.
2059 if (SurfaceTiling == dm_sw_linear) {
2060 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2061 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2062 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2063 } else if (ScanDirection != dm_vert) {
2064 *dpte_row_height = *PixelPTEReqHeight;
2065 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2066 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2068 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
2069 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
2070 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
// Buffer check: PTE bytes per row, reduced by the return-drop fraction, must
// not exceed 64 * PTEBufferSizeInRequests.
2073 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
2074 *PTEBufferSizeNotExceeded = true;
2076 *PTEBufferSizeNotExceeded = false;
// GPUVM disabled: report zero PTE bytes per row and mark the buffer as
// sufficient, overriding the check above.
2079 if (GPUVMEnable != true) {
2080 *PixelPTEBytesPerRow = 0;
2081 *PTEBufferSizeNotExceeded = true;
2084 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
// HostVM applies the same (1 + 8 * HostVMDynamicLevels) scaling to the
// per-row PTE bytes as to the per-frame total.
2086 if (HostVMEnable == true) {
2087 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
// Group sizes: 512 bytes for both with HostVM. With GPUVM only, the VM group
// is 2048 and the dpte group is 512 for vertically scanned, non-linear
// surfaces whose PTE request is one PTE high, else 2048. The last
// assignments set both to 0.
2090 if (HostVMEnable == true) {
2091 *vm_group_bytes = 512;
2092 *dpte_group_bytes = 512;
2093 } else if (GPUVMEnable == true) {
2094 *vm_group_bytes = 2048;
2095 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2096 *dpte_group_bytes = 512;
2098 *dpte_group_bytes = 2048;
2101 *vm_group_bytes = 0;
2102 *dpte_group_bytes = 0;
2104 return PDEAndMetaPTEBytesFrame;
2107 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2109 struct vba_vars_st *v = &mode_lib->vba;
2111 double HostVMInefficiencyFactor = 1.0;
2112 bool NoChromaPlanes = true;
2114 double VMDataOnlyReturnBW;
2115 double MaxTotalRDBandwidth = 0;
2116 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2118 v->WritebackDISPCLK = 0.0;
2119 v->DISPCLKWithRamping = 0;
2120 v->DISPCLKWithoutRamping = 0;
2121 v->GlobalDPPCLK = 0.0;
2122 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2124 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2125 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2126 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2127 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2128 if (v->HostVMEnable != true) {
2129 v->ReturnBW = dml_min(
2130 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2131 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2133 v->ReturnBW = dml_min(
2134 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2135 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2138 /* End DAL custom code */
2140 // DISPCLK and DPPCLK Calculation
2142 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2143 if (v->WritebackEnable[k]) {
2144 v->WritebackDISPCLK = dml_max(
2145 v->WritebackDISPCLK,
2146 dml31_CalculateWriteBackDISPCLK(
2147 v->WritebackPixelFormat[k],
2149 v->WritebackHRatio[k],
2150 v->WritebackVRatio[k],
2151 v->WritebackHTaps[k],
2152 v->WritebackVTaps[k],
2153 v->WritebackSourceWidth[k],
2154 v->WritebackDestinationWidth[k],
2156 v->WritebackLineBufferSize));
2160 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2161 if (v->HRatio[k] > 1) {
2162 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2163 v->MaxDCHUBToPSCLThroughput,
2164 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2166 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2169 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2171 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2172 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2174 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2175 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2178 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2179 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2180 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2181 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2183 if (v->HRatioChroma[k] > 1) {
2184 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2185 v->MaxDCHUBToPSCLThroughput,
2186 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2188 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2190 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2192 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2193 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2196 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2197 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2200 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2204 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2205 if (v->BlendingAndTiming[k] != k)
2207 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2208 v->DISPCLKWithRamping = dml_max(
2209 v->DISPCLKWithRamping,
2210 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2211 * (1 + v->DISPCLKRampingMargin / 100));
2212 v->DISPCLKWithoutRamping = dml_max(
2213 v->DISPCLKWithoutRamping,
2214 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2215 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2216 v->DISPCLKWithRamping = dml_max(
2217 v->DISPCLKWithRamping,
2218 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2219 * (1 + v->DISPCLKRampingMargin / 100));
2220 v->DISPCLKWithoutRamping = dml_max(
2221 v->DISPCLKWithoutRamping,
2222 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2224 v->DISPCLKWithRamping = dml_max(
2225 v->DISPCLKWithRamping,
2226 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2227 v->DISPCLKWithoutRamping = dml_max(
2228 v->DISPCLKWithoutRamping,
2229 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2233 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2234 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2236 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2237 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2238 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2239 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2240 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2241 v->DISPCLKDPPCLKVCOSpeed);
2242 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2243 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2244 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2245 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2247 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2249 v->DISPCLK = v->DISPCLK_calculated;
2250 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2252 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2253 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2254 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2256 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2257 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2258 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2259 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2262 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2263 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2266 // Urgent and B P-State/DRAM Clock Change Watermark
2267 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2268 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2270 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2271 CalculateBytePerPixelAnd256BBlockSizes(
2272 v->SourcePixelFormat[k],
2273 v->SurfaceTiling[k],
2274 &v->BytePerPixelY[k],
2275 &v->BytePerPixelC[k],
2276 &v->BytePerPixelDETY[k],
2277 &v->BytePerPixelDETC[k],
2278 &v->BlockHeight256BytesY[k],
2279 &v->BlockHeight256BytesC[k],
2280 &v->BlockWidth256BytesY[k],
2281 &v->BlockWidth256BytesC[k]);
2284 CalculateSwathWidth(
2286 v->NumberOfActivePlanes,
2287 v->SourcePixelFormat,
2295 v->ODMCombineEnabled,
2298 v->BlockHeight256BytesY,
2299 v->BlockHeight256BytesC,
2300 v->BlockWidth256BytesY,
2301 v->BlockWidth256BytesC,
2302 v->BlendingAndTiming,
2306 v->SwathWidthSingleDPPY,
2307 v->SwathWidthSingleDPPC,
2312 v->swath_width_luma_ub,
2313 v->swath_width_chroma_ub);
2315 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2316 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2318 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2319 * v->VRatioChroma[k];
2320 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2323 // DCFCLK Deep Sleep
2324 CalculateDCFCLKDeepSleep(
2326 v->NumberOfActivePlanes,
2337 v->PSCL_THROUGHPUT_LUMA,
2338 v->PSCL_THROUGHPUT_CHROMA,
2340 v->ReadBandwidthPlaneLuma,
2341 v->ReadBandwidthPlaneChroma,
2343 &v->DCFCLKDeepSleep);
2346 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2347 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2348 v->DSCCLK_calculated[k] = 0.0;
2350 if (v->OutputFormat[k] == dm_420)
2351 v->DSCFormatFactor = 2;
2352 else if (v->OutputFormat[k] == dm_444)
2353 v->DSCFormatFactor = 1;
2354 else if (v->OutputFormat[k] == dm_n422)
2355 v->DSCFormatFactor = 2;
2357 v->DSCFormatFactor = 1;
2358 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2359 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2360 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2361 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2362 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2363 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2365 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2366 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2371 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2372 double BPP = v->OutputBpp[k];
2374 if (v->DSCEnabled[k] && BPP != 0) {
2375 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2376 v->DSCDelay[k] = dscceComputeDelay(
2377 v->DSCInputBitPerComponent[k],
2379 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2380 v->NumberOfDSCSlices[k],
2382 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2383 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2385 * (dscceComputeDelay(
2386 v->DSCInputBitPerComponent[k],
2388 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2389 v->NumberOfDSCSlices[k] / 2.0,
2391 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2394 * (dscceComputeDelay(
2395 v->DSCInputBitPerComponent[k],
2397 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2398 v->NumberOfDSCSlices[k] / 4.0,
2400 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2402 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2408 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2409 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2410 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2411 v->DSCDelay[k] = v->DSCDelay[j];
2414 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2415 unsigned int PDEAndMetaPTEBytesFrameY;
2416 unsigned int PixelPTEBytesPerRowY;
2417 unsigned int MetaRowByteY;
2418 unsigned int MetaRowByteC;
2419 unsigned int PDEAndMetaPTEBytesFrameC;
2420 unsigned int PixelPTEBytesPerRowC;
2421 bool PTEBufferSizeNotExceededY;
2422 bool PTEBufferSizeNotExceededC;
2424 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2425 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2426 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2427 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2428 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2430 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2431 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2434 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2437 v->BlockHeight256BytesC[k],
2438 v->BlockWidth256BytesC[k],
2439 v->SourcePixelFormat[k],
2440 v->SurfaceTiling[k],
2441 v->BytePerPixelC[k],
2444 v->ViewportHeightChroma[k],
2447 v->HostVMMaxNonCachedPageTableLevels,
2448 v->GPUVMMinPageSize,
2449 v->HostVMMinPageSize,
2450 v->PTEBufferSizeInRequestsForChroma,
2452 v->DCCMetaPitchC[k],
2453 &v->MacroTileWidthC[k],
2455 &PixelPTEBytesPerRowC,
2456 &PTEBufferSizeNotExceededC,
2457 &v->dpte_row_width_chroma_ub[k],
2458 &v->dpte_row_height_chroma[k],
2459 &v->meta_req_width_chroma[k],
2460 &v->meta_req_height_chroma[k],
2461 &v->meta_row_width_chroma[k],
2462 &v->meta_row_height_chroma[k],
2465 &v->PixelPTEReqWidthC[k],
2466 &v->PixelPTEReqHeightC[k],
2467 &v->PTERequestSizeC[k],
2468 &v->dpde0_bytes_per_frame_ub_c[k],
2469 &v->meta_pte_bytes_per_frame_ub_c[k]);
2471 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2476 v->ProgressiveToInterlaceUnitInOPP,
2478 v->ViewportYStartC[k],
2479 &v->VInitPreFillC[k],
2480 &v->MaxNumSwathC[k]);
2482 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2483 v->PTEBufferSizeInRequestsForChroma = 0;
2484 PixelPTEBytesPerRowC = 0;
2485 PDEAndMetaPTEBytesFrameC = 0;
2487 v->MaxNumSwathC[k] = 0;
2488 v->PrefetchSourceLinesC[k] = 0;
2491 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2494 v->BlockHeight256BytesY[k],
2495 v->BlockWidth256BytesY[k],
2496 v->SourcePixelFormat[k],
2497 v->SurfaceTiling[k],
2498 v->BytePerPixelY[k],
2501 v->ViewportHeight[k],
2504 v->HostVMMaxNonCachedPageTableLevels,
2505 v->GPUVMMinPageSize,
2506 v->HostVMMinPageSize,
2507 v->PTEBufferSizeInRequestsForLuma,
2509 v->DCCMetaPitchY[k],
2510 &v->MacroTileWidthY[k],
2512 &PixelPTEBytesPerRowY,
2513 &PTEBufferSizeNotExceededY,
2514 &v->dpte_row_width_luma_ub[k],
2515 &v->dpte_row_height[k],
2516 &v->meta_req_width[k],
2517 &v->meta_req_height[k],
2518 &v->meta_row_width[k],
2519 &v->meta_row_height[k],
2520 &v->vm_group_bytes[k],
2521 &v->dpte_group_bytes[k],
2522 &v->PixelPTEReqWidthY[k],
2523 &v->PixelPTEReqHeightY[k],
2524 &v->PTERequestSizeY[k],
2525 &v->dpde0_bytes_per_frame_ub_l[k],
2526 &v->meta_pte_bytes_per_frame_ub_l[k]);
2528 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2533 v->ProgressiveToInterlaceUnitInOPP,
2535 v->ViewportYStartY[k],
2536 &v->VInitPreFillY[k],
2537 &v->MaxNumSwathY[k]);
2538 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2539 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2540 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2542 CalculateRowBandwidth(
2544 v->SourcePixelFormat[k],
2548 v->HTotal[k] / v->PixelClock[k],
2551 v->meta_row_height[k],
2552 v->meta_row_height_chroma[k],
2553 PixelPTEBytesPerRowY,
2554 PixelPTEBytesPerRowC,
2555 v->dpte_row_height[k],
2556 v->dpte_row_height_chroma[k],
2558 &v->dpte_row_bw[k]);
2561 v->TotalDCCActiveDPP = 0;
2562 v->TotalActiveDPP = 0;
2563 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2564 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2565 if (v->DCCEnable[k])
2566 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2567 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2568 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2569 NoChromaPlanes = false;
2572 ReorderBytes = v->NumberOfChannels
2574 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2575 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2576 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2578 VMDataOnlyReturnBW = dml_min(
2579 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2580 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2581 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2582 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2584 #ifdef __DML_VBA_DEBUG__
2585 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2586 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2587 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2588 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2589 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2590 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2591 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2592 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2593 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2594 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2595 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2598 if (v->GPUVMEnable && v->HostVMEnable)
2599 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2601 v->UrgentExtraLatency = CalculateExtraLatency(
2602 v->RoundTripPingLatencyCycles,
2606 v->PixelChunkSizeInKByte,
2607 v->TotalDCCActiveDPP,
2612 v->NumberOfActivePlanes,
2614 v->dpte_group_bytes,
2615 HostVMInefficiencyFactor,
2616 v->HostVMMinPageSize,
2617 v->HostVMMaxNonCachedPageTableLevels);
2619 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2621 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2622 if (v->BlendingAndTiming[k] == k) {
2623 if (v->WritebackEnable[k] == true) {
2624 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2625 + CalculateWriteBackDelay(
2626 v->WritebackPixelFormat[k],
2627 v->WritebackHRatio[k],
2628 v->WritebackVRatio[k],
2629 v->WritebackVTaps[k],
2630 v->WritebackDestinationWidth[k],
2631 v->WritebackDestinationHeight[k],
2632 v->WritebackSourceHeight[k],
2633 v->HTotal[k]) / v->DISPCLK;
2635 v->WritebackDelay[v->VoltageLevel][k] = 0;
2636 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2637 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2638 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2639 v->WritebackDelay[v->VoltageLevel][k],
2641 + CalculateWriteBackDelay(
2642 v->WritebackPixelFormat[j],
2643 v->WritebackHRatio[j],
2644 v->WritebackVRatio[j],
2645 v->WritebackVTaps[j],
2646 v->WritebackDestinationWidth[j],
2647 v->WritebackDestinationHeight[j],
2648 v->WritebackSourceHeight[j],
2649 v->HTotal[k]) / v->DISPCLK);
2655 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2656 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2657 if (v->BlendingAndTiming[k] == j)
2658 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2660 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2661 v->MaxVStartupLines[k] =
2662 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2663 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2664 v->VTotal[k] - v->VActive[k]
2668 (double) v->WritebackDelay[v->VoltageLevel][k]
2669 / (v->HTotal[k] / v->PixelClock[k]),
2672 #ifdef __DML_VBA_DEBUG__
2673 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2674 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2675 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2679 v->MaximumMaxVStartupLines = 0;
2680 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2681 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2684 // We don't really care to iterate between the various prefetch modes
2685 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2687 v->UrgentLatency = CalculateUrgentLatency(
2688 v->UrgentLatencyPixelDataOnly,
2689 v->UrgentLatencyPixelMixedWithVMData,
2690 v->UrgentLatencyVMDataOnly,
2691 v->DoUrgentLatencyAdjustment,
2692 v->UrgentLatencyAdjustmentFabricClockComponent,
2693 v->UrgentLatencyAdjustmentFabricClockReference,
2696 v->FractionOfUrgentBandwidth = 0.0;
2697 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2699 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2702 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2703 bool DestinationLineTimesForPrefetchLessThan2 = false;
2704 bool VRatioPrefetchMoreThan4 = false;
2705 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2706 MaxTotalRDBandwidth = 0;
2708 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2710 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2713 myPipe.DPPCLK = v->DPPCLK[k];
2714 myPipe.DISPCLK = v->DISPCLK;
2715 myPipe.PixelClock = v->PixelClock[k];
2716 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2717 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2718 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2719 myPipe.SourceScan = v->SourceScan[k];
2720 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2721 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2722 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2723 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2724 myPipe.InterlaceEnable = v->Interlace[k];
2725 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2726 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2727 myPipe.HTotal = v->HTotal[k];
2728 myPipe.DCCEnable = v->DCCEnable[k];
2729 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2730 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2731 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2732 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2733 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2734 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2735 v->ErrorResult[k] = CalculatePrefetchSchedule(
2737 HostVMInefficiencyFactor,
2740 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2742 v->DPPCLKDelaySCLLBOnly,
2743 v->DPPCLKDelayCNVCCursor,
2744 v->DISPCLKDelaySubtotal,
2745 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2747 v->MaxInterDCNTileRepeaters,
2748 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2749 v->MaxVStartupLines[k],
2750 v->GPUVMMaxPageTableLevels,
2753 v->HostVMMaxNonCachedPageTableLevels,
2754 v->HostVMMinPageSize,
2755 v->DynamicMetadataEnable[k],
2756 v->DynamicMetadataVMEnabled,
2757 v->DynamicMetadataLinesBeforeActiveRequired[k],
2758 v->DynamicMetadataTransmittedBytes[k],
2760 v->UrgentExtraLatency,
2762 v->PDEAndMetaPTEBytesFrame[k],
2764 v->PixelPTEBytesPerRow[k],
2765 v->PrefetchSourceLinesY[k],
2767 v->VInitPreFillY[k],
2769 v->PrefetchSourceLinesC[k],
2771 v->VInitPreFillC[k],
2773 v->swath_width_luma_ub[k],
2774 v->swath_width_chroma_ub[k],
2778 &v->DSTXAfterScaler[k],
2779 &v->DSTYAfterScaler[k],
2780 &v->DestinationLinesForPrefetch[k],
2781 &v->PrefetchBandwidth[k],
2782 &v->DestinationLinesToRequestVMInVBlank[k],
2783 &v->DestinationLinesToRequestRowInVBlank[k],
2784 &v->VRatioPrefetchY[k],
2785 &v->VRatioPrefetchC[k],
2786 &v->RequiredPrefetchPixDataBWLuma[k],
2787 &v->RequiredPrefetchPixDataBWChroma[k],
2788 &v->NotEnoughTimeForDynamicMetadata[k],
2790 &v->prefetch_vmrow_bw[k],
2794 &v->VUpdateOffsetPix[k],
2795 &v->VUpdateWidthPix[k],
2796 &v->VReadyOffsetPix[k]);
2798 #ifdef __DML_VBA_DEBUG__
2799 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2801 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2804 v->NoEnoughUrgentLatencyHiding = false;
2805 v->NoEnoughUrgentLatencyHidingPre = false;
2807 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2808 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2809 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2810 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2811 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2813 CalculateUrgentBurstFactor(
2814 v->swath_width_luma_ub[k],
2815 v->swath_width_chroma_ub[k],
2818 v->HTotal[k] / v->PixelClock[k],
2820 v->CursorBufferSize,
2821 v->CursorWidth[k][0],
2825 v->BytePerPixelDETY[k],
2826 v->BytePerPixelDETC[k],
2827 v->DETBufferSizeY[k],
2828 v->DETBufferSizeC[k],
2829 &v->UrgBurstFactorCursor[k],
2830 &v->UrgBurstFactorLuma[k],
2831 &v->UrgBurstFactorChroma[k],
2832 &v->NoUrgentLatencyHiding[k]);
2834 CalculateUrgentBurstFactor(
2835 v->swath_width_luma_ub[k],
2836 v->swath_width_chroma_ub[k],
2839 v->HTotal[k] / v->PixelClock[k],
2841 v->CursorBufferSize,
2842 v->CursorWidth[k][0],
2844 v->VRatioPrefetchY[k],
2845 v->VRatioPrefetchC[k],
2846 v->BytePerPixelDETY[k],
2847 v->BytePerPixelDETC[k],
2848 v->DETBufferSizeY[k],
2849 v->DETBufferSizeC[k],
2850 &v->UrgBurstFactorCursorPre[k],
2851 &v->UrgBurstFactorLumaPre[k],
2852 &v->UrgBurstFactorChromaPre[k],
2853 &v->NoUrgentLatencyHidingPre[k]);
2855 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2857 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2858 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2859 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2860 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2861 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2863 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2864 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2865 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2867 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2869 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2870 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2871 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2872 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2873 + v->cursor_bw_pre[k]);
2875 #ifdef __DML_VBA_DEBUG__
2876 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2877 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2878 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2879 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2880 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2882 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2883 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2885 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2886 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2887 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2888 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2889 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2890 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2891 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2892 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2893 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2894 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2897 if (v->DestinationLinesForPrefetch[k] < 2)
2898 DestinationLineTimesForPrefetchLessThan2 = true;
2900 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2901 VRatioPrefetchMoreThan4 = true;
2903 if (v->NoUrgentLatencyHiding[k] == true)
2904 v->NoEnoughUrgentLatencyHiding = true;
2906 if (v->NoUrgentLatencyHidingPre[k] == true)
2907 v->NoEnoughUrgentLatencyHidingPre = true;
2910 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2912 #ifdef __DML_VBA_DEBUG__
2913 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2914 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2915 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2918 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2919 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2920 v->PrefetchModeSupported = true;
2922 v->PrefetchModeSupported = false;
2923 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2924 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2925 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2926 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2930 // This error result check was done after the PrefetchModeSupported. So we will
2931 // still try to calculate flip schedule even prefetch mode not supported
2932 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2933 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2934 v->PrefetchModeSupported = false;
2935 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2939 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2940 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2941 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2942 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2944 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2945 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2946 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2948 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2949 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2950 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2953 v->TotImmediateFlipBytes = 0;
2954 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2955 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2956 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2958 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2959 CalculateFlipSchedule(
2961 HostVMInefficiencyFactor,
2962 v->UrgentExtraLatency,
2964 v->GPUVMMaxPageTableLevels,
2966 v->HostVMMaxNonCachedPageTableLevels,
2968 v->HostVMMinPageSize,
2969 v->PDEAndMetaPTEBytesFrame[k],
2971 v->PixelPTEBytesPerRow[k],
2972 v->BandwidthAvailableForImmediateFlip,
2973 v->TotImmediateFlipBytes,
2974 v->SourcePixelFormat[k],
2975 v->HTotal[k] / v->PixelClock[k],
2980 v->dpte_row_height[k],
2981 v->meta_row_height[k],
2982 v->dpte_row_height_chroma[k],
2983 v->meta_row_height_chroma[k],
2984 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2985 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2986 &v->final_flip_bw[k],
2987 &v->ImmediateFlipSupportedForPipe[k]);
2990 v->total_dcn_read_bw_with_flip = 0.0;
2991 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2992 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2993 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2995 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2996 v->DPPPerPlane[k] * v->final_flip_bw[k]
2997 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2998 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2999 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
3001 * (v->final_flip_bw[k]
3002 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
3003 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
3004 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
3005 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
3007 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
3008 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
3009 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
3011 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
3012 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
3014 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
3016 v->ImmediateFlipSupported = true;
3017 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
3018 #ifdef __DML_VBA_DEBUG__
3019 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
3021 v->ImmediateFlipSupported = false;
3022 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
3024 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3025 if (v->ImmediateFlipSupportedForPipe[k] == false) {
3026 #ifdef __DML_VBA_DEBUG__
3027 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
3030 v->ImmediateFlipSupported = false;
3034 v->ImmediateFlipSupported = false;
3037 v->PrefetchAndImmediateFlipSupported =
3038 (v->PrefetchModeSupported == true
3039 && ((!v->ImmediateFlipSupport && !v->HostVMEnable
3040 && v->ImmediateFlipRequirement != dm_immediate_flip_required) || v->ImmediateFlipSupported)) ?
3042 #ifdef __DML_VBA_DEBUG__
3043 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
3044 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
3045 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
3046 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
3047 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
3048 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
3050 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
3052 v->VStartupLines = v->VStartupLines + 1;
3053 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
3054 ASSERT(v->PrefetchAndImmediateFlipSupported);
3056 // Unbounded Request Enabled
3057 CalculateUnboundedRequestAndCompressedBufferSize(
3058 v->DETBufferSizeInKByte[0],
3059 v->ConfigReturnBufferSizeInKByte,
3060 v->UseUnboundedRequesting,
3064 v->CompressedBufferSegmentSizeInkByte,
3066 &v->UnboundedRequestEnabled,
3067 &v->CompressedBufferSizeInkByte);
3069 //Watermarks and NB P-State/DRAM Clock Change Support
3071 enum clock_change_support DRAMClockChangeSupport; // dummy
3072 CalculateWatermarksAndDRAMSpeedChangeSupport(
3075 v->NumberOfActivePlanes,
3076 v->MaxLineBufferLines,
3078 v->WritebackInterfaceBufferSize,
3081 v->SynchronizedVBlank,
3082 v->dpte_group_bytes,
3085 v->UrgentExtraLatency,
3086 v->WritebackLatency,
3087 v->WritebackChunkSize,
3089 v->DRAMClockChangeLatency,
3091 v->SREnterPlusExitTime,
3093 v->SREnterPlusExitZ8Time,
3110 v->BlendingAndTiming,
3112 v->BytePerPixelDETY,
3113 v->BytePerPixelDETC,
3117 v->WritebackPixelFormat,
3118 v->WritebackDestinationWidth,
3119 v->WritebackDestinationHeight,
3120 v->WritebackSourceHeight,
3121 v->UnboundedRequestEnabled,
3122 v->CompressedBufferSizeInkByte,
3123 &DRAMClockChangeSupport,
3124 &v->UrgentWatermark,
3125 &v->WritebackUrgentWatermark,
3126 &v->DRAMClockChangeWatermark,
3127 &v->WritebackDRAMClockChangeWatermark,
3128 &v->StutterExitWatermark,
3129 &v->StutterEnterPlusExitWatermark,
3130 &v->Z8StutterExitWatermark,
3131 &v->Z8StutterEnterPlusExitWatermark,
3132 &v->MinActiveDRAMClockChangeLatencySupported);
3134 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3135 if (v->WritebackEnable[k] == true) {
3136 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
3138 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
3140 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3145 //Display Pipeline Delivery Time in Prefetch, Groups
3146 CalculatePixelDeliveryTimes(
3147 v->NumberOfActivePlanes,
3152 v->swath_width_luma_ub,
3153 v->swath_width_chroma_ub,
3158 v->PSCL_THROUGHPUT_LUMA,
3159 v->PSCL_THROUGHPUT_CHROMA,
3166 v->BlockWidth256BytesY,
3167 v->BlockHeight256BytesY,
3168 v->BlockWidth256BytesC,
3169 v->BlockHeight256BytesC,
3170 v->DisplayPipeLineDeliveryTimeLuma,
3171 v->DisplayPipeLineDeliveryTimeChroma,
3172 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3173 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3174 v->DisplayPipeRequestDeliveryTimeLuma,
3175 v->DisplayPipeRequestDeliveryTimeChroma,
3176 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3177 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3178 v->CursorRequestDeliveryTime,
3179 v->CursorRequestDeliveryTimePrefetch);
3181 CalculateMetaAndPTETimes(
3182 v->NumberOfActivePlanes,
3185 v->MinMetaChunkSizeBytes,
3189 v->DestinationLinesToRequestRowInVBlank,
3190 v->DestinationLinesToRequestRowInImmediateFlip,
3197 v->dpte_row_height_chroma,
3199 v->meta_row_width_chroma,
3201 v->meta_row_height_chroma,
3203 v->meta_req_width_chroma,
3205 v->meta_req_height_chroma,
3206 v->dpte_group_bytes,
3209 v->PixelPTEReqWidthY,
3210 v->PixelPTEReqHeightY,
3211 v->PixelPTEReqWidthC,
3212 v->PixelPTEReqHeightC,
3213 v->dpte_row_width_luma_ub,
3214 v->dpte_row_width_chroma_ub,
3215 v->DST_Y_PER_PTE_ROW_NOM_L,
3216 v->DST_Y_PER_PTE_ROW_NOM_C,
3217 v->DST_Y_PER_META_ROW_NOM_L,
3218 v->DST_Y_PER_META_ROW_NOM_C,
3219 v->TimePerMetaChunkNominal,
3220 v->TimePerChromaMetaChunkNominal,
3221 v->TimePerMetaChunkVBlank,
3222 v->TimePerChromaMetaChunkVBlank,
3223 v->TimePerMetaChunkFlip,
3224 v->TimePerChromaMetaChunkFlip,
3225 v->time_per_pte_group_nom_luma,
3226 v->time_per_pte_group_vblank_luma,
3227 v->time_per_pte_group_flip_luma,
3228 v->time_per_pte_group_nom_chroma,
3229 v->time_per_pte_group_vblank_chroma,
3230 v->time_per_pte_group_flip_chroma);
3232 CalculateVMGroupAndRequestTimes(
3233 v->NumberOfActivePlanes,
3235 v->GPUVMMaxPageTableLevels,
3238 v->DestinationLinesToRequestVMInVBlank,
3239 v->DestinationLinesToRequestVMInImmediateFlip,
3242 v->dpte_row_width_luma_ub,
3243 v->dpte_row_width_chroma_ub,
3245 v->dpde0_bytes_per_frame_ub_l,
3246 v->dpde0_bytes_per_frame_ub_c,
3247 v->meta_pte_bytes_per_frame_ub_l,
3248 v->meta_pte_bytes_per_frame_ub_c,
3249 v->TimePerVMGroupVBlank,
3250 v->TimePerVMGroupFlip,
3251 v->TimePerVMRequestVBlank,
3252 v->TimePerVMRequestFlip);
3255 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3256 if (PrefetchMode == 0) {
3257 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3258 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3259 v->MinTTUVBlank[k] = dml_max(
3260 v->DRAMClockChangeWatermark,
3261 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3262 } else if (PrefetchMode == 1) {
3263 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3264 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3265 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3267 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3268 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3269 v->MinTTUVBlank[k] = v->UrgentWatermark;
3271 if (!v->DynamicMetadataEnable[k])
3272 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3275 // DCC Configuration
3277 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3278 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3279 v->SourcePixelFormat[k],
3280 v->SurfaceWidthY[k],
3281 v->SurfaceWidthC[k],
3282 v->SurfaceHeightY[k],
3283 v->SurfaceHeightC[k],
3284 v->DETBufferSizeInKByte[0] * 1024,
3285 v->BlockHeight256BytesY[k],
3286 v->BlockHeight256BytesC[k],
3287 v->SurfaceTiling[k],
3288 v->BytePerPixelY[k],
3289 v->BytePerPixelC[k],
3290 v->BytePerPixelDETY[k],
3291 v->BytePerPixelDETC[k],
3293 &v->DCCYMaxUncompressedBlock[k],
3294 &v->DCCCMaxUncompressedBlock[k],
3295 &v->DCCYMaxCompressedBlock[k],
3296 &v->DCCCMaxCompressedBlock[k],
3297 &v->DCCYIndependentBlock[k],
3298 &v->DCCCIndependentBlock[k]);
3301 // VStartup Adjustment
3302 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3303 bool isInterlaceTiming;
3304 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3305 #ifdef __DML_VBA_DEBUG__
3306 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3309 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3311 #ifdef __DML_VBA_DEBUG__
3312 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3313 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3314 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3315 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3318 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3319 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3320 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3323 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3325 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3326 - v->VFrontPorch[k])
3327 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3328 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3330 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3332 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3333 <= (isInterlaceTiming ?
3334 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3335 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3336 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3338 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3340 #ifdef __DML_VBA_DEBUG__
3341 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3342 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3343 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3344 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3345 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3346 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3347 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3348 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3349 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3350 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3351 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3356 //Maximum Bandwidth Used
3357 double TotalWRBandwidth = 0;
3358 double MaxPerPlaneVActiveWRBandwidth = 0;
3359 double WRBandwidth = 0;
3360 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3361 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3362 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3363 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3364 } else if (v->WritebackEnable[k] == true) {
3365 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3366 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3368 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3369 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3372 v->TotalDataReadBandwidth = 0;
3373 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3374 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3377 // Stutter Efficiency
3378 CalculateStutterEfficiency(
3380 v->CompressedBufferSizeInkByte,
3381 v->UnboundedRequestEnabled,
3382 v->ConfigReturnBufferSizeInKByte,
3383 v->MetaFIFOSizeInKEntries,
3384 v->ZeroSizeBufferEntries,
3385 v->NumberOfActivePlanes,
3386 v->ROBBufferSizeInKByte,
3387 v->TotalDataReadBandwidth,
3390 v->COMPBUF_RESERVED_SPACE_64B,
3391 v->COMPBUF_RESERVED_SPACE_ZS,
3394 v->SynchronizedVBlank,
3395 v->StutterEnterPlusExitWatermark,
3396 v->Z8StutterEnterPlusExitWatermark,
3397 v->ProgressiveToInterlaceUnitInOPP,
3403 v->BytePerPixelDETY,
3409 v->DCCFractionOfZeroSizeRequestsLuma,
3410 v->DCCFractionOfZeroSizeRequestsChroma,
3416 v->BlockHeight256BytesY,
3417 v->BlockWidth256BytesY,
3418 v->BlockHeight256BytesC,
3419 v->BlockWidth256BytesC,
3420 v->DCCYMaxUncompressedBlock,
3421 v->DCCCMaxUncompressedBlock,
3425 v->ReadBandwidthPlaneLuma,
3426 v->ReadBandwidthPlaneChroma,
3429 &v->StutterEfficiencyNotIncludingVBlank,
3430 &v->StutterEfficiency,
3431 &v->NumberOfStutterBurstsPerFrame,
3432 &v->Z8StutterEfficiencyNotIncludingVBlank,
3433 &v->Z8StutterEfficiency,
3434 &v->Z8NumberOfStutterBurstsPerFrame,
/*
 * DisplayPipeConfiguration - derive per-plane block sizes, then the swath/DET
 * configuration, for the mode held in mode_lib->vba.
 *
 * Step 1: for every active plane, call CalculateBytePerPixelAnd256BBlockSizes()
 *         with that plane's source pixel format and surface tiling; the
 *         256-byte block heights/widths land in the function-local
 *         Read256Bytes* arrays.
 * Step 2: call CalculateSwathAndDETConfiguration() once, passing whole arrays
 *         (not per-plane elements) together with the local block-size arrays.
 *
 * NOTE(review): this listing skips several source lines (see the gaps in the
 * embedded numbering), so not every argument of either call is visible here.
 */
3438 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3440 struct vba_vars_st *v = &mode_lib->vba;
3441 // Display Pipe Configuration
3442 double BytePerPixDETY[DC__NUM_DPP__MAX];
3443 double BytePerPixDETC[DC__NUM_DPP__MAX];
3444 int BytePerPixY[DC__NUM_DPP__MAX];
3445 int BytePerPixC[DC__NUM_DPP__MAX];
// NOTE(review): these block-size locals are plain int, while the matching
// parameters of CalculateBytePerPixelAnd256BBlockSizes() are declared
// unsigned int * -- confirm the signedness mismatch is intentional.
3446 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3447 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3448 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3449 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
// dummy1..dummy7 / dummysinglestring: presumably throw-away output slots for
// results this function does not consume; only the use of &dummysinglestring
// is visible in this listing.
3450 double dummy1[DC__NUM_DPP__MAX];
3451 double dummy2[DC__NUM_DPP__MAX];
3452 double dummy3[DC__NUM_DPP__MAX];
3453 double dummy4[DC__NUM_DPP__MAX];
3454 int dummy5[DC__NUM_DPP__MAX];
3455 int dummy6[DC__NUM_DPP__MAX];
3456 bool dummy7[DC__NUM_DPP__MAX];
3457 bool dummysinglestring;
// Step 1: per-plane bytes-per-pixel and 256-byte block geometry.
3461 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3463 CalculateBytePerPixelAnd256BBlockSizes(
3464 v->SourcePixelFormat[k],
3465 v->SurfaceTiling[k],
3470 &Read256BytesBlockHeightY[k],
3471 &Read256BytesBlockHeightC[k],
3472 &Read256BytesBlockWidthY[k],
3473 &Read256BytesBlockWidthC[k]);
// Step 2: one call covering all planes. Only element 0 of
// v->DETBufferSizeInKByte is passed as the DET buffer size.
3476 CalculateSwathAndDETConfiguration(
3478 v->NumberOfActivePlanes,
3479 v->DETBufferSizeInKByte[0],
3483 v->SourcePixelFormat,
3491 Read256BytesBlockHeightY,
3492 Read256BytesBlockHeightC,
3493 Read256BytesBlockWidthY,
3494 Read256BytesBlockWidthC,
3495 v->ODMCombineEnabled,
3496 v->BlendingAndTiming,
3514 &dummysinglestring);
/*
 * CalculateBytePerPixelAnd256BBlockSizes - map a source pixel format and
 * surface tiling to bytes-per-pixel values and 256-byte block dimensions.
 * All results are written through the output pointers.
 *
 * DET bytes per pixel visible in this listing (luma / chroma):
 *   dm_444_64            8   / 0
 *   dm_444_32, dm_rgbe   4   / 0
 *   dm_444_16            2   / 0
 *   dm_444_8             1   / 0
 *   dm_rgbe_alpha        4   / 1
 *   dm_420_8             1   / 2
 *   dm_420_12            2   / 4
 *   last branch          4/3 / 8/3   (its guard line is not visible here)
 *
 * Block widths are computed as 256 / BytePerPixel / BlockHeight using unsigned
 * integer division.
 *
 * NOTE(review): the assignments to *BytePerPixelY / *BytePerPixelC and the
 * return statement of this bool function are not visible in this listing
 * (the embedded numbering skips them); the width divisions below rely on
 * those values being non-zero -- confirm against the full file.
 */
3517 static bool CalculateBytePerPixelAnd256BBlockSizes(
3518 enum source_format_class SourcePixelFormat,
3519 enum dm_swizzle_mode SurfaceTiling,
3520 unsigned int *BytePerPixelY,
3521 unsigned int *BytePerPixelC,
3522 double *BytePerPixelDETY,
3523 double *BytePerPixelDETC,
3524 unsigned int *BlockHeight256BytesY,
3525 unsigned int *BlockHeight256BytesC,
3526 unsigned int *BlockWidth256BytesY,
3527 unsigned int *BlockWidth256BytesC)
// Part 1: bytes per pixel, selected by pixel format.
3529 if (SourcePixelFormat == dm_444_64) {
3530 *BytePerPixelDETY = 8;
3531 *BytePerPixelDETC = 0;
3534 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3535 *BytePerPixelDETY = 4;
3536 *BytePerPixelDETC = 0;
3539 } else if (SourcePixelFormat == dm_444_16) {
3540 *BytePerPixelDETY = 2;
3541 *BytePerPixelDETC = 0;
3544 } else if (SourcePixelFormat == dm_444_8) {
3545 *BytePerPixelDETY = 1;
3546 *BytePerPixelDETC = 0;
3549 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3550 *BytePerPixelDETY = 4;
3551 *BytePerPixelDETC = 1;
3554 } else if (SourcePixelFormat == dm_420_8) {
3555 *BytePerPixelDETY = 1;
3556 *BytePerPixelDETC = 2;
3559 } else if (SourcePixelFormat == dm_420_12) {
3560 *BytePerPixelDETY = 2;
3561 *BytePerPixelDETC = 4;
// Fractional DET sizes for the remaining branch; presumably the final else
// of the chain above (the branch line itself is not in this listing).
3565 *BytePerPixelDETY = 4.0 / 3;
3566 *BytePerPixelDETC = 8.0 / 3;
// Part 2: 256-byte block geometry. This first group of formats gets luma
// values only; chroma height and width are set to 0.
// NOTE(review): dm_mono_16 / dm_mono_8 are listed here but have no branch of
// their own in the visible byte-per-pixel chain above -- confirm which branch
// they are meant to take.
3571 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3572 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3573 if (SurfaceTiling == dm_sw_linear) {
3574 *BlockHeight256BytesY = 1;
3575 } else if (SourcePixelFormat == dm_444_64) {
3576 *BlockHeight256BytesY = 4;
3577 } else if (SourcePixelFormat == dm_444_8) {
3578 *BlockHeight256BytesY = 16;
3580 *BlockHeight256BytesY = 8;
3582 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3583 *BlockHeight256BytesC = 0;
3584 *BlockWidth256BytesC = 0;
// Remaining formats: both luma and chroma block sizes are produced.
3586 if (SurfaceTiling == dm_sw_linear) {
3587 *BlockHeight256BytesY = 1;
3588 *BlockHeight256BytesC = 1;
3589 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3590 *BlockHeight256BytesY = 8;
3591 *BlockHeight256BytesC = 16;
3592 } else if (SourcePixelFormat == dm_420_8) {
3593 *BlockHeight256BytesY = 16;
3594 *BlockHeight256BytesC = 8;
3596 *BlockHeight256BytesY = 8;
3597 *BlockHeight256BytesC = 8;
3599 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3600 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
/*
 * CalculateTWait - pick a wait time from the supplied latencies according to
 * PrefetchMode.
 *
 *   PrefetchMode == 0: max(DRAMClockChangeLatency + UrgentLatency,
 *                          SREnterPlusExitTime, UrgentLatency)
 *   PrefetchMode == 1: max(SREnterPlusExitTime, UrgentLatency)
 *   final return:      UrgentLatency (presumably the else branch for every
 *                      other mode; that branch line is not in this listing)
 *
 * The result is in whatever unit the latency arguments share.
 */
3605 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
3607 if (PrefetchMode == 0) {
3608 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
3609 } else if (PrefetchMode == 1) {
3610 return dml_max(SREnterPlusExitTime, UrgentLatency);
3612 return UrgentLatency;
/*
 * dml31_CalculateWriteBackDISPCLK - return the largest of three DISPCLK
 * candidates computed for writeback:
 *   DISPCLK_H  - from the horizontal tap count and horizontal ratio,
 *   DISPCLK_V  - from the vertical tap count, destination width and HTotal,
 *   DISPCLK_HB - from the vertical tap count, destination width, line buffer
 *                size and source width.
 * Each candidate scales with PixelClock, so the result is in the same unit.
 *
 * NOTE(review): WritebackPixelFormat and WritebackVRatio are not used in the
 * visible body. The declaration of PixelClock is not visible either (the
 * embedded numbering skips a parameter line) -- confirm against the full file.
 */
3616 double dml31_CalculateWriteBackDISPCLK(
3617 enum source_format_class WritebackPixelFormat,
3619 double WritebackHRatio,
3620 double WritebackVRatio,
3621 unsigned int WritebackHTaps,
3622 unsigned int WritebackVTaps,
3623 long WritebackSourceWidth,
3624 long WritebackDestinationWidth,
3625 unsigned int HTotal,
3626 unsigned int WritebackLineBufferSize)
3628 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
// The 8.0 / 6.0 / 57.0 divisors make each expression floating-point before
// any integer operand is divided.
3630 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3631 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3632 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3633 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
/*
 * CalculateWriteBackDelay - compute the writeback delay returned to the
 * caller.
 *
 * The function derives an initial vertical phase (WritebackVInit), a line
 * length, and the number of output lines remaining after the last source
 * line (Output_lines_last_notclamped). The result is 0 when that line count
 * is negative; otherwise it is
 *   Output_lines_last_notclamped * Line_length
 *     + (HTotal - WritebackDestinationWidth) + 80.
 * The terms are pixel counts, so the result is presumably in pixel-clock
 * cycles -- TODO confirm with callers.
 *
 * NOTE(review): WritebackPixelFormat and WritebackHRatio are not used in the
 * visible body; the declaration of Line_length is on a line this listing
 * skips.
 */
3636 static double CalculateWriteBackDelay(
3637 enum source_format_class WritebackPixelFormat,
3638 double WritebackHRatio,
3639 double WritebackVRatio,
3640 unsigned int WritebackVTaps,
3641 int WritebackDestinationWidth,
3642 int WritebackDestinationHeight,
3643 int WritebackSourceHeight,
3644 unsigned int HTotal)
3646 double CalculateWriteBackDelay;
3648 double Output_lines_last_notclamped;
3649 double WritebackVInit;
3651 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3652 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3653 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
// A negative line count is clamped to a zero delay.
3654 if (Output_lines_last_notclamped < 0) {
3655 CalculateWriteBackDelay = 0;
// NOTE(review): HTotal is unsigned int and WritebackDestinationWidth is int,
// so (HTotal - WritebackDestinationWidth) is evaluated in unsigned arithmetic
// and wraps if the destination width exceeds HTotal -- confirm that cannot
// happen.
3657 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3659 return CalculateWriteBackDelay;
/*
 * CalculateVupdateAndDynamicMetadataParameters - compute VUPDATE/VREADY pixel
 * offsets and the dynamic-metadata timing terms, writing every result through
 * an output pointer.
 *
 * Outputs written in the visible body:
 *   *VUpdateWidthPix  - ceil of (14/DCFClkDeepSleep + 12/DPPCLK + repeater
 *                       delay) * PixelClock
 *   *VReadyOffsetPix  - ceil of max(150/DPPCLK, repeater delay
 *                       + 20/DCFClkDeepSleep + 10/DPPCLK) * PixelClock
 *   *VUpdateOffsetPix - ceil of HTotal / 4
 *   *TSetup           - sum of the three pixel values above / PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec            - HTotal / PixelClock (one line time)
 *   *Tdmsks           - half the VBlank time when no "lines before active"
 *                       requirement is given, otherwise that many line times;
 *                       halved again for interlaced timing when the
 *                       progressive-to-interlace unit is not in the OPP.
 *
 * NOTE(review): several parameter lines (the clocks, HTotal, VBlank and the
 * TSetup/Tdm* output pointers) are skipped by this listing.
 */
3662 static void CalculateVupdateAndDynamicMetadataParameters(
3663 int MaxInterDCNTileRepeaters,
3666 double DCFClkDeepSleep,
3670 int DynamicMetadataTransmittedBytes,
3671 int DynamicMetadataLinesBeforeActiveRequired,
3672 int InterlaceEnable,
3673 bool ProgressiveToInterlaceUnitInOPP,
3678 int *VUpdateOffsetPix,
3679 double *VUpdateWidthPix,
3680 double *VReadyOffsetPix)
3682 double TotalRepeaterDelayTime;
// Each repeater contributes 2 DPPCLK cycles plus 3 DISPCLK cycles.
3684 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
3685 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
3686 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
3687 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
3688 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
3689 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
3690 *Tdmec = HTotal / PixelClock;
3691 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
3692 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
3694 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
3696 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
3697 *Tdmsks = *Tdmsks / 2;
// NOTE(review): *VUpdateWidthPix and *VReadyOffsetPix are doubles but are
// printed with %d below; a printf-style formatter would need %f. Confirm
// dml_print semantics and fix if it forwards to printk/printf.
3699 #ifdef __DML_VBA_DEBUG__
3700 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix);
3701 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix);
3702 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
/*
 * CalculateRowBandwidth - compute the DCC meta-row bandwidth (*meta_row_bw)
 * and the page-table-entry row bandwidth (*dpte_row_bw) for one plane.
 *
 * Each bandwidth is VRatio * bytes-per-row / (row height * LineTime). For the
 * formats tested below (dm_420_8, dm_420_10, dm_420_12, dm_rgbe_alpha) a
 * second term using VRatioChroma and the chroma row bytes/height is added.
 *
 * NOTE(review): the statements executed when DCCEnable / GPUVMEnable are not
 * true are on lines this listing skips (presumably they zero the respective
 * output -- confirm). Several parameter lines are skipped as well.
 */
3706 static void CalculateRowBandwidth(
3708 enum source_format_class SourcePixelFormat,
3710 double VRatioChroma,
3713 unsigned int MetaRowByteLuma,
3714 unsigned int MetaRowByteChroma,
3715 unsigned int meta_row_height_luma,
3716 unsigned int meta_row_height_chroma,
3717 unsigned int PixelPTEBytesPerRowLuma,
3718 unsigned int PixelPTEBytesPerRowChroma,
3719 unsigned int dpte_row_height_luma,
3720 unsigned int dpte_row_height_chroma,
3721 double *meta_row_bw,
3722 double *dpte_row_bw)
// Meta-row bandwidth.
3724 if (DCCEnable != true) {
3726 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3727 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3729 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
// PTE-row bandwidth, same structure as above.
3732 if (GPUVMEnable != true) {
3734 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3735 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3736 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3738 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - compute the immediate-flip schedule for one plane
 * and decide whether that plane can support an immediate flip.
 *
 * Outputs:
 *   *DestinationLinesToRequestVMInImmediateFlip  - meta/PTE fetch time in
 *       lines, passed through dml_ceil(4.0 * x, 1) / 4.0 (presumably rounding
 *       up to a quarter line).
 *   *DestinationLinesToRequestRowInImmediateFlip - row fetch time in lines,
 *       same rounding expression.
 *   *final_flip_bw                 - bandwidth needed to fetch the flip data
 *       within the line counts above.
 *   *ImmediateFlipSupportedForPipe - false when the VM line count is >= 32,
 *       the row line count is >= 16, or the meta/PTE fetch time plus twice
 *       the row fetch time exceeds min_row_time; true otherwise.
 *
 * NOTE(review): several parameter lines (GPUVMEnable, HostVMEnable, LineTime,
 * VRatio, Tno_bw, DCCEnable, ...) and a few statements are skipped by this
 * listing.
 */
3742 static void CalculateFlipSchedule(
3743 struct display_mode_lib *mode_lib,
3744 double HostVMInefficiencyFactor,
3745 double UrgentExtraLatency,
3746 double UrgentLatency,
3747 unsigned int GPUVMMaxPageTableLevels,
3749 unsigned int HostVMMaxNonCachedPageTableLevels,
3751 double HostVMMinPageSize,
3752 double PDEAndMetaPTEBytesPerFrame,
3753 double MetaRowBytes,
3754 double DPTEBytesPerRow,
3755 double BandwidthAvailableForImmediateFlip,
3756 unsigned int TotImmediateFlipBytes,
3757 enum source_format_class SourcePixelFormat,
3760 double VRatioChroma,
3763 unsigned int dpte_row_height,
3764 unsigned int meta_row_height,
3765 unsigned int dpte_row_height_chroma,
3766 unsigned int meta_row_height_chroma,
3767 double *DestinationLinesToRequestVMInImmediateFlip,
3768 double *DestinationLinesToRequestRowInImmediateFlip,
3769 double *final_flip_bw,
3770 bool *ImmediateFlipSupportedForPipe)
3772 double min_row_time = 0.0;
3773 unsigned int HostVMDynamicLevelsTrips;
3774 double TimeForFetchingMetaPTEImmediateFlip;
3775 double TimeForFetchingRowInVBlankImmediateFlip;
3776 double ImmediateFlipBW;
// Extra page-table walk trips only apply when both GPUVM and HostVM are on.
3778 if (GPUVMEnable == true && HostVMEnable == true) {
3779 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3781 HostVMDynamicLevelsTrips = 0;
// This plane's share of the available flip bandwidth, proportional to its
// bytes out of TotImmediateFlipBytes.
// NOTE(review): ImmediateFlipBW is only assigned under this condition, and
// the expression divides by TotImmediateFlipBytes -- confirm callers never
// pass 0 here.
3784 if (GPUVMEnable == true || DCCEnable == true) {
3785 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
// Time to fetch PDE/meta-PTE data: the largest of the bandwidth-limited time,
// the latency-limited time, and a third term on a line this listing skips.
3788 if (GPUVMEnable == true) {
3789 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3790 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3791 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3794 TimeForFetchingMetaPTEImmediateFlip = 0;
3797 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
// Time to fetch one meta row plus one PTE row, same structure as above.
3798 if ((GPUVMEnable == true || DCCEnable == true)) {
3799 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3800 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3801 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3804 TimeForFetchingRowInVBlankImmediateFlip = 0;
3807 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
// Bandwidth implied by the rounded line counts. Because the first branch
// already covers GPUVMEnable, the else-if is reached only when DCCEnable
// alone is true.
3809 if (GPUVMEnable == true) {
3810 *final_flip_bw = dml_max(
3811 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3812 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3813 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3814 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
// min_row_time: shortest time one dpte/meta row lasts at the given vertical
// ratio(s); the chroma rows are included for the formats tested here.
// NOTE(review): dm_420_12 is absent from this test although
// CalculateRowBandwidth() in this file treats it like the other 4:2:0
// formats -- confirm whether that is intentional.
3819 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3820 if (GPUVMEnable == true && DCCEnable != true) {
3821 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3822 } else if (GPUVMEnable != true && DCCEnable == true) {
3823 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3825 min_row_time = dml_min4(
3826 dpte_row_height * LineTime / VRatio,
3827 meta_row_height * LineTime / VRatio,
3828 dpte_row_height_chroma * LineTime / VRatioChroma,
3829 meta_row_height_chroma * LineTime / VRatioChroma);
3832 if (GPUVMEnable == true && DCCEnable != true) {
3833 min_row_time = dpte_row_height * LineTime / VRatio;
3834 } else if (GPUVMEnable != true && DCCEnable == true) {
3835 min_row_time = meta_row_height * LineTime / VRatio;
3837 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
// Support decision: line-count limits of 32 (VM) and 16 (row), and the whole
// fetch (meta/PTE + 2 rows) must fit within min_row_time.
3841 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3842 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3843 *ImmediateFlipSupportedForPipe = false;
3845 *ImmediateFlipSupportedForPipe = true;
3848 #ifdef __DML_VBA_DEBUG__
3849 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
3850 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
3851 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3852 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3853 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3854 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
3859 static double TruncToValidBPP(
3867 enum output_encoder_class Output,
3868 enum output_format_class Format,
3869 unsigned int DSCInputBitPerComponent,
3873 enum odm_combine_mode ODMCombine)
3882 if (Format == dm_420) {
3887 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3888 } else if (Format == dm_444) {
3893 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3895 if (Output == dm_hdmi) {
3904 if (Format == dm_n422) {
3906 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3909 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3913 if (DSCEnable && Output == dm_dp) {
3914 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3916 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3919 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3921 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3925 if (DesiredBPP == 0) {
3927 if (MaxLinkBPP < MinDSCBPP) {
3929 } else if (MaxLinkBPP >= MaxDSCBPP) {
3932 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3935 if (MaxLinkBPP >= NonDSCBPP2) {
3937 } else if (MaxLinkBPP >= NonDSCBPP1) {
3939 } else if (MaxLinkBPP >= NonDSCBPP0) {
3946 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3947 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3956 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3958 struct vba_vars_st *v = &mode_lib->vba;
3962 int ReorderingBytes;
3963 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3964 bool NoChroma = true;
3965 bool EnoughWritebackUnits = true;
3966 bool P2IWith420 = false;
3967 bool DSCOnlyIfNecessaryWithBPP = false;
3968 bool DSC422NativeNotSupported = false;
3969 double MaxTotalVActiveRDBandwidth;
3970 bool ViewportExceedsSurface = false;
3971 bool FMTBufferExceeded = false;
3973 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3975 CalculateMinAndMaxPrefetchMode(
3976 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3977 &MinPrefetchMode, &MaxPrefetchMode);
3979 /*Scale Ratio, taps Support Check*/
3981 v->ScaleRatioAndTapsSupport = true;
3982 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3983 if (v->ScalerEnabled[k] == false
3984 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3985 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3986 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3987 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3988 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3989 v->ScaleRatioAndTapsSupport = false;
3990 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3991 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3992 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3993 || v->VRatio[k] > v->vtaps[k]
3994 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3995 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3996 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3997 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3998 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3999 || v->HRatioChroma[k] > v->MaxHSCLRatio
4000 || v->VRatioChroma[k] > v->MaxVSCLRatio
4001 || v->HRatioChroma[k] > v->HTAPsChroma[k]
4002 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
4003 v->ScaleRatioAndTapsSupport = false;
4006 /*Source Format, Pixel Format and Scan Support Check*/
4008 v->SourceFormatPixelAndScanSupport = true;
4009 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4010 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
4011 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
4012 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
4013 v->SourceFormatPixelAndScanSupport = false;
4016 /*Bandwidth Support Check*/
4018 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4019 CalculateBytePerPixelAnd256BBlockSizes(
4020 v->SourcePixelFormat[k],
4021 v->SurfaceTiling[k],
4022 &v->BytePerPixelY[k],
4023 &v->BytePerPixelC[k],
4024 &v->BytePerPixelInDETY[k],
4025 &v->BytePerPixelInDETC[k],
4026 &v->Read256BlockHeightY[k],
4027 &v->Read256BlockHeightC[k],
4028 &v->Read256BlockWidthY[k],
4029 &v->Read256BlockWidthC[k]);
4031 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4032 if (v->SourceScan[k] != dm_vert) {
4033 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
4034 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
4036 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
4037 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
4040 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4041 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
4042 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4043 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
4044 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
4046 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4047 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
4048 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4049 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
4050 } else if (v->WritebackEnable[k] == true) {
4051 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4052 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
4054 v->WriteBandwidth[k] = 0.0;
4058 /*Writeback Latency support check*/
4060 v->WritebackLatencySupport = true;
4061 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4062 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
4063 v->WritebackLatencySupport = false;
4067 /*Writeback Mode Support Check*/
4069 v->TotalNumberOfActiveWriteback = 0;
4070 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4071 if (v->WritebackEnable[k] == true) {
4072 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4076 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4077 EnoughWritebackUnits = false;
4080 /*Writeback Scale Ratio and Taps Support Check*/
4082 v->WritebackScaleRatioAndTapsSupport = true;
4083 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4084 if (v->WritebackEnable[k] == true) {
4085 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4086 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4087 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4088 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4089 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4090 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4091 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4092 v->WritebackScaleRatioAndTapsSupport = false;
4094 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4095 v->WritebackScaleRatioAndTapsSupport = false;
4099 /*Maximum DISPCLK/DPPCLK Support check*/
4101 v->WritebackRequiredDISPCLK = 0.0;
4102 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4103 if (v->WritebackEnable[k] == true) {
4104 v->WritebackRequiredDISPCLK = dml_max(
4105 v->WritebackRequiredDISPCLK,
4106 dml31_CalculateWriteBackDISPCLK(
4107 v->WritebackPixelFormat[k],
4109 v->WritebackHRatio[k],
4110 v->WritebackVRatio[k],
4111 v->WritebackHTaps[k],
4112 v->WritebackVTaps[k],
4113 v->WritebackSourceWidth[k],
4114 v->WritebackDestinationWidth[k],
4116 v->WritebackLineBufferSize));
4119 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4120 if (v->HRatio[k] > 1.0) {
4121 v->PSCL_FACTOR[k] = dml_min(
4122 v->MaxDCHUBToPSCLThroughput,
4123 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4125 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4127 if (v->BytePerPixelC[k] == 0.0) {
4128 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4129 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4131 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4132 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4134 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4135 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4138 if (v->HRatioChroma[k] > 1.0) {
4139 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4140 v->MaxDCHUBToPSCLThroughput,
4141 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4143 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4145 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4147 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4148 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4149 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4150 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4152 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4153 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4154 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4158 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4159 int MaximumSwathWidthSupportLuma;
4160 int MaximumSwathWidthSupportChroma;
4162 if (v->SurfaceTiling[k] == dm_sw_linear) {
4163 MaximumSwathWidthSupportLuma = 8192.0;
4164 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4165 MaximumSwathWidthSupportLuma = 2880.0;
4166 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4167 MaximumSwathWidthSupportLuma = 3840.0;
4169 MaximumSwathWidthSupportLuma = 5760.0;
4172 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4173 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4175 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4177 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4178 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4179 if (v->BytePerPixelC[k] == 0.0) {
4180 v->MaximumSwathWidthInLineBufferChroma = 0;
4182 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4183 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4185 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4186 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4189 CalculateSwathAndDETConfiguration(
4191 v->NumberOfActivePlanes,
4192 v->DETBufferSizeInKByte[0],
4193 v->MaximumSwathWidthLuma,
4194 v->MaximumSwathWidthChroma,
4196 v->SourcePixelFormat,
4204 v->Read256BlockHeightY,
4205 v->Read256BlockHeightC,
4206 v->Read256BlockWidthY,
4207 v->Read256BlockWidthC,
4208 v->odm_combine_dummy,
4209 v->BlendingAndTiming,
4212 v->BytePerPixelInDETY,
4213 v->BytePerPixelInDETC,
4217 v->NoOfDPPThisState,
4218 v->swath_width_luma_ub_this_state,
4219 v->swath_width_chroma_ub_this_state,
4220 v->SwathWidthYThisState,
4221 v->SwathWidthCThisState,
4222 v->SwathHeightYThisState,
4223 v->SwathHeightCThisState,
4224 v->DETBufferSizeYThisState,
4225 v->DETBufferSizeCThisState,
4226 v->SingleDPPViewportSizeSupportPerPlane,
4227 &v->ViewportSizeSupport[0][0]);
4229 for (i = 0; i < v->soc.num_states; i++) {
4230 for (j = 0; j < 2; j++) {
4231 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4232 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4233 v->RequiredDISPCLK[i][j] = 0.0;
4234 v->DISPCLK_DPPCLK_Support[i][j] = true;
4235 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4236 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4237 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4238 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4239 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4240 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4241 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4242 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4244 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4245 * (1 + v->DISPCLKRampingMargin / 100.0);
4246 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4247 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4248 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4249 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4250 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4252 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4253 * (1 + v->DISPCLKRampingMargin / 100.0);
4254 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4255 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4256 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4257 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4258 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4261 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4262 || !(v->Output[k] == dm_dp ||
4263 v->Output[k] == dm_edp)) {
4264 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4265 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4267 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4268 FMTBufferExceeded = true;
4269 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4270 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4271 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4272 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4273 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4274 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4275 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4276 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4277 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4278 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4280 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4281 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4283 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4284 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4285 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4286 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4287 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4289 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4290 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4293 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4294 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4295 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4296 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4297 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4299 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4300 FMTBufferExceeded = true;
4302 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4303 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4306 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4307 v->MPCCombine[i][j][k] = false;
4308 v->NoOfDPP[i][j][k] = 4;
4309 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4310 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4311 v->MPCCombine[i][j][k] = false;
4312 v->NoOfDPP[i][j][k] = 2;
4313 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4314 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4315 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4316 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4317 v->MPCCombine[i][j][k] = false;
4318 v->NoOfDPP[i][j][k] = 1;
4319 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4321 v->MPCCombine[i][j][k] = true;
4322 v->NoOfDPP[i][j][k] = 2;
4323 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4325 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4326 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4327 > v->MaxDppclkRoundedDownToDFSGranularity)
4328 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4329 v->DISPCLK_DPPCLK_Support[i][j] = false;
4332 v->TotalNumberOfActiveDPP[i][j] = 0;
4333 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4334 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4335 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4336 if (v->NoOfDPP[i][j][k] == 1)
4337 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4338 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4339 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4344 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4345 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4346 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4347 double BWOfNonSplitPlaneOfMaximumBandwidth;
4348 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4349 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4350 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4351 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4352 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4353 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4354 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4355 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4358 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4359 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4360 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4361 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4362 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4363 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4364 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4367 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4368 v->RequiredDISPCLK[i][j] = 0.0;
4369 v->DISPCLK_DPPCLK_Support[i][j] = true;
4370 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4371 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4372 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4373 v->MPCCombine[i][j][k] = true;
4374 v->NoOfDPP[i][j][k] = 2;
4375 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4376 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4378 v->MPCCombine[i][j][k] = false;
4379 v->NoOfDPP[i][j][k] = 1;
4380 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4381 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4383 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4384 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4385 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4386 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4388 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4390 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4391 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4392 > v->MaxDppclkRoundedDownToDFSGranularity)
4393 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4394 v->DISPCLK_DPPCLK_Support[i][j] = false;
4397 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4398 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4399 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4402 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4403 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4404 v->DISPCLK_DPPCLK_Support[i][j] = false;
4409 /*Total Available Pipes Support Check*/
4411 for (i = 0; i < v->soc.num_states; i++) {
4412 for (j = 0; j < 2; j++) {
4413 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4414 v->TotalAvailablePipesSupport[i][j] = true;
4416 v->TotalAvailablePipesSupport[i][j] = false;
4420 /*Display IO and DSC Support Check*/
4422 v->NonsupportedDSCInputBPC = false;
4423 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4424 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4425 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4426 v->NonsupportedDSCInputBPC = true;
4430 /*Number Of DSC Slices*/
4431 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4432 if (v->BlendingAndTiming[k] == k) {
4433 if (v->PixelClockBackEnd[k] > 3200) {
4434 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4435 } else if (v->PixelClockBackEnd[k] > 1360) {
4436 v->NumberOfDSCSlices[k] = 8;
4437 } else if (v->PixelClockBackEnd[k] > 680) {
4438 v->NumberOfDSCSlices[k] = 4;
4439 } else if (v->PixelClockBackEnd[k] > 340) {
4440 v->NumberOfDSCSlices[k] = 2;
4442 v->NumberOfDSCSlices[k] = 1;
4445 v->NumberOfDSCSlices[k] = 0;
4449 for (i = 0; i < v->soc.num_states; i++) {
4450 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4451 v->RequiresDSC[i][k] = false;
4452 v->RequiresFEC[i][k] = false;
4453 if (v->BlendingAndTiming[k] == k) {
4454 if (v->Output[k] == dm_hdmi) {
4455 v->RequiresDSC[i][k] = false;
4456 v->RequiresFEC[i][k] = false;
4457 v->OutputBppPerState[i][k] = TruncToValidBPP(
4458 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4462 v->PixelClockBackEnd[k],
4463 v->ForcedOutputLinkBPP[k],
4467 v->DSCInputBitPerComponent[k],
4468 v->NumberOfDSCSlices[k],
4469 v->AudioSampleRate[k],
4470 v->AudioSampleLayout[k],
4471 v->ODMCombineEnablePerState[i][k]);
4472 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4473 if (v->DSCEnable[k] == true) {
4474 v->RequiresDSC[i][k] = true;
4475 v->LinkDSCEnable = true;
4476 if (v->Output[k] == dm_dp) {
4477 v->RequiresFEC[i][k] = true;
4479 v->RequiresFEC[i][k] = false;
4482 v->RequiresDSC[i][k] = false;
4483 v->LinkDSCEnable = false;
4484 v->RequiresFEC[i][k] = false;
4487 v->Outbpp = BPP_INVALID;
4488 if (v->PHYCLKPerState[i] >= 270.0) {
4489 v->Outbpp = TruncToValidBPP(
4490 (1.0 - v->Downspreading / 100.0) * 2700,
4491 v->OutputLinkDPLanes[k],
4494 v->PixelClockBackEnd[k],
4495 v->ForcedOutputLinkBPP[k],
4499 v->DSCInputBitPerComponent[k],
4500 v->NumberOfDSCSlices[k],
4501 v->AudioSampleRate[k],
4502 v->AudioSampleLayout[k],
4503 v->ODMCombineEnablePerState[i][k]);
4504 v->OutputBppPerState[i][k] = v->Outbpp;
4505 // TODO: Need some other way to handle this nonsense
4506 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4508 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4509 v->Outbpp = TruncToValidBPP(
4510 (1.0 - v->Downspreading / 100.0) * 5400,
4511 v->OutputLinkDPLanes[k],
4514 v->PixelClockBackEnd[k],
4515 v->ForcedOutputLinkBPP[k],
4519 v->DSCInputBitPerComponent[k],
4520 v->NumberOfDSCSlices[k],
4521 v->AudioSampleRate[k],
4522 v->AudioSampleLayout[k],
4523 v->ODMCombineEnablePerState[i][k]);
4524 v->OutputBppPerState[i][k] = v->Outbpp;
4525 // TODO: Need some other way to handle this nonsense
4526 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4528 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4529 v->Outbpp = TruncToValidBPP(
4530 (1.0 - v->Downspreading / 100.0) * 8100,
4531 v->OutputLinkDPLanes[k],
4534 v->PixelClockBackEnd[k],
4535 v->ForcedOutputLinkBPP[k],
4539 v->DSCInputBitPerComponent[k],
4540 v->NumberOfDSCSlices[k],
4541 v->AudioSampleRate[k],
4542 v->AudioSampleLayout[k],
4543 v->ODMCombineEnablePerState[i][k]);
4544 v->OutputBppPerState[i][k] = v->Outbpp;
4545 // TODO: Need some other way to handle this nonsense
4546 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4548 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4549 v->Outbpp = TruncToValidBPP(
4550 (1.0 - v->Downspreading / 100.0) * 10000,
4554 v->PixelClockBackEnd[k],
4555 v->ForcedOutputLinkBPP[k],
4559 v->DSCInputBitPerComponent[k],
4560 v->NumberOfDSCSlices[k],
4561 v->AudioSampleRate[k],
4562 v->AudioSampleLayout[k],
4563 v->ODMCombineEnablePerState[i][k]);
4564 v->OutputBppPerState[i][k] = v->Outbpp;
4565 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
4567 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4568 v->Outbpp = TruncToValidBPP(
4573 v->PixelClockBackEnd[k],
4574 v->ForcedOutputLinkBPP[k],
4578 v->DSCInputBitPerComponent[k],
4579 v->NumberOfDSCSlices[k],
4580 v->AudioSampleRate[k],
4581 v->AudioSampleLayout[k],
4582 v->ODMCombineEnablePerState[i][k]);
4583 v->OutputBppPerState[i][k] = v->Outbpp;
4584 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
4588 v->OutputBppPerState[i][k] = 0;
4593 for (i = 0; i < v->soc.num_states; i++) {
4594 v->LinkCapacitySupport[i] = true;
4595 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4596 if (v->BlendingAndTiming[k] == k
4597 && (v->Output[k] == dm_dp ||
4598 v->Output[k] == dm_edp ||
4599 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4600 v->LinkCapacitySupport[i] = false;
4606 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4607 if (v->BlendingAndTiming[k] == k
4608 && (v->Output[k] == dm_dp ||
4609 v->Output[k] == dm_edp ||
4610 v->Output[k] == dm_hdmi)) {
4611 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4614 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4615 && !v->DSC422NativeSupport) {
4616 DSC422NativeNotSupported = true;
4621 for (i = 0; i < v->soc.num_states; ++i) {
4622 v->ODMCombine4To1SupportCheckOK[i] = true;
4623 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4624 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4625 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4626 || v->Output[k] == dm_hdmi)) {
4627 v->ODMCombine4To1SupportCheckOK[i] = false;
4632 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4634 for (i = 0; i < v->soc.num_states; i++) {
4635 v->NotEnoughDSCUnits[i] = false;
4636 v->TotalDSCUnitsRequired = 0.0;
4637 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4638 if (v->RequiresDSC[i][k] == true) {
4639 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4640 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4641 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4642 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4644 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4648 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4649 v->NotEnoughDSCUnits[i] = true;
4652 /*DSC Delay per state*/
4654 for (i = 0; i < v->soc.num_states; i++) {
4655 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4656 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4659 v->BPP = v->OutputBppPerState[i][k];
4661 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4662 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4663 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4664 v->DSCInputBitPerComponent[k],
4666 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4667 v->NumberOfDSCSlices[k],
4669 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4670 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4671 v->DSCDelayPerState[i][k] = 2.0
4672 * (dscceComputeDelay(
4673 v->DSCInputBitPerComponent[k],
4675 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4676 v->NumberOfDSCSlices[k] / 2,
4678 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4680 v->DSCDelayPerState[i][k] = 4.0
4681 * (dscceComputeDelay(
4682 v->DSCInputBitPerComponent[k],
4684 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4685 v->NumberOfDSCSlices[k] / 4,
4687 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4689 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4691 v->DSCDelayPerState[i][k] = 0.0;
4694 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4695 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4696 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4697 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4703 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4705 for (i = 0; i < v->soc.num_states; ++i) {
4706 for (j = 0; j <= 1; ++j) {
4707 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4708 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4709 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4710 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4713 CalculateSwathAndDETConfiguration(
4715 v->NumberOfActivePlanes,
4716 v->DETBufferSizeInKByte[0],
4717 v->MaximumSwathWidthLuma,
4718 v->MaximumSwathWidthChroma,
4720 v->SourcePixelFormat,
4728 v->Read256BlockHeightY,
4729 v->Read256BlockHeightC,
4730 v->Read256BlockWidthY,
4731 v->Read256BlockWidthC,
4732 v->ODMCombineEnableThisState,
4733 v->BlendingAndTiming,
4736 v->BytePerPixelInDETY,
4737 v->BytePerPixelInDETC,
4741 v->NoOfDPPThisState,
4742 v->swath_width_luma_ub_this_state,
4743 v->swath_width_chroma_ub_this_state,
4744 v->SwathWidthYThisState,
4745 v->SwathWidthCThisState,
4746 v->SwathHeightYThisState,
4747 v->SwathHeightCThisState,
4748 v->DETBufferSizeYThisState,
4749 v->DETBufferSizeCThisState,
4751 &v->ViewportSizeSupport[i][j]);
4753 CalculateDCFCLKDeepSleep(
4755 v->NumberOfActivePlanes,
4760 v->SwathWidthYThisState,
4761 v->SwathWidthCThisState,
4762 v->NoOfDPPThisState,
4767 v->PSCL_FACTOR_CHROMA,
4768 v->RequiredDPPCLKThisState,
4769 v->ReadBandwidthLuma,
4770 v->ReadBandwidthChroma,
4772 &v->ProjectedDCFCLKDeepSleep[i][j]);
4774 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4775 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4776 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4777 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4778 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4779 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4780 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4781 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4782 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4787 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4788 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4789 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4792 for (i = 0; i < v->soc.num_states; i++) {
4793 for (j = 0; j < 2; j++) {
4794 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4796 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4797 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4798 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4799 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4800 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4801 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4802 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4803 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4804 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4807 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4808 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4809 if (v->DCCEnable[k] == true) {
4810 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4814 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4815 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4816 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4818 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4819 && v->SourceScan[k] != dm_vert) {
4820 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4822 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4824 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4825 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4828 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4831 v->Read256BlockHeightC[k],
4832 v->Read256BlockWidthC[k],
4833 v->SourcePixelFormat[k],
4834 v->SurfaceTiling[k],
4835 v->BytePerPixelC[k],
4837 v->SwathWidthCThisState[k],
4838 v->ViewportHeightChroma[k],
4841 v->HostVMMaxNonCachedPageTableLevels,
4842 v->GPUVMMinPageSize,
4843 v->HostVMMinPageSize,
4844 v->PTEBufferSizeInRequestsForChroma,
4847 &v->MacroTileWidthC[k],
4849 &v->DPTEBytesPerRowC,
4850 &v->PTEBufferSizeNotExceededC[i][j][k],
4852 &v->dpte_row_height_chroma[k],
4856 &v->meta_row_height_chroma[k],
4863 &v->dummyinteger11);
4865 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4870 v->ProgressiveToInterlaceUnitInOPP,
4871 v->SwathHeightCThisState[k],
4872 v->ViewportYStartC[k],
4876 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4877 v->PTEBufferSizeInRequestsForChroma = 0;
4878 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4879 v->MetaRowBytesC = 0.0;
4880 v->DPTEBytesPerRowC = 0.0;
4881 v->PrefetchLinesC[i][j][k] = 0.0;
4882 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4884 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4887 v->Read256BlockHeightY[k],
4888 v->Read256BlockWidthY[k],
4889 v->SourcePixelFormat[k],
4890 v->SurfaceTiling[k],
4891 v->BytePerPixelY[k],
4893 v->SwathWidthYThisState[k],
4894 v->ViewportHeight[k],
4897 v->HostVMMaxNonCachedPageTableLevels,
4898 v->GPUVMMinPageSize,
4899 v->HostVMMinPageSize,
4900 v->PTEBufferSizeInRequestsForLuma,
4902 v->DCCMetaPitchY[k],
4903 &v->MacroTileWidthY[k],
4905 &v->DPTEBytesPerRowY,
4906 &v->PTEBufferSizeNotExceededY[i][j][k],
4908 &v->dpte_row_height[k],
4912 &v->meta_row_height[k],
4914 &v->dpte_group_bytes[k],
4920 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4925 v->ProgressiveToInterlaceUnitInOPP,
4926 v->SwathHeightYThisState[k],
4927 v->ViewportYStartY[k],
4930 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4931 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4932 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4934 CalculateRowBandwidth(
4936 v->SourcePixelFormat[k],
4940 v->HTotal[k] / v->PixelClock[k],
4943 v->meta_row_height[k],
4944 v->meta_row_height_chroma[k],
4945 v->DPTEBytesPerRowY,
4946 v->DPTEBytesPerRowC,
4947 v->dpte_row_height[k],
4948 v->dpte_row_height_chroma[k],
4949 &v->meta_row_bandwidth[i][j][k],
4950 &v->dpte_row_bandwidth[i][j][k]);
4952 v->UrgLatency[i] = CalculateUrgentLatency(
4953 v->UrgentLatencyPixelDataOnly,
4954 v->UrgentLatencyPixelMixedWithVMData,
4955 v->UrgentLatencyVMDataOnly,
4956 v->DoUrgentLatencyAdjustment,
4957 v->UrgentLatencyAdjustmentFabricClockComponent,
4958 v->UrgentLatencyAdjustmentFabricClockReference,
4959 v->FabricClockPerState[i]);
4961 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4962 CalculateUrgentBurstFactor(
4963 v->swath_width_luma_ub_this_state[k],
4964 v->swath_width_chroma_ub_this_state[k],
4965 v->SwathHeightYThisState[k],
4966 v->SwathHeightCThisState[k],
4967 v->HTotal[k] / v->PixelClock[k],
4969 v->CursorBufferSize,
4970 v->CursorWidth[k][0],
4974 v->BytePerPixelInDETY[k],
4975 v->BytePerPixelInDETC[k],
4976 v->DETBufferSizeYThisState[k],
4977 v->DETBufferSizeCThisState[k],
4978 &v->UrgentBurstFactorCursor[k],
4979 &v->UrgentBurstFactorLuma[k],
4980 &v->UrgentBurstFactorChroma[k],
4981 &NotUrgentLatencyHiding[k]);
4984 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4985 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4986 if (NotUrgentLatencyHiding[k]) {
4987 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4991 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4992 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4993 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4994 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4997 v->TotalVActivePixelBandwidth[i][j] = 0;
4998 v->TotalVActiveCursorBandwidth[i][j] = 0;
4999 v->TotalMetaRowBandwidth[i][j] = 0;
5000 v->TotalDPTERowBandwidth[i][j] = 0;
5001 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5002 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5003 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5004 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5005 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5010 //Calculate Return BW
5011 for (i = 0; i < v->soc.num_states; ++i) {
5012 for (j = 0; j <= 1; ++j) {
5013 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5014 if (v->BlendingAndTiming[k] == k) {
5015 if (v->WritebackEnable[k] == true) {
5016 v->WritebackDelayTime[k] = v->WritebackLatency
5017 + CalculateWriteBackDelay(
5018 v->WritebackPixelFormat[k],
5019 v->WritebackHRatio[k],
5020 v->WritebackVRatio[k],
5021 v->WritebackVTaps[k],
5022 v->WritebackDestinationWidth[k],
5023 v->WritebackDestinationHeight[k],
5024 v->WritebackSourceHeight[k],
5025 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5027 v->WritebackDelayTime[k] = 0.0;
5029 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5030 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5031 v->WritebackDelayTime[k] = dml_max(
5032 v->WritebackDelayTime[k],
5034 + CalculateWriteBackDelay(
5035 v->WritebackPixelFormat[m],
5036 v->WritebackHRatio[m],
5037 v->WritebackVRatio[m],
5038 v->WritebackVTaps[m],
5039 v->WritebackDestinationWidth[m],
5040 v->WritebackDestinationHeight[m],
5041 v->WritebackSourceHeight[m],
5042 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5047 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5048 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5049 if (v->BlendingAndTiming[k] == m) {
5050 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5054 v->MaxMaxVStartup[i][j] = 0;
5055 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5056 v->MaximumVStartup[i][j][k] =
5057 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5058 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5059 v->VTotal[k] - v->VActive[k]
5063 1.0 * v->WritebackDelayTime[k]
5065 / v->PixelClock[k]),
5067 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5072 ReorderingBytes = v->NumberOfChannels
5074 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5075 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5076 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5078 for (i = 0; i < v->soc.num_states; ++i) {
5079 for (j = 0; j <= 1; ++j) {
5080 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5084 if (v->UseMinimumRequiredDCFCLK == true) {
5087 v->MaxInterDCNTileRepeaters,
5089 v->DRAMClockChangeLatency,
5090 v->SREnterPlusExitTime,
5092 v->RoundTripPingLatencyCycles,
5094 v->PixelChunkSizeInKByte,
5097 v->GPUVMMaxPageTableLevels,
5099 v->NumberOfActivePlanes,
5100 v->HostVMMinPageSize,
5101 v->HostVMMaxNonCachedPageTableLevels,
5102 v->DynamicMetadataVMEnabled,
5103 v->ImmediateFlipRequirement,
5104 v->ProgressiveToInterlaceUnitInOPP,
5105 v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
5106 v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
5109 v->DynamicMetadataTransmittedBytes,
5110 v->DynamicMetadataLinesBeforeActiveRequired,
5116 v->ProjectedDCFCLKDeepSleep,
5118 v->TotalVActivePixelBandwidth,
5119 v->TotalVActiveCursorBandwidth,
5120 v->TotalMetaRowBandwidth,
5121 v->TotalDPTERowBandwidth,
5122 v->TotalNumberOfActiveDPP,
5123 v->TotalNumberOfDCCActiveDPP,
5124 v->dpte_group_bytes,
5127 v->swath_width_luma_ub_all_states,
5128 v->swath_width_chroma_ub_all_states,
5133 v->PDEAndMetaPTEBytesPerFrame,
5136 v->DynamicMetadataEnable,
5137 v->VActivePixelBandwidth,
5138 v->VActiveCursorBandwidth,
5139 v->ReadBandwidthLuma,
5140 v->ReadBandwidthChroma,
5145 for (i = 0; i < v->soc.num_states; ++i) {
5146 for (j = 0; j <= 1; ++j) {
5147 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5148 v->ReturnBusWidth * v->DCFCLKState[i][j],
5149 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5150 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5151 double PixelDataOnlyReturnBWPerState = dml_min(
5152 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5153 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5154 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5155 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5156 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5158 if (v->HostVMEnable != true) {
5159 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5161 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5166 //Re-ordering Buffer Support Check
5167 for (i = 0; i < v->soc.num_states; ++i) {
5168 for (j = 0; j <= 1; ++j) {
5169 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5170 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5171 v->ROBSupport[i][j] = true;
5173 v->ROBSupport[i][j] = false;
5178 //Vertical Active BW support check
5180 MaxTotalVActiveRDBandwidth = 0;
5181 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5182 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5185 for (i = 0; i < v->soc.num_states; ++i) {
5186 for (j = 0; j <= 1; ++j) {
5187 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5189 v->ReturnBusWidth * v->DCFCLKState[i][j],
5190 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5191 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5192 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5193 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5195 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5196 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5198 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5203 v->UrgentLatency = CalculateUrgentLatency(
5204 v->UrgentLatencyPixelDataOnly,
5205 v->UrgentLatencyPixelMixedWithVMData,
5206 v->UrgentLatencyVMDataOnly,
5207 v->DoUrgentLatencyAdjustment,
5208 v->UrgentLatencyAdjustmentFabricClockComponent,
5209 v->UrgentLatencyAdjustmentFabricClockReference,
5212 for (i = 0; i < v->soc.num_states; ++i) {
5213 for (j = 0; j <= 1; ++j) {
5214 double VMDataOnlyReturnBWPerState;
5215 double HostVMInefficiencyFactor = 1;
5216 int NextPrefetchModeState = MinPrefetchMode;
5217 bool UnboundedRequestEnabledThisState = false;
5218 int CompressedBufferSizeInkByteThisState = 0;
5221 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5223 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5224 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5225 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5226 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5229 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5230 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5231 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5232 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5233 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5234 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5235 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5236 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5237 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5238 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5241 VMDataOnlyReturnBWPerState = dml_min(
5243 v->ReturnBusWidth * v->DCFCLKState[i][j],
5244 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5245 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5246 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5247 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5248 if (v->GPUVMEnable && v->HostVMEnable)
5249 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5251 v->ExtraLatency = CalculateExtraLatency(
5252 v->RoundTripPingLatencyCycles,
5254 v->DCFCLKState[i][j],
5255 v->TotalNumberOfActiveDPP[i][j],
5256 v->PixelChunkSizeInKByte,
5257 v->TotalNumberOfDCCActiveDPP[i][j],
5259 v->ReturnBWPerState[i][j],
5262 v->NumberOfActivePlanes,
5263 v->NoOfDPPThisState,
5264 v->dpte_group_bytes,
5265 HostVMInefficiencyFactor,
5266 v->HostVMMinPageSize,
5267 v->HostVMMaxNonCachedPageTableLevels);
5269 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5271 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5272 v->MaxVStartup = v->NextMaxVStartup;
5274 v->TWait = CalculateTWait(
5275 v->PrefetchModePerState[i][j],
5276 v->DRAMClockChangeLatency,
5278 v->SREnterPlusExitTime);
5280 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5283 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
5284 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
5285 myPipe.PixelClock = v->PixelClock[k];
5286 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
5287 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
5288 myPipe.ScalerEnabled = v->ScalerEnabled[k];
5289 myPipe.SourceScan = v->SourceScan[k];
5290 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
5291 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
5292 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
5293 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
5294 myPipe.InterlaceEnable = v->Interlace[k];
5295 myPipe.NumberOfCursors = v->NumberOfCursors[k];
5296 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
5297 myPipe.HTotal = v->HTotal[k];
5298 myPipe.DCCEnable = v->DCCEnable[k];
5299 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
5300 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
5301 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
5302 myPipe.BytePerPixelY = v->BytePerPixelY[k];
5303 myPipe.BytePerPixelC = v->BytePerPixelC[k];
5304 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
5305 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
5307 HostVMInefficiencyFactor,
5309 v->DSCDelayPerState[i][k],
5310 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
5312 v->DPPCLKDelaySCLLBOnly,
5313 v->DPPCLKDelayCNVCCursor,
5314 v->DISPCLKDelaySubtotal,
5315 v->SwathWidthYThisState[k] / v->HRatio[k],
5317 v->MaxInterDCNTileRepeaters,
5318 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
5319 v->MaximumVStartup[i][j][k],
5320 v->GPUVMMaxPageTableLevels,
5323 v->HostVMMaxNonCachedPageTableLevels,
5324 v->HostVMMinPageSize,
5325 v->DynamicMetadataEnable[k],
5326 v->DynamicMetadataVMEnabled,
5327 v->DynamicMetadataLinesBeforeActiveRequired[k],
5328 v->DynamicMetadataTransmittedBytes[k],
5332 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5333 v->MetaRowBytes[i][j][k],
5334 v->DPTEBytesPerRow[i][j][k],
5335 v->PrefetchLinesY[i][j][k],
5336 v->SwathWidthYThisState[k],
5339 v->PrefetchLinesC[i][j][k],
5340 v->SwathWidthCThisState[k],
5343 v->swath_width_luma_ub_this_state[k],
5344 v->swath_width_chroma_ub_this_state[k],
5345 v->SwathHeightYThisState[k],
5346 v->SwathHeightCThisState[k],
5348 &v->DSTXAfterScaler[k],
5349 &v->DSTYAfterScaler[k],
5350 &v->LineTimesForPrefetch[k],
5352 &v->LinesForMetaPTE[k],
5353 &v->LinesForMetaAndDPTERow[k],
5354 &v->VRatioPreY[i][j][k],
5355 &v->VRatioPreC[i][j][k],
5356 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
5357 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
5358 &v->NoTimeForDynamicMetadata[i][j][k],
5360 &v->prefetch_vmrow_bw[k],
5364 &v->VUpdateOffsetPix[k],
5365 &v->VUpdateWidthPix[k],
5366 &v->VReadyOffsetPix[k]);
5369 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5370 CalculateUrgentBurstFactor(
5371 v->swath_width_luma_ub_this_state[k],
5372 v->swath_width_chroma_ub_this_state[k],
5373 v->SwathHeightYThisState[k],
5374 v->SwathHeightCThisState[k],
5375 v->HTotal[k] / v->PixelClock[k],
5377 v->CursorBufferSize,
5378 v->CursorWidth[k][0],
5380 v->VRatioPreY[i][j][k],
5381 v->VRatioPreC[i][j][k],
5382 v->BytePerPixelInDETY[k],
5383 v->BytePerPixelInDETC[k],
5384 v->DETBufferSizeYThisState[k],
5385 v->DETBufferSizeCThisState[k],
5386 &v->UrgentBurstFactorCursorPre[k],
5387 &v->UrgentBurstFactorLumaPre[k],
5388 &v->UrgentBurstFactorChroma[k],
5389 &v->NotUrgentLatencyHidingPre[k]);
5392 v->MaximumReadBandwidthWithPrefetch = 0.0;
5393 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5394 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5395 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5397 v->MaximumReadBandwidthWithPrefetch =
5398 v->MaximumReadBandwidthWithPrefetch
5400 v->VActivePixelBandwidth[i][j][k],
5401 v->VActiveCursorBandwidth[i][j][k]
5402 + v->NoOfDPP[i][j][k]
5403 * (v->meta_row_bandwidth[i][j][k]
5404 + v->dpte_row_bandwidth[i][j][k]),
5405 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5407 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5408 * v->UrgentBurstFactorLumaPre[k]
5409 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5410 * v->UrgentBurstFactorChromaPre[k])
5411 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5414 v->NotEnoughUrgentLatencyHidingPre = false;
5415 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5416 if (v->NotUrgentLatencyHidingPre[k] == true) {
5417 v->NotEnoughUrgentLatencyHidingPre = true;
5421 v->PrefetchSupported[i][j] = true;
5422 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5423 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5424 v->PrefetchSupported[i][j] = false;
5426 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5427 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5428 || v->NoTimeForPrefetch[i][j][k] == true) {
5429 v->PrefetchSupported[i][j] = false;
5433 v->DynamicMetadataSupported[i][j] = true;
5434 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5435 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5436 v->DynamicMetadataSupported[i][j] = false;
5440 v->VRatioInPrefetchSupported[i][j] = true;
5441 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5442 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5443 v->VRatioInPrefetchSupported[i][j] = false;
5446 v->AnyLinesForVMOrRowTooLarge = false;
5447 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5448 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5449 v->AnyLinesForVMOrRowTooLarge = true;
5453 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5455 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5456 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5457 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5458 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5460 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5462 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5463 * v->UrgentBurstFactorLumaPre[k]
5464 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5465 * v->UrgentBurstFactorChromaPre[k])
5466 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5468 v->TotImmediateFlipBytes = 0.0;
5469 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5470 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5471 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5472 + v->DPTEBytesPerRow[i][j][k];
5475 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5476 CalculateFlipSchedule(
5478 HostVMInefficiencyFactor,
5481 v->GPUVMMaxPageTableLevels,
5483 v->HostVMMaxNonCachedPageTableLevels,
5485 v->HostVMMinPageSize,
5486 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5487 v->MetaRowBytes[i][j][k],
5488 v->DPTEBytesPerRow[i][j][k],
5489 v->BandwidthAvailableForImmediateFlip,
5490 v->TotImmediateFlipBytes,
5491 v->SourcePixelFormat[k],
5492 v->HTotal[k] / v->PixelClock[k],
5497 v->dpte_row_height[k],
5498 v->meta_row_height[k],
5499 v->dpte_row_height_chroma[k],
5500 v->meta_row_height_chroma[k],
5501 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5502 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5503 &v->final_flip_bw[k],
5504 &v->ImmediateFlipSupportedForPipe[k]);
5506 v->total_dcn_read_bw_with_flip = 0.0;
5507 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5508 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5510 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5511 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5512 + v->VActiveCursorBandwidth[i][j][k],
5514 * (v->final_flip_bw[k]
5515 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5516 * v->UrgentBurstFactorLumaPre[k]
5517 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5518 * v->UrgentBurstFactorChromaPre[k])
5519 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5521 v->ImmediateFlipSupportedForState[i][j] = true;
5522 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5523 v->ImmediateFlipSupportedForState[i][j] = false;
5525 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5526 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5527 v->ImmediateFlipSupportedForState[i][j] = false;
5531 v->ImmediateFlipSupportedForState[i][j] = false;
5534 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5535 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5536 NextPrefetchModeState = NextPrefetchModeState + 1;
5538 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5540 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5541 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5542 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement != dm_immediate_flip_required)
5543 || v->ImmediateFlipSupportedForState[i][j] == true))
5544 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5546 CalculateUnboundedRequestAndCompressedBufferSize(
5547 v->DETBufferSizeInKByte[0],
5548 v->ConfigReturnBufferSizeInKByte,
5549 v->UseUnboundedRequesting,
5550 v->TotalNumberOfActiveDPP[i][j],
5553 v->CompressedBufferSegmentSizeInkByte,
5555 &UnboundedRequestEnabledThisState,
5556 &CompressedBufferSizeInkByteThisState);
5558 CalculateWatermarksAndDRAMSpeedChangeSupport(
5560 v->PrefetchModePerState[i][j],
5561 v->NumberOfActivePlanes,
5562 v->MaxLineBufferLines,
5564 v->WritebackInterfaceBufferSize,
5565 v->DCFCLKState[i][j],
5566 v->ReturnBWPerState[i][j],
5567 v->SynchronizedVBlank,
5568 v->dpte_group_bytes,
5572 v->WritebackLatency,
5573 v->WritebackChunkSize,
5574 v->SOCCLKPerState[i],
5575 v->DRAMClockChangeLatency,
5577 v->SREnterPlusExitTime,
5579 v->SREnterPlusExitZ8Time,
5580 v->ProjectedDCFCLKDeepSleep[i][j],
5581 v->DETBufferSizeYThisState,
5582 v->DETBufferSizeCThisState,
5583 v->SwathHeightYThisState,
5584 v->SwathHeightCThisState,
5586 v->SwathWidthYThisState,
5587 v->SwathWidthCThisState,
5596 v->BlendingAndTiming,
5597 v->NoOfDPPThisState,
5598 v->BytePerPixelInDETY,
5599 v->BytePerPixelInDETC,
5603 v->WritebackPixelFormat,
5604 v->WritebackDestinationWidth,
5605 v->WritebackDestinationHeight,
5606 v->WritebackSourceHeight,
5607 UnboundedRequestEnabledThisState,
5608 CompressedBufferSizeInkByteThisState,
5609 &v->DRAMClockChangeSupport[i][j],
5610 &v->UrgentWatermark,
5611 &v->WritebackUrgentWatermark,
5612 &v->DRAMClockChangeWatermark,
5613 &v->WritebackDRAMClockChangeWatermark,
5618 &v->MinActiveDRAMClockChangeLatencySupported);
5622 /*PTE Buffer Size Check*/
5623 for (i = 0; i < v->soc.num_states; i++) {
5624 for (j = 0; j < 2; j++) {
5625 v->PTEBufferSizeNotExceeded[i][j] = true;
5626 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5627 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5628 v->PTEBufferSizeNotExceeded[i][j] = false;
5634 /*Cursor Support Check*/
5635 v->CursorSupport = true;
5636 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5637 if (v->CursorWidth[k][0] > 0.0) {
5638 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5639 v->CursorSupport = false;
5644 /*Valid Pitch Check*/
5645 v->PitchSupport = true;
5646 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5647 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5648 if (v->DCCEnable[k] == true) {
5649 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5651 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5653 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5654 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5655 && v->SourcePixelFormat[k] != dm_mono_8) {
5656 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5657 if (v->DCCEnable[k] == true) {
5658 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5659 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5660 64.0 * v->Read256BlockWidthC[k]);
5662 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5665 v->AlignedCPitch[k] = v->PitchC[k];
5666 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5668 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5669 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5670 v->PitchSupport = false;
5674 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5675 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5676 ViewportExceedsSurface = true;
5677 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5678 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5679 && v->SourcePixelFormat[k] != dm_rgbe) {
5680 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5681 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5682 ViewportExceedsSurface = true;
5688 /*Mode Support, Voltage State and SOC Configuration*/
5689 for (i = v->soc.num_states - 1; i >= 0; i--) {
5690 for (j = 0; j < 2; j++) {
5691 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5692 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5693 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5694 && v->DTBCLKRequiredMoreThanSupported[i] == false
5695 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5696 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5697 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5698 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5699 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5700 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5701 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5702 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement != dm_immediate_flip_required)
5703 || v->ImmediateFlipSupportedForState[i][j] == true)
5704 && FMTBufferExceeded == false) {
5705 v->ModeSupport[i][j] = true;
5707 v->ModeSupport[i][j] = false;
5713 unsigned int MaximumMPCCombine = 0;
5714 for (i = v->soc.num_states; i >= 0; i--) {
5715 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5716 v->VoltageLevel = i;
5717 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5718 if (v->ModeSupport[i][0] == true) {
5719 MaximumMPCCombine = 0;
5721 MaximumMPCCombine = 1;
5725 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5726 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5727 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5728 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5730 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5731 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5732 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5733 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5734 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5735 v->maxMpcComb = MaximumMPCCombine;
/*
 * CalculateWatermarksAndDRAMSpeedChangeSupport
 *
 * Derives the urgent, DRAM clock change, writeback and stutter (regular and
 * Z8) watermarks, computes a DRAM clock change latency margin for every
 * active plane, and from those margins sets *DRAMClockChangeSupport to
 * vactive, vblank or unsupported.
 *
 * Results are returned through the pointer parameters; intermediate values
 * are also stored in mode_lib->vba (TotalActiveWriteback,
 * LBLatencyHidingSourceLinesY/C, ActiveDRAMClockChangeLatencyMargin[],
 * MinActiveDRAMClockChangeMargin, TotalNumberOfActiveOTG).
 *
 * NOTE(review): in this listing several names used below (k, j, VRatio,
 * HRatio, SOCCLK, SRExitTime, LinesInDETC) have no visible declaration, and
 * brace/else/#endif lines are not visible either. Consecutive assignments to
 * the same variable are presumably the two arms of an if/else - verify
 * against the complete file.
 */
5739 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5740 struct display_mode_lib *mode_lib,
5741 unsigned int PrefetchMode,
5742 unsigned int NumberOfActivePlanes,
5743 unsigned int MaxLineBufferLines,
5744 unsigned int LineBufferSize,
5745 unsigned int WritebackInterfaceBufferSize,
5748 bool SynchronizedVBlank,
5749 unsigned int dpte_group_bytes[],
5750 unsigned int MetaChunkSize,
5751 double UrgentLatency,
5752 double ExtraLatency,
5753 double WritebackLatency,
5754 double WritebackChunkSize,
5756 double DRAMClockChangeLatency,
5758 double SREnterPlusExitTime,
5759 double SRExitZ8Time,
5760 double SREnterPlusExitZ8Time,
5761 double DCFCLKDeepSleep,
5762 unsigned int DETBufferSizeY[],
5763 unsigned int DETBufferSizeC[],
5764 unsigned int SwathHeightY[],
5765 unsigned int SwathHeightC[],
5766 unsigned int LBBitPerPixel[],
5767 double SwathWidthY[],
5768 double SwathWidthC[],
5770 double HRatioChroma[],
5771 unsigned int vtaps[],
5772 unsigned int VTAPsChroma[],
5774 double VRatioChroma[],
5775 unsigned int HTotal[],
5776 double PixelClock[],
5777 unsigned int BlendingAndTiming[],
5778 unsigned int DPPPerPlane[],
5779 double BytePerPixelDETY[],
5780 double BytePerPixelDETC[],
5781 double DSTXAfterScaler[],
5782 double DSTYAfterScaler[],
5783 bool WritebackEnable[],
5784 enum source_format_class WritebackPixelFormat[],
5785 double WritebackDestinationWidth[],
5786 double WritebackDestinationHeight[],
5787 double WritebackSourceHeight[],
5788 bool UnboundedRequestEnabled,
5789 int unsigned CompressedBufferSizeInkByte,
5790 enum clock_change_support *DRAMClockChangeSupport,
5791 double *UrgentWatermark,
5792 double *WritebackUrgentWatermark,
5793 double *DRAMClockChangeWatermark,
5794 double *WritebackDRAMClockChangeWatermark,
5795 double *StutterExitWatermark,
5796 double *StutterEnterPlusExitWatermark,
5797 double *Z8StutterExitWatermark,
5798 double *Z8StutterEnterPlusExitWatermark,
5799 double *MinActiveDRAMClockChangeLatencySupported)
5801 struct vba_vars_st *v = &mode_lib->vba;
5802 double EffectiveLBLatencyHidingY;
5803 double EffectiveLBLatencyHidingC;
5804 double LinesInDETY[DC__NUM_DPP__MAX];
5806 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5807 unsigned int LinesInDETCRoundedDownToSwath;
5808 double FullDETBufferingTimeY;
5809 double FullDETBufferingTimeC;
5810 double ActiveDRAMClockChangeLatencyMarginY;
5811 double ActiveDRAMClockChangeLatencyMarginC;
5812 double WritebackDRAMClockChangeLatencyMargin;
5813 double PlaneWithMinActiveDRAMClockChangeMargin;
5814 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5815 double WritebackDRAMClockChangeLatencyHiding;
5816 double TotalPixelBW = 0.0;
/* Urgent watermark: urgent latency plus extra latency. */
5819 *UrgentWatermark = UrgentLatency + ExtraLatency;
5821 #ifdef __DML_VBA_DEBUG__
5822 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5823 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5824 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
/* DRAM clock change watermark is stacked on top of the urgent watermark. */
5827 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5829 #ifdef __DML_VBA_DEBUG__
5830 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
5831 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
/* Count the planes that have writeback enabled. */
5834 v->TotalActiveWriteback = 0;
5835 for (k = 0; k < NumberOfActivePlanes; ++k) {
5836 if (WritebackEnable[k] == true) {
5837 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
/*
 * Writeback watermarks: with at most one active writeback only the latency
 * terms are used; the other assignment adds a
 * WritebackChunkSize * 1024 / 32 / SOCCLK term.
 */
5841 if (v->TotalActiveWriteback <= 1) {
5842 *WritebackUrgentWatermark = WritebackLatency;
5844 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5847 if (v->TotalActiveWriteback <= 1) {
5848 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5850 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
/* Sum the luma + chroma pixel bandwidth of all planes, scaled by DPPPerPlane. */
5853 for (k = 0; k < NumberOfActivePlanes; ++k) {
5854 TotalPixelBW = TotalPixelBW
5855 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
5856 / (HTotal[k] / PixelClock[k]);
/* Per-plane latency hiding (line buffer + DET) and resulting margins. */
5859 for (k = 0; k < NumberOfActivePlanes; ++k) {
5860 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
/* Source lines held in the line buffer, capped at MaxLineBufferLines, minus (taps - 1). */
5862 v->LBLatencyHidingSourceLinesY = dml_min(
5863 (double) MaxLineBufferLines,
5864 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5866 v->LBLatencyHidingSourceLinesC = dml_min(
5867 (double) MaxLineBufferLines,
5868 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
/* Convert the hidden source lines into time: lines / vertical ratio * line time. */
5870 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5872 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
/*
 * With unbounded requests the luma DET size is enlarged by a share of the
 * compressed buffer proportional to this plane's luma bandwidth over
 * TotalPixelBW.
 */
5874 if (UnboundedRequestEnabled) {
5875 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5876 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
/* Lines that fit in the DET, rounded down to a multiple of the swath height, expressed as time. */
5879 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5880 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5881 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5882 if (BytePerPixelDETC[k] > 0) {
/* NOTE(review): reads v->DETBufferSizeC[k], not the DETBufferSizeC[] parameter - confirm this is intended. */
5883 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5884 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5885 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
/* Large sentinel value for the chroma buffering time. */
5888 FullDETBufferingTimeC = 999999;
/*
 * Margin = hidden latency (line buffer + DET) minus the after-scaler delay
 * expressed in time, minus the urgent and DRAM clock change watermarks.
 */
5891 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5892 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
/* With several planes, subtract a (1 - 1/N) fraction of one swath's time. */
5894 if (NumberOfActivePlanes > 1) {
5895 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5896 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5899 if (BytePerPixelDETC[k] > 0) {
5900 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5901 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
5903 if (NumberOfActivePlanes > 1) {
5904 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5905 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
/* Plane margin: the smaller of luma and chroma when chroma exists, luma alone otherwise. */
5907 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5909 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
/* A writeback-enabled plane is further limited by the writeback margin. */
5912 if (WritebackEnable[k] == true) {
5913 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
5914 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
/* The hiding time is halved for the dm_444_64 writeback format. */
5915 if (WritebackPixelFormat[k] == dm_444_64) {
5916 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
/* NOTE(review): subtracts v->WritebackDRAMClockChangeWatermark, not *WritebackDRAMClockChangeWatermark computed above - confirm. */
5918 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5919 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
/*
 * Find the smallest per-plane margin and record the index that
 * BlendingAndTiming[] maps that plane to.
 */
5923 v->MinActiveDRAMClockChangeMargin = 999999;
5924 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5925 for (k = 0; k < NumberOfActivePlanes; ++k) {
5926 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5927 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5928 if (BlendingAndTiming[k] == k) {
5929 PlaneWithMinActiveDRAMClockChangeMargin = k;
5931 for (j = 0; j < NumberOfActivePlanes; ++j) {
5932 if (BlendingAndTiming[k] == j) {
5933 PlaneWithMinActiveDRAMClockChangeMargin = j;
5940 *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
/*
 * Smallest margin among the planes that are neither the plane found above
 * nor mapped to it through BlendingAndTiming[].
 */
5942 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5943 for (k = 0; k < NumberOfActivePlanes; ++k) {
5944 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5945 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5946 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
/* Count the planes whose BlendingAndTiming entry refers to themselves. */
5950 v->TotalNumberOfActiveOTG = 0;
5952 for (k = 0; k < NumberOfActivePlanes; ++k) {
5953 if (BlendingAndTiming[k] == k) {
5954 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
/*
 * Classification (both supported outcomes require PrefetchMode == 0):
 *  - positive minimum margin                                  -> vactive
 *  - synchronized vblank, a single active OTG, or a positive
 *    second-minimum margin                                    -> vblank
 *  - the remaining assignment                                 -> unsupported
 */
5958 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5959 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5960 } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5961 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5962 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5964 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
/* Stutter watermarks: the relevant self-refresh time + ExtraLatency + 10 / DCFCLKDeepSleep. */
5967 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5968 *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5969 *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5970 *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5972 #ifdef __DML_VBA_DEBUG__
5973 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5974 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5975 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5976 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
/*
 * CalculateDCFCLKDeepSleep
 *
 * Computes *DCFCLKDeepSleep as the maximum of:
 *   - 8.0,
 *   - __DML_MIN_DCFCLK_FACTOR__ * (sum of luma + chroma read bandwidth) / ReturnBusWidth,
 *   - every plane's own requirement, which is also stored in
 *     mode_lib->vba.DCFCLKDeepSleepPerPlane[k].
 *
 * NOTE(review): k, VRatio, HRatio, DPPCLK and ReturnBusWidth are used below
 * but have no visible declaration in this listing, and brace/else lines are
 * not visible; consecutive assignments to the same variable are presumably
 * the two arms of an if/else - verify against the complete file.
 */
5980 static void CalculateDCFCLKDeepSleep(
5981 struct display_mode_lib *mode_lib,
5982 unsigned int NumberOfActivePlanes,
5983 int BytePerPixelY[],
5984 int BytePerPixelC[],
5986 double VRatioChroma[],
5987 double SwathWidthY[],
5988 double SwathWidthC[],
5989 unsigned int DPPPerPlane[],
5991 double HRatioChroma[],
5992 double PixelClock[],
5993 double PSCL_THROUGHPUT[],
5994 double PSCL_THROUGHPUT_CHROMA[],
5996 double ReadBandwidthLuma[],
5997 double ReadBandwidthChroma[],
5999 double *DCFCLKDeepSleep)
6001 struct vba_vars_st *v = &mode_lib->vba;
6002 double DisplayPipeLineDeliveryTimeLuma;
6003 double DisplayPipeLineDeliveryTimeChroma;
6004 double ReadBandwidth = 0.0;
6007 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Line delivery time: pixel-clock based when the vertical ratio is <= 1,
 * otherwise based on scaler throughput and DPPCLK.
 */
6009 if (VRatio[k] <= 1) {
6010 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6012 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* No chroma bytes: chroma delivery time is zero. */
6014 if (BytePerPixelC[k] == 0) {
6015 DisplayPipeLineDeliveryTimeChroma = 0;
6017 if (VRatioChroma[k] <= 1) {
6018 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6020 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/*
 * Per-plane requirement: with chroma, the larger of the luma and chroma
 * terms (each divided by 32.0); the luma-only form divides by 64.0.
 */
6024 if (BytePerPixelC[k] > 0) {
6025 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
6026 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
6028 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
/* Each plane needs at least PixelClock / 16. */
6030 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
/* Total read bandwidth over all planes. */
6034 for (k = 0; k < NumberOfActivePlanes; ++k) {
6035 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
/* Floor of 8.0, then raise to the bandwidth-derived value and to every plane's requirement. */
6038 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
6040 for (k = 0; k < NumberOfActivePlanes; ++k) {
6041 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
/*
 * CalculateUrgentBurstFactor
 *
 * Computes the urgent burst factors for the cursor, luma and chroma buffers.
 * Each factor is BufferTime / (BufferTime - UrgentLatency), where BufferTime
 * is the time the buffer contents last. When BufferTime does not exceed
 * UrgentLatency the factor is set to 0 and *NotEnoughUrgentLatencyHiding is
 * set to 1.
 *
 * NOTE(review): LineTime and VRatio are used below but have no visible
 * declaration in this listing, and brace/else lines are not visible. The
 * "= 1" assignments are presumably fallback arms of conditions that cannot
 * be seen here - verify against the complete file.
 */
6045 static void CalculateUrgentBurstFactor(
6046 int swath_width_luma_ub,
6047 int swath_width_chroma_ub,
6048 unsigned int SwathHeightY,
6049 unsigned int SwathHeightC,
6051 double UrgentLatency,
6052 double CursorBufferSize,
6053 unsigned int CursorWidth,
6054 unsigned int CursorBPP,
6057 double BytePerPixelInDETY,
6058 double BytePerPixelInDETC,
6059 double DETBufferSizeY,
6060 double DETBufferSizeC,
6061 double *UrgentBurstFactorCursor,
6062 double *UrgentBurstFactorLuma,
6063 double *UrgentBurstFactorChroma,
6064 bool *NotEnoughUrgentLatencyHiding)
6066 double LinesInDETLuma;
6067 double LinesInDETChroma;
6068 unsigned int LinesInCursorBuffer;
6069 double CursorBufferSizeInTime;
6070 double DETBufferSizeInTimeLuma;
6071 double DETBufferSizeInTimeChroma;
/* Start optimistic; any of the three checks below may set the flag. */
6073 *NotEnoughUrgentLatencyHiding = 0;
6075 if (CursorWidth > 0) {
/*
 * 1 << floor(log2(x)) with x = CursorBufferSize * 1024 / (CursorWidth * CursorBPP / 8);
 * assuming dml_log2/dml_floor behave as their names suggest, this is the
 * largest power of two not exceeding x.
 */
6076 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
6078 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
6079 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
6080 *NotEnoughUrgentLatencyHiding = 1;
6081 *UrgentBurstFactorCursor = 0;
6083 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
6086 *UrgentBurstFactorCursor = 1;
/* Luma: lines in the DET, rounded down to a multiple of the swath height, converted to time. */
6090 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
6092 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
6093 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
6094 *NotEnoughUrgentLatencyHiding = 1;
6095 *UrgentBurstFactorLuma = 0;
6097 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
6100 *UrgentBurstFactorLuma = 1;
/* Chroma is only evaluated when the surface has chroma bytes. */
6103 if (BytePerPixelInDETC > 0) {
6104 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
/* NOTE(review): the chroma time is divided by VRatio, the same divisor used for luma - confirm this is intended. */
6106 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
6107 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
6108 *NotEnoughUrgentLatencyHiding = 1;
6109 *UrgentBurstFactorChroma = 0;
6111 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
6114 *UrgentBurstFactorChroma = 1;
/*
 * CalculatePixelDeliveryTimes
 *
 * For every active plane fills the output arrays with:
 *   - line delivery times for luma/chroma, nominal and prefetch;
 *   - request delivery times (line delivery time / requests per swath);
 *   - cursor request delivery times, nominal and prefetch (only cursor
 *     index 0 of each plane is read).
 *
 * Where a ratio is <= 1 the pixel-clock based formula is used; the other
 * formula is based on scaler throughput and DPPCLK.
 *
 * NOTE(review): k, VRatio, HRatio and DPPCLK are used below but have no
 * visible declaration in this listing, and brace/else/#endif lines are not
 * visible; consecutive assignments to the same variable are presumably the
 * two arms of an if/else - verify against the complete file.
 */
6119 static void CalculatePixelDeliveryTimes(
6120 unsigned int NumberOfActivePlanes,
6122 double VRatioChroma[],
6123 double VRatioPrefetchY[],
6124 double VRatioPrefetchC[],
6125 unsigned int swath_width_luma_ub[],
6126 unsigned int swath_width_chroma_ub[],
6127 unsigned int DPPPerPlane[],
6129 double HRatioChroma[],
6130 double PixelClock[],
6131 double PSCL_THROUGHPUT[],
6132 double PSCL_THROUGHPUT_CHROMA[],
6134 int BytePerPixelC[],
6135 enum scan_direction_class SourceScan[],
6136 unsigned int NumberOfCursors[],
6137 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6138 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6139 unsigned int BlockWidth256BytesY[],
6140 unsigned int BlockHeight256BytesY[],
6141 unsigned int BlockWidth256BytesC[],
6142 unsigned int BlockHeight256BytesC[],
6143 double DisplayPipeLineDeliveryTimeLuma[],
6144 double DisplayPipeLineDeliveryTimeChroma[],
6145 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6146 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6147 double DisplayPipeRequestDeliveryTimeLuma[],
6148 double DisplayPipeRequestDeliveryTimeChroma[],
6149 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6150 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6151 double CursorRequestDeliveryTime[],
6152 double CursorRequestDeliveryTimePrefetch[])
6154 double req_per_swath_ub;
/* Pass 1: line delivery times (nominal uses VRatio*, prefetch uses VRatioPrefetch*). */
6157 for (k = 0; k < NumberOfActivePlanes; ++k) {
6158 if (VRatio[k] <= 1) {
6159 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6161 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6164 if (BytePerPixelC[k] == 0) {
6165 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6167 if (VRatioChroma[k] <= 1) {
6168 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6170 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6174 if (VRatioPrefetchY[k] <= 1) {
6175 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6177 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6180 if (BytePerPixelC[k] == 0) {
6181 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6183 if (VRatioPrefetchC[k] <= 1) {
6184 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6186 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/* Pass 2: request delivery times = line delivery time / requests per swath. */
6191 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Requests per swath: swath width over the 256-byte block width, or over the
 * block height when SourceScan is dm_vert. Both operands are unsigned int,
 * so the quotient is truncated before it is stored in the double.
 */
6192 if (SourceScan[k] != dm_vert) {
6193 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6195 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6197 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6198 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6199 if (BytePerPixelC[k] == 0) {
6200 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6201 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6203 if (SourceScan[k] != dm_vert) {
6204 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6206 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6208 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6209 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6211 #ifdef __DML_VBA_DEBUG__
6212 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6213 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6214 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6215 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6216 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6217 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6218 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6219 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6220 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6221 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6222 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6223 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
/* Pass 3: cursor request delivery times. */
6227 for (k = 0; k < NumberOfActivePlanes; ++k) {
6228 int cursor_req_per_width;
/*
 * NOTE(review): CursorWidth * CursorBPP / 256 / 8 is evaluated in unsigned
 * integer arithmetic, so it is already truncated before dml_ceil sees it; a
 * product below 2048 yields 0, and cursor_req_per_width is used as a divisor
 * below - confirm this is intended.
 */
6229 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6230 if (NumberOfCursors[k] > 0) {
6231 if (VRatio[k] <= 1) {
6232 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6234 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6236 if (VRatioPrefetchY[k] <= 1) {
6237 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6239 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
/* Zero delivery times; presumably the arm taken when the plane has no cursor. */
6242 CursorRequestDeliveryTime[k] = 0;
6243 CursorRequestDeliveryTimePrefetch[k] = 0;
6245 #ifdef __DML_VBA_DEBUG__
6246 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6247 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6248 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
/*
 * CalculateMetaAndPTETimes
 *
 * For every active plane fills the output arrays with:
 *   - destination lines per PTE row and per meta row (row height / vertical
 *     ratio), luma and chroma;
 *   - time per meta chunk (nominal, vblank, immediate flip) when
 *     DCCEnable[k] is true;
 *   - time per PTE group (nominal, vblank, immediate flip) when GPUVMEnable
 *     is true.
 * Chroma outputs are set to 0 when BytePerPixelC[k] is 0.
 *
 * NOTE(review): k, VRatio, HTotal, DCCEnable, GPUVMEnable and MetaChunkSize
 * are used below but have no visible declaration in this listing, and
 * brace/else lines are not visible; consecutive assignments to the same
 * variable are presumably the two arms of an if/else, and the trailing runs
 * of "= 0" assignments presumably belong to the disabled case - verify
 * against the complete file.
 */
6253 static void CalculateMetaAndPTETimes(
6254 int NumberOfActivePlanes,
6257 int MinMetaChunkSizeBytes,
6260 double VRatioChroma[],
6261 double DestinationLinesToRequestRowInVBlank[],
6262 double DestinationLinesToRequestRowInImmediateFlip[],
6264 double PixelClock[],
6265 int BytePerPixelY[],
6266 int BytePerPixelC[],
6267 enum scan_direction_class SourceScan[],
6268 int dpte_row_height[],
6269 int dpte_row_height_chroma[],
6270 int meta_row_width[],
6271 int meta_row_width_chroma[],
6272 int meta_row_height[],
6273 int meta_row_height_chroma[],
6274 int meta_req_width[],
6275 int meta_req_width_chroma[],
6276 int meta_req_height[],
6277 int meta_req_height_chroma[],
6278 int dpte_group_bytes[],
6279 int PTERequestSizeY[],
6280 int PTERequestSizeC[],
6281 int PixelPTEReqWidthY[],
6282 int PixelPTEReqHeightY[],
6283 int PixelPTEReqWidthC[],
6284 int PixelPTEReqHeightC[],
6285 int dpte_row_width_luma_ub[],
6286 int dpte_row_width_chroma_ub[],
6287 double DST_Y_PER_PTE_ROW_NOM_L[],
6288 double DST_Y_PER_PTE_ROW_NOM_C[],
6289 double DST_Y_PER_META_ROW_NOM_L[],
6290 double DST_Y_PER_META_ROW_NOM_C[],
6291 double TimePerMetaChunkNominal[],
6292 double TimePerChromaMetaChunkNominal[],
6293 double TimePerMetaChunkVBlank[],
6294 double TimePerChromaMetaChunkVBlank[],
6295 double TimePerMetaChunkFlip[],
6296 double TimePerChromaMetaChunkFlip[],
6297 double time_per_pte_group_nom_luma[],
6298 double time_per_pte_group_vblank_luma[],
6299 double time_per_pte_group_flip_luma[],
6300 double time_per_pte_group_nom_chroma[],
6301 double time_per_pte_group_vblank_chroma[],
6302 double time_per_pte_group_flip_chroma[])
6304 unsigned int meta_chunk_width;
6305 unsigned int min_meta_chunk_width;
6306 unsigned int meta_chunk_per_row_int;
6307 unsigned int meta_row_remainder;
6308 unsigned int meta_chunk_threshold;
6309 unsigned int meta_chunks_per_row_ub;
6310 unsigned int meta_chunk_width_chroma;
6311 unsigned int min_meta_chunk_width_chroma;
6312 unsigned int meta_chunk_per_row_int_chroma;
6313 unsigned int meta_row_remainder_chroma;
6314 unsigned int meta_chunk_threshold_chroma;
6315 unsigned int meta_chunks_per_row_ub_chroma;
6316 unsigned int dpte_group_width_luma;
6317 unsigned int dpte_groups_per_row_luma_ub;
6318 unsigned int dpte_group_width_chroma;
6319 unsigned int dpte_groups_per_row_chroma_ub;
/* Pass 1: destination lines per PTE row and per meta row. */
6322 for (k = 0; k < NumberOfActivePlanes; ++k) {
6323 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6324 if (BytePerPixelC[k] == 0) {
6325 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6327 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6329 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6330 if (BytePerPixelC[k] == 0) {
6331 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6333 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
/* Pass 2: time per meta chunk. */
6337 for (k = 0; k < NumberOfActivePlanes; ++k) {
6338 if (DCCEnable[k] == true) {
/* Chunk widths, whole chunks per meta row, and the leftover width of the row. */
6339 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6340 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6341 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6342 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
/*
 * Threshold uses the request width, or the request height when SourceScan
 * is dm_vert.
 * NOTE(review): computed in unsigned arithmetic, so it wraps if meta_req_*
 * exceeds 2 * min_meta_chunk_width - confirm that cannot happen.
 */
6343 if (SourceScan[k] != dm_vert) {
6344 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6346 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
/* Upper bound on chunks per row: +1 when the remainder is within the threshold; the other assignment uses +2. */
6348 if (meta_row_remainder <= meta_chunk_threshold) {
6349 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6351 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
/* Time available for the row (in lines * HTotal / PixelClock) divided by the chunk count. */
6353 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6354 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6355 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6356 if (BytePerPixelC[k] == 0) {
6357 TimePerChromaMetaChunkNominal[k] = 0;
6358 TimePerChromaMetaChunkVBlank[k] = 0;
6359 TimePerChromaMetaChunkFlip[k] = 0;
/* Same computation for chroma. */
6361 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6362 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
/* NOTE(review): unlike the luma line, this divides as double; the result is still truncated when stored in the unsigned int. */
6363 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6364 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6365 if (SourceScan[k] != dm_vert) {
6366 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6368 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6370 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6371 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6373 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6375 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6376 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6377 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
/* All meta chunk times zero; presumably the arm taken when DCC is disabled for the plane. */
6380 TimePerMetaChunkNominal[k] = 0;
6381 TimePerMetaChunkVBlank[k] = 0;
6382 TimePerMetaChunkFlip[k] = 0;
6383 TimePerChromaMetaChunkNominal[k] = 0;
6384 TimePerChromaMetaChunkVBlank[k] = 0;
6385 TimePerChromaMetaChunkFlip[k] = 0;
/* Pass 3: time per PTE group. */
6389 for (k = 0; k < NumberOfActivePlanes; ++k) {
6390 if (GPUVMEnable == true) {
/* Group width: requests per group times the request width (or height when SourceScan is dm_vert). */
6391 if (SourceScan[k] != dm_vert) {
6392 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6394 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
/* Groups per row, rounded up; the available time is divided evenly among them. */
6396 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6397 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6398 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6399 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6400 if (BytePerPixelC[k] == 0) {
6401 time_per_pte_group_nom_chroma[k] = 0;
6402 time_per_pte_group_vblank_chroma[k] = 0;
6403 time_per_pte_group_flip_chroma[k] = 0;
6405 if (SourceScan[k] != dm_vert) {
6406 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6408 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6410 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6411 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6412 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6413 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
/* All PTE group times zero; presumably the arm taken when GPUVM is disabled. */
6416 time_per_pte_group_nom_luma[k] = 0;
6417 time_per_pte_group_vblank_luma[k] = 0;
6418 time_per_pte_group_flip_luma[k] = 0;
6419 time_per_pte_group_nom_chroma[k] = 0;
6420 time_per_pte_group_vblank_chroma[k] = 0;
6421 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes
 *
 * For every active plane computes the time per VM group and per VM request,
 * for vblank and for immediate flip, by dividing the available time
 * (destination lines * HTotal / PixelClock) by the number of groups or
 * requests. The computation runs when GPUVMEnable is true and either DCC is
 * enabled for the plane or GPUVMMaxPageTableLevels > 1.
 *
 * The group/request counts are built from dpde0 bytes (DCC off), meta PTE
 * bytes (DCC on, one page table level) or both (DCC on, more levels); chroma
 * contributions are added when BytePerPixelC[k] > 0.
 *
 * NOTE(review): k, GPUVMEnable and DCCEnable are used below but have no
 * visible declaration in this listing, and brace/else lines are not visible;
 * consecutive assignments to the same variable are presumably the two arms
 * of an if/else, and the trailing "= 0" assignments presumably belong to the
 * case where the outer condition is false - verify against the complete file.
 */
6426 static void CalculateVMGroupAndRequestTimes(
6427 unsigned int NumberOfActivePlanes,
6429 unsigned int GPUVMMaxPageTableLevels,
6430 unsigned int HTotal[],
6431 int BytePerPixelC[],
6432 double DestinationLinesToRequestVMInVBlank[],
6433 double DestinationLinesToRequestVMInImmediateFlip[],
6435 double PixelClock[],
6436 int dpte_row_width_luma_ub[],
6437 int dpte_row_width_chroma_ub[],
6438 int vm_group_bytes[],
6439 unsigned int dpde0_bytes_per_frame_ub_l[],
6440 unsigned int dpde0_bytes_per_frame_ub_c[],
6441 int meta_pte_bytes_per_frame_ub_l[],
6442 int meta_pte_bytes_per_frame_ub_c[],
6443 double TimePerVMGroupVBlank[],
6444 double TimePerVMGroupFlip[],
6445 double TimePerVMRequestVBlank[],
6446 double TimePerVMRequestFlip[])
6448 int num_group_per_lower_vm_stage;
6449 int num_req_per_lower_vm_stage;
6452 for (k = 0; k < NumberOfActivePlanes; ++k) {
6453 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
/* Number of VM groups: bytes per frame divided by vm_group_bytes, each term rounded up. */
6454 if (DCCEnable[k] == false) {
6455 if (BytePerPixelC[k] > 0) {
6456 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6457 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6459 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6462 if (GPUVMMaxPageTableLevels == 1) {
6463 if (BytePerPixelC[k] > 0) {
6464 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6465 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6467 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/* Combined dpde0 + meta PTE case adds a constant (2 with chroma, 1 without). */
6470 if (BytePerPixelC[k] > 0) {
6471 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6472 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6473 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6474 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6476 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6477 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/* Number of VM requests: the same byte counts divided by 64 (integer division). */
6482 if (DCCEnable[k] == false) {
6483 if (BytePerPixelC[k] > 0) {
6484 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6486 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6489 if (GPUVMMaxPageTableLevels == 1) {
6490 if (BytePerPixelC[k] > 0) {
6491 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6493 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6496 if (BytePerPixelC[k] > 0) {
6497 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
6498 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6500 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
/* Spread the available time evenly over the groups / requests. */
6505 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6506 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6507 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6508 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
/* With more than two page table levels every time is halved. */
6510 if (GPUVMMaxPageTableLevels > 2) {
6511 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6512 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6513 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6514 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
/* All VM times zero; presumably the arm taken when the outer condition is false. */
6518 TimePerVMGroupVBlank[k] = 0;
6519 TimePerVMGroupFlip[k] = 0;
6520 TimePerVMRequestVBlank[k] = 0;
6521 TimePerVMRequestFlip[k] = 0;
/*
 * CalculateStutterEfficiency - derive stutter period, burst counts and
 * stutter efficiency figures, both for the regular self-refresh exit time
 * and for the Z8 exit time.
 *
 * Outline of what the body does:
 *  1. Accumulates, over all active planes, the compressed read bandwidth,
 *     the zero-size-request bandwidth (raw and compressed) and the row
 *     (meta + dpte) bandwidth.
 *  2. Derives the average DCC compression rate and zero-size fraction and
 *     from them an effective compressed buffer size.
 *  3. Finds the "critical" plane, i.e. the one with the smallest DET
 *     buffering time; that time is written to *StutterPeriod.
 *  4. Computes the stutter burst time and from it the output values.
 *
 * Outputs (all written through the pointer parameters):
 *  *StutterEfficiencyNotIncludingVBlank, *Z8StutterEfficiencyNotIncludingVBlank:
 *      percentage in [0, 100]; forced to 0 when any plane has writeback
 *      enabled.
 *  *NumberOfStutterBurstsPerFrame, *Z8NumberOfStutterBurstsPerFrame:
 *      ceil(active time of critical plane / stutter period), or 0 when the
 *      matching "not including vblank" efficiency is 0.
 *  *StutterEfficiency, *Z8StutterEfficiency:
 *      efficiency measured against the whole frame time when the vblank
 *      condition below holds, otherwise the "not including vblank" value.
 *  *StutterPeriod:
 *      smallest per-plane DET buffering time.
 *
 * Array parameters are indexed by plane, 0 .. NumberOfActivePlanes - 1.
 */
6526 static void CalculateStutterEfficiency(
6527 struct display_mode_lib *mode_lib,
6528 int CompressedBufferSizeInkByte,
6529 bool UnboundedRequestEnabled,
6530 int ConfigReturnBufferSizeInKByte,
6531 int MetaFIFOSizeInKEntries,
6532 int ZeroSizeBufferEntries,
6533 int NumberOfActivePlanes,
6534 int ROBBufferSizeInKByte,
6535 double TotalDataReadBandwidth,
6538 double COMPBUF_RESERVED_SPACE_64B,
6539 double COMPBUF_RESERVED_SPACE_ZS,
6541 double SRExitZ8Time,
6542 bool SynchronizedVBlank,
6543 double Z8StutterEnterPlusExitWatermark,
6544 double StutterEnterPlusExitWatermark,
6545 bool ProgressiveToInterlaceUnitInOPP,
6547 double MinTTUVBlank[],
6549 unsigned int DETBufferSizeY[],
6550 int BytePerPixelY[],
6551 double BytePerPixelDETY[],
6552 double SwathWidthY[],
6555 double NetDCCRateLuma[],
6556 double NetDCCRateChroma[],
6557 double DCCFractionOfZeroSizeRequestsLuma[],
6558 double DCCFractionOfZeroSizeRequestsChroma[],
6561 double PixelClock[],
6563 enum scan_direction_class SourceScan[],
6564 int BlockHeight256BytesY[],
6565 int BlockWidth256BytesY[],
6566 int BlockHeight256BytesC[],
6567 int BlockWidth256BytesC[],
6568 int DCCYMaxUncompressedBlock[],
6569 int DCCCMaxUncompressedBlock[],
6572 bool WritebackEnable[],
6573 double ReadBandwidthPlaneLuma[],
6574 double ReadBandwidthPlaneChroma[],
6575 double meta_row_bw[],
6576 double dpte_row_bw[],
6577 double *StutterEfficiencyNotIncludingVBlank,
6578 double *StutterEfficiency,
6579 int *NumberOfStutterBurstsPerFrame,
6580 double *Z8StutterEfficiencyNotIncludingVBlank,
6581 double *Z8StutterEfficiency,
6582 int *Z8NumberOfStutterBurstsPerFrame,
6583 double *StutterPeriod)
/* v is only used further down to read BlendingAndTiming[] when counting OTGs. */
6585 struct vba_vars_st *v = &mode_lib->vba;
6587 double DETBufferingTimeY;
/* The *CriticalPlane locals hold a snapshot of the plane with the smallest DET buffering time. */
6588 double SwathWidthYCriticalPlane = 0;
6589 double VActiveTimeCriticalPlane = 0;
6590 double FrameTimeCriticalPlane = 0;
6591 int BytePerPixelYCriticalPlane = 0;
6592 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6593 double MinTTUVBlankCriticalPlane = 0;
6594 double TotalCompressedReadBandwidth;
6595 double TotalRowReadBandwidth;
6596 double AverageDCCCompressionRate;
6597 double EffectiveCompressedBufferSize;
6598 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6599 double StutterBurstTime;
6600 int TotalActiveWriteback;
6602 double LinesInDETYRoundedDownToSwath;
6603 double MaximumEffectiveCompressionLuma;
6604 double MaximumEffectiveCompressionChroma;
6605 double TotalZeroSizeRequestReadBandwidth;
6606 double TotalZeroSizeCompressedReadBandwidth;
6607 double AverageDCCZeroSizeFraction;
6608 double AverageZeroSizeCompressionRate;
6609 int TotalNumberOfActiveOTG = 0;
6610 double LastStutterPeriod = 0.0;
6611 double LastZ8StutterPeriod = 0.0;
/* Step 1: reset the bandwidth accumulators, then sum the per-plane contributions. */
6614 TotalZeroSizeRequestReadBandwidth = 0;
6615 TotalZeroSizeCompressedReadBandwidth = 0;
6616 TotalRowReadBandwidth = 0;
6617 TotalCompressedReadBandwidth = 0;
6619 for (k = 0; k < NumberOfActivePlanes; ++k) {
6620 if (DCCEnable[k] == true) {
/*
 * Luma: the maximum effective compression is capped at 2 when the 256-byte
 * block dimension along the swath (width for vertical scan, height
 * otherwise) is larger than the swath height, or when the maximum
 * uncompressed block is smaller than 256; the other value used is 4.
 */
6621 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6622 || DCCYMaxUncompressedBlock[k] < 256) {
6623 MaximumEffectiveCompressionLuma = 2;
6625 MaximumEffectiveCompressionLuma = 4;
/* Compressed bandwidth uses the smaller of the net DCC rate and the cap above. */
6627 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6628 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6629 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6630 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
/* Chroma: same accounting as luma, only when the plane has chroma read bandwidth. */
6631 if (ReadBandwidthPlaneChroma[k] > 0) {
6632 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6633 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6634 MaximumEffectiveCompressionChroma = 2;
6636 MaximumEffectiveCompressionChroma = 4;
6638 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6639 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6640 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6641 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6642 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
/* Plane without DCC: luma and chroma bandwidth are added without any compression divisor. */
6645 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
/* Row bandwidth (meta + dpte) is scaled by the number of DPPs of the plane. */
6647 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
/* Step 2: averages over all planes. */
6650 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6651 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
/*
 * NOTE(review): MaximumEffectiveCompressionLuma/Chroma are only assigned
 * inside the DCC-enabled branches above, so the two debug prints below may
 * read unassigned values when no plane has DCC (or no chroma) - confirm.
 */
6653 #ifdef __DML_VBA_DEBUG__
6654 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6655 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6656 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6657 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6658 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6659 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6660 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6661 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
/*
 * Effective compressed buffer size, three cases on the zero-size fraction:
 *  == 1 : everything is a zero-size request; sized from the meta FIFO and
 *         the zero-size buffer entries (minus the ZS reservation).
 *  >  0 : mixed; each of the two terms is the smaller of a data-side limit
 *         and a zero-size-side limit.
 *  else : no zero-size requests; sized from the compressed buffer / meta
 *         FIFO minimum plus the ROB (minus the 64B reservation).
 */
6664 if (AverageDCCZeroSizeFraction == 1) {
6665 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6666 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6667 } else if (AverageDCCZeroSizeFraction > 0) {
6668 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6669 EffectiveCompressedBufferSize = dml_min(
6670 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6671 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6672 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6673 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
/* The "min 3" / "min 4" prints below omit the reserved-space terms used in the computation above. */
6674 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6676 "DML::%s: min 2 = %f\n",
6678 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6679 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6680 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6682 EffectiveCompressedBufferSize = dml_min(
6683 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6684 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6685 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6686 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
/*
 * NOTE(review): AverageZeroSizeCompressionRate is not assigned in the
 * zero-fraction case above, yet it is printed here - confirm.
 */
6689 #ifdef __DML_VBA_DEBUG__
6690 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6691 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6692 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
/*
 * Step 3: per plane, convert the luma DET size (plus this plane's bandwidth
 * share of the effective compressed buffer when unbounded requests are
 * enabled) into lines, round down to a multiple of the swath height via
 * dml_floor, and turn that into a buffering time.
 */
6696 for (k = 0; k < NumberOfActivePlanes; ++k) {
6697 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6698 / BytePerPixelDETY[k] / SwathWidthY[k];
6699 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6700 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
/*
 * NOTE(review): DETBufferSizeY[] is declared unsigned int in the parameter
 * list but is printed with %f in the first debug line below - confirm the
 * format specifier.
 */
6701 #ifdef __DML_VBA_DEBUG__
6702 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6703 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6704 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6705 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6706 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6707 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6708 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6709 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6710 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6711 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6712 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6713 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
/*
 * Plane 0 always seeds *StutterPeriod; later planes replace it only when
 * their buffering time is strictly smaller. The winning plane's timing and
 * swath data are captured as the critical-plane values.
 */
6716 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
/* Interlaced timing halves VTotal/VActive (rounded down) unless the OPP does the progressive-to-interlace conversion. */
6717 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6719 *StutterPeriod = DETBufferingTimeY;
6720 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6721 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6722 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6723 SwathWidthYCriticalPlane = SwathWidthY[k];
/* Lines still needed to complete the swath that was cut off by the round-down above. */
6724 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6725 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6727 #ifdef __DML_VBA_DEBUG__
6728 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6729 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6730 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6731 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6732 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6733 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6734 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
/* Step 4: the data of one stutter period is split into the part that fits the effective compressed buffer and the remainder. */
6739 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6740 #ifdef __DML_VBA_DEBUG__
6741 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6742 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6743 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6744 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6745 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6746 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6747 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6748 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6749 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6750 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
/*
 * Burst time is the sum of three parts:
 *  part 1: the fitting part, de-rated by the average DCC rate, at ReturnBW;
 *  part 2: the remainder, at DCFCLK * 64;
 *  part 3: the row traffic of one stutter period, at ReturnBW.
 */
6753 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6754 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6755 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6756 #ifdef __DML_VBA_DEBUG__
6757 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6758 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6759 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6760 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6761 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
/* The burst can never be shorter than the time to fetch the residual swath of the critical plane at ReturnBW. */
6763 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6766 "DML::%s: Time to finish residue swath=%f\n",
6768 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
/* Count planes with writeback enabled; any such plane zeroes the efficiencies below. */
6770 TotalActiveWriteback = 0;
6771 for (k = 0; k < NumberOfActivePlanes; ++k) {
6772 if (WritebackEnable[k]) {
6773 TotalActiveWriteback = TotalActiveWriteback + 1;
6777 if (TotalActiveWriteback == 0) {
6778 #ifdef __DML_VBA_DEBUG__
6779 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6780 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6781 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6782 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
/* Efficiency = share of the stutter period not spent on exit + burst, as a percentage, never below 0. */
6784 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6785 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6786 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6787 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
/* Writeback active: no stutter is reported. */
6789 *StutterEfficiencyNotIncludingVBlank = 0.;
6790 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6791 *NumberOfStutterBurstsPerFrame = 0;
6792 *Z8NumberOfStutterBurstsPerFrame = 0;
6794 #ifdef __DML_VBA_DEBUG__
6795 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6796 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6797 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6798 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6799 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
/* A plane whose BlendingAndTiming entry is its own index is counted as one active OTG. */
6802 for (k = 0; k < NumberOfActivePlanes; ++k) {
6803 if (v->BlendingAndTiming[k] == k) {
6804 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
/*
 * Efficiency including vblank: when vblanks are synchronized (or there is a
 * single OTG) and the last stutter period plus the critical plane's
 * MinTTUVBlank exceeds the enter+exit watermark, the efficiency is measured
 * against the full frame time; otherwise the "not including vblank" value
 * is reused.
 */
6808 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6809 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6811 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6812 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6813 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6815 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6818 *StutterEfficiency = 0;
/*
 * Same computation for Z8, using SRExitZ8Time and the Z8 watermark.
 * NOTE(review): this path reads *NumberOfStutterBurstsPerFrame rather than
 * *Z8NumberOfStutterBurstsPerFrame; the two can differ when only one of the
 * "not including vblank" efficiencies is 0 - confirm this is intended.
 */
6821 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6822 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6823 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6824 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6825 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6827 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6830 *Z8StutterEfficiency = 0.;
6833 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6834 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6835 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6836 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6837 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6838 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6839 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6840 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
/*
 * CalculateSwathAndDETConfiguration - choose per-plane swath heights and the
 * luma/chroma split of the DET buffer, and report whether each viewport fits.
 *
 * The function first calls CalculateSwathWidth() to obtain the swath widths,
 * the maximum swath heights and the rounded-up swath widths. Then, for every
 * active plane, it:
 *  - picks minimum swath heights from pixel format, tiling and scan direction;
 *  - computes swath sizes in bytes at the maximum and minimum heights;
 *  - selects the tallest luma/chroma swath combination whose combined size
 *    fits in half of the DET buffer;
 *  - writes DETBufferSizeY[k] / DETBufferSizeC[k];
 *  - writes ViewportSizeSupportPerPlane[k], and clears *ViewportSizeSupport
 *    if any plane is unsupported.
 *
 * Array parameters are indexed by plane, 0 .. NumberOfActivePlanes - 1.
 */
6843 static void CalculateSwathAndDETConfiguration(
6844 bool ForceSingleDPP,
6845 int NumberOfActivePlanes,
6846 unsigned int DETBufferSizeInKByte,
6847 double MaximumSwathWidthLuma[],
6848 double MaximumSwathWidthChroma[],
6849 enum scan_direction_class SourceScan[],
6850 enum source_format_class SourcePixelFormat[],
6851 enum dm_swizzle_mode SurfaceTiling[],
6852 int ViewportWidth[],
6853 int ViewportHeight[],
6854 int SurfaceWidthY[],
6855 int SurfaceWidthC[],
6856 int SurfaceHeightY[],
6857 int SurfaceHeightC[],
6858 int Read256BytesBlockHeightY[],
6859 int Read256BytesBlockHeightC[],
6860 int Read256BytesBlockWidthY[],
6861 int Read256BytesBlockWidthC[],
6862 enum odm_combine_mode ODMCombineEnabled[],
6863 int BlendingAndTiming[],
6866 double BytePerPixDETY[],
6867 double BytePerPixDETC[],
6870 double HRatioChroma[],
6872 int swath_width_luma_ub[],
6873 int swath_width_chroma_ub[],
6874 double SwathWidth[],
6875 double SwathWidthChroma[],
6878 unsigned int DETBufferSizeY[],
6879 unsigned int DETBufferSizeC[],
6880 bool ViewportSizeSupportPerPlane[],
6881 bool *ViewportSizeSupport)
/* Per-plane scratch arrays filled by CalculateSwathWidth(). */
6883 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6884 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6885 int MinimumSwathHeightY;
6886 int MinimumSwathHeightC;
/* Swath sizes in bytes; "Max"/"Min" refer to the swath height used, the unqualified pair holds the selected sizes. */
6887 int RoundedUpMaxSwathSizeBytesY;
6888 int RoundedUpMaxSwathSizeBytesC;
6889 int RoundedUpMinSwathSizeBytesY;
6890 int RoundedUpMinSwathSizeBytesC;
6891 int RoundedUpSwathSizeBytesY;
6892 int RoundedUpSwathSizeBytesC;
6893 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6894 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6897 CalculateSwathWidth(
6899 NumberOfActivePlanes,
6911 Read256BytesBlockHeightY,
6912 Read256BytesBlockHeightC,
6913 Read256BytesBlockWidthY,
6914 Read256BytesBlockWidthC,
6919 SwathWidthSingleDPP,
6920 SwathWidthSingleDPPChroma,
6923 MaximumSwathHeightY,
6924 MaximumSwathHeightC,
6925 swath_width_luma_ub,
6926 swath_width_chroma_ub);
/* Assume every viewport is supported until a plane proves otherwise. */
6928 *ViewportSizeSupport = true;
6929 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * First group of formats (444 64/32/16, mono 16/8, rgbe): the chroma minimum
 * always equals the chroma maximum; the luma minimum is either the maximum
 * or half of it depending on tiling and scan direction.
 */
6930 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6931 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6932 if (SurfaceTiling[k] == dm_sw_linear
6933 || (SourcePixelFormat[k] == dm_444_64
6934 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6935 && SourceScan[k] != dm_vert)) {
6936 MinimumSwathHeightY = MaximumSwathHeightY[k];
/*
 * NOTE(review): dm_444_8 is not among the formats accepted by the enclosing
 * test above, so this arm looks unreachable as written - confirm.
 */
6937 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6938 MinimumSwathHeightY = MaximumSwathHeightY[k];
6940 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6942 MinimumSwathHeightC = MaximumSwathHeightC[k];
/* Remaining formats: linear tiling keeps both maxima; the other arms halve luma, chroma or both. */
6944 if (SurfaceTiling[k] == dm_sw_linear) {
6945 MinimumSwathHeightY = MaximumSwathHeightY[k];
6946 MinimumSwathHeightC = MaximumSwathHeightC[k];
6947 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6948 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6949 MinimumSwathHeightC = MaximumSwathHeightC[k];
6950 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6951 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6952 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6953 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6954 MinimumSwathHeightY = MaximumSwathHeightY[k];
6955 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6957 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6958 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
/*
 * Swath size in bytes = rounded-up swath width * bytes per pixel in DET *
 * swath height. For dm_420_10 the result is additionally passed through
 * dml_ceil(..., 256) (presumably rounding up to a multiple of 256 - verify
 * against dml_ceil's definition).
 */
6962 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6963 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6964 if (SourcePixelFormat[k] == dm_420_10) {
6965 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6966 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6968 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6969 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6970 if (SourcePixelFormat[k] == dm_420_10) {
6971 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6972 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
/*
 * Selection, in order of preference, against half of the DET buffer:
 *  1. max luma + max chroma;
 *  2. min luma + max chroma, when luma is at least 1.5x chroma;
 *  3. max luma + min chroma, when luma is below 1.5x chroma;
 *  4. min luma + min chroma as the fallback.
 */
6975 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6976 SwathHeightY[k] = MaximumSwathHeightY[k];
6977 SwathHeightC[k] = MaximumSwathHeightC[k];
6978 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6979 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6980 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6981 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6982 SwathHeightY[k] = MinimumSwathHeightY;
6983 SwathHeightC[k] = MaximumSwathHeightC[k];
6984 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6985 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6986 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6987 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6988 SwathHeightY[k] = MaximumSwathHeightY[k];
6989 SwathHeightC[k] = MinimumSwathHeightC;
6990 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6991 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6993 SwathHeightY[k] = MinimumSwathHeightY;
6994 SwathHeightC[k] = MinimumSwathHeightC;
6995 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6996 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
/*
 * DET split uses the buffer size after dml_ceil(..., 64):
 *  - no chroma swath: everything goes to luma;
 *  - luma swath <= 1.5x chroma swath: half each;
 *  - otherwise: two thirds (floored via dml_floor(..., 1024)) to luma and
 *    one third to chroma.
 */
6999 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7000 if (SwathHeightC[k] == 0) {
7001 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
7002 DETBufferSizeC[k] = 0;
7003 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
7004 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
7005 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
7007 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
7008 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
/*
 * A plane is unsupported when even the minimum swaths exceed half of the
 * (rounded) DET buffer, or when a swath width exceeds its maximum. Note
 * that this test uses actDETBufferSizeInKByte while the selection above
 * uses DETBufferSizeInKByte.
 */
7011 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
7012 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
7013 *ViewportSizeSupport = false;
7014 ViewportSizeSupportPerPlane[k] = false;
7016 ViewportSizeSupportPerPlane[k] = true;
/*
 * CalculateSwathWidth - compute per-plane swath widths, maximum swath heights
 * and swath widths rounded up to 256-byte block granularity.
 *
 * For every active plane k the function writes:
 *  SwathWidthSingleDPPY[k] / SwathWidthSingleDPPC[k]: swath width as if the
 *      plane were driven by a single DPP;
 *  SwathWidthY[k] / SwathWidthC[k]: swath width after ODM combine / DPP split
 *      (or the single-DPP value when ForceSingleDPP is set);
 *  MaximumSwathHeightY[k] / MaximumSwathHeightC[k]: 256-byte block height
 *      (non-vertical scan) or block width (vertical scan);
 *  swath_width_luma_ub[k] / swath_width_chroma_ub[k]: block-aligned upper
 *      bound of the swath width, limited by the block-aligned surface size.
 */
7022 static void CalculateSwathWidth(
7023 bool ForceSingleDPP,
7024 int NumberOfActivePlanes,
7025 enum source_format_class SourcePixelFormat[],
7026 enum scan_direction_class SourceScan[],
7027 int ViewportWidth[],
7028 int ViewportHeight[],
7029 int SurfaceWidthY[],
7030 int SurfaceWidthC[],
7031 int SurfaceHeightY[],
7032 int SurfaceHeightC[],
7033 enum odm_combine_mode ODMCombineEnabled[],
7036 int Read256BytesBlockHeightY[],
7037 int Read256BytesBlockHeightC[],
7038 int Read256BytesBlockWidthY[],
7039 int Read256BytesBlockWidthC[],
7040 int BlendingAndTiming[],
7044 double SwathWidthSingleDPPY[],
7045 double SwathWidthSingleDPPC[],
7046 double SwathWidthY[],
7047 double SwathWidthC[],
7048 int MaximumSwathHeightY[],
7049 int MaximumSwathHeightC[],
7050 int swath_width_luma_ub[],
7051 int swath_width_chroma_ub[])
7053 enum odm_combine_mode MainPlaneODMCombine;
7056 #ifdef __DML_VBA_DEBUG__
7057 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
7060 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* The swath runs along the viewport width for non-vertical scan and along the viewport height for vertical scan. */
7061 if (SourceScan[k] != dm_vert) {
7062 SwathWidthSingleDPPY[k] = ViewportWidth[k];
7064 SwathWidthSingleDPPY[k] = ViewportHeight[k];
7067 #ifdef __DML_VBA_DEBUG__
7068 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
7069 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
/* Use the ODM combine mode of the plane that BlendingAndTiming[k] points at; fall back to the plane's own mode if no index matches. */
7072 MainPlaneODMCombine = ODMCombineEnabled[k];
7073 for (j = 0; j < NumberOfActivePlanes; ++j) {
7074 if (BlendingAndTiming[k] == j) {
7075 MainPlaneODMCombine = ODMCombineEnabled[j];
/*
 * With ODM combine the width is capped at the scaled share of HActive
 * (a quarter for 4:1, a half for 2:1); without ODM, two DPPs per plane
 * halve the width; otherwise the single-DPP width is used unchanged.
 */
7079 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
7080 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
7081 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
7082 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
7083 } else if (DPPPerPlane[k] == 2) {
7084 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
7086 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7089 #ifdef __DML_VBA_DEBUG__
7090 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
7091 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
/* The 4:2:0 formats use half the luma width for chroma; every other format copies the luma width. */
7094 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
7095 SwathWidthC[k] = SwathWidthY[k] / 2;
7096 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
7098 SwathWidthC[k] = SwathWidthY[k];
7099 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
/* ForceSingleDPP discards the ODM / DPP split computed above. */
7102 if (ForceSingleDPP == true) {
7103 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7104 SwathWidthC[k] = SwathWidthSingleDPPC[k];
/* Surface dimensions passed through dml_ceil with the matching 256-byte block dimension. */
7107 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
7108 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
7109 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
7110 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
7112 #ifdef __DML_VBA_DEBUG__
7113 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
/*
 * Non-vertical scan: swath height follows the block height, and the
 * upper-bound width is dml_ceil(width - 1, block width) + block width,
 * limited by the rounded surface width. Chroma is 0 when BytePerPixC[k]
 * is not positive.
 */
7116 if (SourceScan[k] != dm_vert) {
7117 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
7118 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
7119 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
7120 if (BytePerPixC[k] > 0) {
7121 swath_width_chroma_ub[k] = dml_min(
7123 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
7125 swath_width_chroma_ub[k] = 0;
/* Vertical scan: block width and height swap roles, and the limit is the rounded surface height. */
7128 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
7129 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
7130 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
7131 if (BytePerPixC[k] > 0) {
7132 swath_width_chroma_ub[k] = dml_min(
7133 surface_height_ub_c,
7134 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7136 swath_width_chroma_ub[k] = 0;
/*
 * CalculateExtraLatency - return the extra latency as the sum of a
 * clock-cycle term and a byte-transfer term:
 *
 *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
 *       + ExtraLatencyBytes / ReturnBW
 *
 * ExtraLatencyBytes is obtained from CalculateExtraLatencyBytes(), to which
 * most of this function's parameters are forwarded.
 */
7143 static double CalculateExtraLatency(
7144 int RoundTripPingLatencyCycles,
7145 int ReorderingBytes,
7147 int TotalNumberOfActiveDPP,
7148 int PixelChunkSizeInKByte,
7149 int TotalNumberOfDCCActiveDPP,
7154 int NumberOfActivePlanes,
7156 int dpte_group_bytes[],
7157 double HostVMInefficiencyFactor,
7158 double HostVMMinPageSize,
7159 int HostVMMaxNonCachedPageTableLevels)
7161 double ExtraLatencyBytes;
7162 double ExtraLatency;
/* Byte count contributing to the latency, computed by the helper. */
7164 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7166 TotalNumberOfActiveDPP,
7167 PixelChunkSizeInKByte,
7168 TotalNumberOfDCCActiveDPP,
7172 NumberOfActivePlanes,
7175 HostVMInefficiencyFactor,
7177 HostVMMaxNonCachedPageTableLevels);
/* Cycle count converted with DCFCLK, byte count converted with ReturnBW. */
7179 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7181 #ifdef __DML_VBA_DEBUG__
7182 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7183 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7184 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7185 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7186 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7189 return ExtraLatency;
/*
 * CalculateExtraLatencyBytes - accumulate the number of bytes that add to
 * the extra latency.
 *
 * The running total `ret` is built from:
 *  - ReorderingBytes;
 *  - (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *     + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024;
 *  - when GPUVM is enabled, for every active plane:
 *    NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *    * HostVMInefficiencyFactor.
 *
 * `ret` is presumably the value returned to the caller - confirm.
 */
7192 static double CalculateExtraLatencyBytes(
7193 int ReorderingBytes,
7194 int TotalNumberOfActiveDPP,
7195 int PixelChunkSizeInKByte,
7196 int TotalNumberOfDCCActiveDPP,
7200 int NumberOfActivePlanes,
7202 int dpte_group_bytes[],
7203 double HostVMInefficiencyFactor,
7204 double HostVMMinPageSize,
7205 int HostVMMaxNonCachedPageTableLevels)
7208 int HostVMDynamicLevels = 0, k;
/*
 * Host VM page-table levels that count towards the total, only when both
 * GPUVM and HostVM are enabled:
 *  HostVMMinPageSize < 2048             -> all non-cached levels;
 *  2048 <= HostVMMinPageSize < 1048576  -> one level fewer, not below 0;
 *  otherwise                            -> two levels fewer, not below 0.
 */
7210 if (GPUVMEnable == true && HostVMEnable == true) {
7211 if (HostVMMinPageSize < 2048) {
7212 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
7213 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
7214 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
7216 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
7219 HostVMDynamicLevels = 0;
/* Chunk sizes are multiplied by 1024.0 before being added to the byte total. */
7222 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
/* Page-table traffic is only added when GPUVM is enabled. */
7224 if (GPUVMEnable == true) {
7225 for (k = 0; k < NumberOfActivePlanes; ++k) {
7226 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * CalculateUrgentLatency() - combine the three urgent latency inputs into
 * one value.
 *
 * "ret" is first set to dml_max3() of UrgentLatencyPixelDataOnly,
 * UrgentLatencyPixelMixedWithVMData and UrgentLatencyVMDataOnly. When
 * DoUrgentLatencyAdjustment is set, the term
 *   UrgentLatencyAdjustmentFabricClockComponent
 *     * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * is added; that term is zero when FabricClock equals the reference.
 *
 * NOTE(review): FabricClock and ret are used without a visible declaration
 * and the return statement is not shown in this view (presumably a
 * parameter, a local and "return ret") - confirm against the full file.
 * FabricClock is used as a divisor with no check visible here.
 */
7232 static double CalculateUrgentLatency(
7233 double UrgentLatencyPixelDataOnly,
7234 double UrgentLatencyPixelMixedWithVMData,
7235 double UrgentLatencyVMDataOnly,
7236 bool DoUrgentLatencyAdjustment,
7237 double UrgentLatencyAdjustmentFabricClockComponent,
7238 double UrgentLatencyAdjustmentFabricClockReference,
// Start from the dml_max3() of the three latency inputs (presumably the largest of them).
7243 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
// Optional adjustment that scales with how far FabricClock is from the reference clock.
7244 if (DoUrgentLatencyAdjustment == true) {
7245 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
/*
 * UseMinimumDCFCLK() - fill DCFCLKState[i][j] for every
 * i < mode_lib->vba.soc.num_states and j in {0, 1}.
 *
 * For each (i, j) the body computes:
 *   - DCFCLKRequiredForAverageBandwidth, from ProjectedDCFCLKDeepSleep[i][j]
 *     and the V-active pixel, cursor, meta row and DPTE row bandwidth totals;
 *   - DCFCLKRequiredForPeakBandwidth, from per-plane prefetch estimates,
 *     optional dynamic metadata timing and a final Tvm + 2*Tr0 check;
 * and then stores
 *   dml_min(DCFCLKPerState[i], 1.05 * dml_max(average, peak)).
 * Where a time budget is not positive, the visible code substitutes
 * DCFCLKPerState[i] for the requirement.
 *
 * In the lines visible here DCFCLKState is the only caller-provided array
 * that is written; the other array parameters are only read.
 *
 * NOTE(review): many lines of this definition are not visible in this view.
 * GPUVMEnable, HostVMEnable, ReturnBusWidth, HTotal, VTotal, VActive,
 * TSetupPipe, TdmbfPipe, TdmecPipe and TdmsksPipe are used without a visible
 * declaration; dummy1, dummy2 and dummy3 are declared without a visible use;
 * some call arguments, else lines and parts of two expressions are missing.
 * Confirm all of these against the full file.
 */
7250 static void UseMinimumDCFCLK(
7251 struct display_mode_lib *mode_lib,
7252 int MaxInterDCNTileRepeaters,
7253 int MaxPrefetchMode,
7254 double FinalDRAMClockChangeLatency,
7255 double SREnterPlusExitTime,
7257 int RoundTripPingLatencyCycles,
7258 int ReorderingBytes,
7259 int PixelChunkSizeInKByte,
7262 int GPUVMMaxPageTableLevels,
7264 int NumberOfActivePlanes,
7265 double HostVMMinPageSize,
7266 int HostVMMaxNonCachedPageTableLevels,
7267 bool DynamicMetadataVMEnabled,
7268 enum immediate_flip_requirement ImmediateFlipRequirement,
7269 bool ProgressiveToInterlaceUnitInOPP,
7270 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
7271 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
7274 int DynamicMetadataTransmittedBytes[],
7275 int DynamicMetadataLinesBeforeActiveRequired[],
7277 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
7278 double RequiredDISPCLK[][2],
7279 double UrgLatency[],
7280 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
7281 double ProjectedDCFCLKDeepSleep[][2],
7282 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
7283 double TotalVActivePixelBandwidth[][2],
7284 double TotalVActiveCursorBandwidth[][2],
7285 double TotalMetaRowBandwidth[][2],
7286 double TotalDPTERowBandwidth[][2],
7287 unsigned int TotalNumberOfActiveDPP[][2],
7288 unsigned int TotalNumberOfDCCActiveDPP[][2],
7289 int dpte_group_bytes[],
7290 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
7291 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
7292 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
7293 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
7294 int BytePerPixelY[],
7295 int BytePerPixelC[],
7297 double PixelClock[],
7298 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
7299 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
7300 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
7301 bool DynamicMetadataEnable[],
7302 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
7303 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
7304 double ReadBandwidthLuma[],
7305 double ReadBandwidthChroma[],
7306 double DCFCLKPerState[],
7307 double DCFCLKState[][2])
7309 struct vba_vars_st *v = &mode_lib->vba;
7310 int dummy1, i, j, k;
7311 double NormalEfficiency, dummy2, dummy3;
7312 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
// The percentage input is turned into a 0..1 style fraction once, then used as a divisor throughout.
7314 NormalEfficiency = PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
// Outer loops: i walks the states reported by v->soc.num_states, j takes the values 0 and 1.
7315 for (i = 0; i < v->soc.num_states; ++i) {
7316 for (j = 0; j <= 1; ++j) {
7317 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7318 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7319 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7320 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7321 double MinimumTWait;
7322 double NonDPTEBandwidth;
7323 double DPTEBandwidth;
7324 double DCFCLKRequiredForAverageBandwidth;
7325 double ExtraLatencyBytes;
7326 double ExtraLatencyCycles;
7327 double DCFCLKRequiredForPeakBandwidth;
7328 int NoOfDPPState[DC__NUM_DPP__MAX];
7329 double MinimumTvmPlus2Tr0;
// Sum over planes of NoOfDPP * DPTEBytesPerRow divided by 15.75 line times (HTotal / PixelClock).
7331 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7332 for (k = 0; k < NumberOfActivePlanes; ++k) {
7333 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7334 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
// Copy the DPP counts for this (i, j) into a plain int array used below.
7337 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
7338 NoOfDPPState[k] = NoOfDPP[i][j][k];
7341 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
7342 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
// With HostVM enabled or immediate flip required, the DPTE bandwidth is the total accumulated above instead of TotalDPTERowBandwidth.
7343 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
7344 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
// Average requirement: dml_max3() of the deep sleep clock, the bandwidth limited by the max-average percentage, and the bandwidth derated by NormalEfficiency (DPTE part derated twice).
7345 DCFCLKRequiredForAverageBandwidth = dml_max3(
7346 ProjectedDCFCLKDeepSleep[i][j],
7347 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth
7348 / (MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7349 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / ReturnBusWidth);
// NOTE(review): only part of this argument list is visible in this view.
7351 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7353 TotalNumberOfActiveDPP[i][j],
7354 PixelChunkSizeInKByte,
7355 TotalNumberOfDCCActiveDPP[i][j],
7359 NumberOfActivePlanes,
7364 HostVMMaxNonCachedPageTableLevels);
// Extra latency in cycles: ping latency, the ARB-to-RET delay constant, and the extra bytes divided by NormalEfficiency and ReturnBusWidth.
7365 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
// Per-plane peak requirement.
7366 for (k = 0; k < NumberOfActivePlanes; ++k) {
7367 double DCFCLKCyclesRequiredInPrefetch;
7368 double ExpectedPrefetchBWAcceleration;
7369 double PrefetchTime;
// Cycles to fetch the luma and chroma prefetch lines of this plane.
7371 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
7372 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
// Total prefetch cycles: twice the per-DPP extra latency, PDE/meta PTE bytes (only counted when GPUVMMaxPageTableLevels > 2), two DPTE rows, two meta rows, plus the pixel cycles above.
7373 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7374 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7375 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth
7376 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7377 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
// Ratio of V-active (pixel + cursor) bandwidth to the luma + chroma read bandwidth.
7378 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k])
7379 / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
// Non-zero only when GPUVM, this plane's dynamic metadata and DynamicMetadataVMEnabled are all on.
7380 DynamicMetadataVMExtraLatency[k] =
7381 (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
7382 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
// NOTE(review): one line of this expression (between the MinimumTWait term and the multiplication below) is not visible in this view.
7383 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait
7385 * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : GPUVMMaxPageTableLevels - 2)
7386 * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7387 - DynamicMetadataVMExtraLatency[k];
// A positive prefetch budget gives a clock estimate scaled by the expected prefetch ratio and the bandwidth ratio above.
7389 if (PrefetchTime > 0) {
7390 double ExpectedVRatioPrefetch;
7391 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7392 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7393 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7394 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
// HostVM or required immediate flip adds the DPTE bandwidth share of this plane.
7395 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
7396 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7397 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / ReturnBusWidth;
// NOTE(review): presumably the else of "PrefetchTime > 0" (branch line not visible): fall back to the state's DCFCLK.
7400 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
// Planes with dynamic metadata must also fit the extra latency into the time left before VStartup.
7402 if (DynamicMetadataEnable[k] == true) {
7407 double AllowedTimeForUrgentExtraLatency;
// NOTE(review): only part of this argument list is visible; TSetupPipe, TdmbfPipe, TdmecPipe and TdmsksPipe used below are presumably produced by this call - confirm.
7409 CalculateVupdateAndDynamicMetadataParameters(
7410 MaxInterDCNTileRepeaters,
7411 RequiredDPPCLK[i][j][k],
7412 RequiredDISPCLK[i][j],
7413 ProjectedDCFCLKDeepSleep[i][j],
7416 VTotal[k] - VActive[k],
7417 DynamicMetadataTransmittedBytes[k],
7418 DynamicMetadataLinesBeforeActiveRequired[k],
7420 ProgressiveToInterlaceUnitInOPP,
7428 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7429 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7430 if (AllowedTimeForUrgentExtraLatency > 0) {
7431 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7432 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7433 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
// NOTE(review): presumably the else of the check above (branch line not visible): no time left, use the state's DCFCLK.
7435 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
// Peak requirement starts as the sum of the per-plane requirements.
7439 DCFCLKRequiredForPeakBandwidth = 0;
7440 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
7441 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
// NOTE(review): the last line of this expression (the value used when GPUVM is disabled) is not visible in this view.
7443 MinimumTvmPlus2Tr0 = UrgLatency[i]
7444 * (GPUVMEnable == true ?
7445 (HostVMEnable == true ?
7446 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) :
// Final per-plane check against the time available for Tvm + 2*Tr0 + Tsw.
7448 for (k = 0; k < NumberOfActivePlanes; ++k) {
7449 double MaximumTvmPlus2Tr0PlusTsw;
7450 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
// Not enough time even at the minimum: use the state's DCFCLK; the next statement group is presumably the else branch (line not visible).
7451 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7452 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
7454 DCFCLKRequiredForPeakBandwidth = dml_max3(
7455 DCFCLKRequiredForPeakBandwidth,
7456 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7457 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
// Result: 5% above the larger requirement, limited to the state's DCFCLK (assuming dml_min/dml_max pick the smaller/larger argument).
7460 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * CalculateUnboundedRequestAndCompressedBufferSize() - decide whether
 * unbounded requesting is used and size the compressed buffer.
 *
 * Outputs:
 *   *UnboundedRequestEnabled     - result of UnboundedRequest() for the given
 *                                  policy, TotalActiveDPP, NoChromaPlanes and
 *                                  Output[0] (only the first Output entry is
 *                                  read).
 *   *CompressedBufferSizeInkByte - ConfigReturnBufferSizeInKByte minus the
 *                                  DET size times TotalActiveDPP (unbounded
 *                                  request enabled) or times MaxNumDPP
 *                                  (otherwise), then multiplied by
 *                                  CompressedBufferSegmentSizeInkByteFinal
 *                                  and divided by 64.
 *
 * NOTE(review): TotalActiveDPP and MaxNumDPP are used without a visible
 * declaration in this view (presumably parameters) - confirm against the
 * full file.
 */
7465 static void CalculateUnboundedRequestAndCompressedBufferSize(
7466 unsigned int DETBufferSizeInKByte,
7467 int ConfigReturnBufferSizeInKByte,
7468 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7470 bool NoChromaPlanes,
7472 int CompressedBufferSegmentSizeInkByteFinal,
7473 enum output_encoder_class *Output,
7474 bool *UnboundedRequestEnabled,
7475 int *CompressedBufferSizeInkByte)
// DET size passed through dml_ceil(…, 64); presumably rounds up to a multiple of 64 - confirm against dml_ceil.
7477 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7479 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
// The subtraction is evaluated in double (actDETBufferSizeInKByte is a double) and converted to int on assignment.
7480 *CompressedBufferSizeInkByte = (
7481 *UnboundedRequestEnabled == true ?
7482 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7483 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
// Integer arithmetic: multiply by the segment size first, then divide by 64.
7484 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
// Debug-only trace of the inputs and results.
7486 #ifdef __DML_VBA_DEBUG__
7487 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7488 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7489 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7490 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7491 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7492 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7493 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
/*
 * UnboundedRequest() - evaluate the unbounded requesting policy.
 *
 * ret_val is set to true only when all of the following hold:
 *   - UseUnboundedRequestingFinal is not dm_unbounded_requesting_disable,
 *   - TotalNumberOfActiveDPP is exactly 1,
 *   - NoChroma is true.
 * A second check then handles the dm_unbounded_requesting_edp_only policy
 * for outputs other than dm_edp.
 *
 * NOTE(review): the body of that second check and the return statement are
 * not visible in this view; presumably ret_val is cleared there and then
 * returned - confirm against the full file.
 */
7497 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7499 bool ret_val = false;
// Base rule: policy not disabled, a single active DPP, and no chroma.
7501 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
// eDP-only policy with a non-eDP output.
7502 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {
7508 #endif /* CONFIG_DRM_AMD_DC_DCN3_1 */