From: Alvin Lee Date: Mon, 29 Aug 2022 14:33:21 +0000 (-0400) Subject: drm/amd/display: Fix pipe split prediction X-Git-Tag: microblaze-v6.2~40^2~8^2~72 X-Git-Url: http://git.monstr.eu/?a=commitdiff_plain;h=064841347d75e094fa2bcb5b997639ec9e5f9a5a;p=linux-2.6-microblaze.git drm/amd/display: Fix pipe split prediction [Why & How] - Pipe split prediction previously only took into account MPC split. We must also consider when ODM combine is required, and when we apply ODM combine by policy. - Also re-work DET allocation function as it wasn't properly splitting the DET per stream, per plane. Reviewed-by: Jun Lei Acked-by: Wayne Lin Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 165cae0ae704..ac0fedfa7bc1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1850,12 +1850,36 @@ int dcn32_populate_dml_pipes_from_context( int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *pipe; - bool subvp_in_use = false, is_pipe_split_expected[MAX_PIPES]; + bool subvp_in_use = false; + uint8_t is_pipe_split_expected[MAX_PIPES] = {0}; int plane_count = 0; struct dc_crtc_timing *timing; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + /* Determine whether we will apply ODM 2to1 policy: + * Applies to single display and where the number of planes is less than 3. + * For 3 plane case ( 2 MPO planes ), we will not set the policy for the MPO pipes. + * + * Apply pipe split policy first so we can predict the pipe split correctly + * (dcn32_predict_pipe_split). 
+ */ + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + if (context->stream_count == 1 && !dc_is_hdmi_signal(res_ctx->pipe_ctx[i].stream->signal)) { + if (dc->debug.enable_single_display_2to1_odm_policy) { + if (!((plane_count > 2) && pipe->top_pipe)) + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; + } + } + pipe_cnt++; + } + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { if (!res_ctx->pipe_ctx[i].stream) @@ -1916,32 +1940,12 @@ int dcn32_populate_dml_pipes_from_context( ++plane_count; DC_FP_START(); - is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, pipes[i].pipe, i); + is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, &pipes[pipe_cnt]); DC_FP_END(); pipe_cnt++; } - /* Determine whether we will apply ODM 2to1 policy - * Applies to single display and where the number of planes is less than 3 - * For 3 plane case ( 2 MPO planes ), we will not set the policy for the MPO pipes - */ - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - if (!res_ctx->pipe_ctx[i].stream) - continue; - pipe = &res_ctx->pipe_ctx[i]; - timing = &pipe->stream->timing; - - pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; - if (context->stream_count == 1 && !dc_is_hdmi_signal(res_ctx->pipe_ctx[i].stream->signal)) { - if (dc->debug.enable_single_display_2to1_odm_policy) { - if (!((plane_count > 2) && pipe->top_pipe)) - pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; - } - } - pipe_cnt++; - } - /* For DET allocation, we don't want to use DML policy (not optimal for utilizing all * the DET available for each pipe). Use the DET override input to maintain our driver * policy. 
@@ -1958,7 +1962,7 @@ int dcn32_populate_dml_pipes_from_context( } } } else - dcn32_determine_det_override(context, pipes, is_pipe_split_expected, dc->res_pool->pipe_count); + dcn32_determine_det_override(dc, context, pipes, is_pipe_split_expected); // In general cases we want to keep the dram clock change requirement // (prefer configs that support MCLK switch). Only override to false diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 95f3517724d1..1039df3f9565 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -109,8 +109,10 @@ struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( struct dc_stream_state *stream, struct pipe_ctx *head_pipe); -void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool *is_pipe_split_expected, int pipe_cnt); +void dcn32_determine_det_override(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + uint8_t *is_pipe_split_expected); /* definitions for run time init of reg offsets */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 417dfdcf9596..237f71d9304e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -225,36 +225,106 @@ bool dcn32_mpo_in_use(struct dc_state *context) return false; } -void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool *is_pipe_split_expected, int pipe_cnt) +/** + * ******************************************************************************************* + * dcn32_determine_det_override: Determine DET allocation for each pipe + * + * This function determines how much DET to allocate for each pipe. 
The total number of + * DET segments will be split equally among each of the streams, and after that the DET + * segments per stream will be split equally among the planes for the given stream. + * + * If there is a plane that's driven by more than 1 pipe (i.e. pipe split), then the + * number of DET for that given plane will be split among the pipes driving that plane. + * + * The pipe split prediction (is_pipe_split_expected) has to work 100% of the time in + * order for this function to work properly. + * + * High level algorithm: + * 1. Split total DET among number of streams + * 2. For each stream, split DET among the planes + * 3. For each plane, check if pipe split is expected. If yes, split the DET for that plane + * among the number of splits we expect (i.e. 2 [2:1] or 4 [4:1]) + * - NOTE: Make sure not to double count the pipe splits (i.e. the pipes could + * already be split in the context). + * 4. Assign the DET override to the DML pipes. + * + * @param [in]: dc: Current DC state + * @param [in]: context: New DC state to be programmed + * @param [in]: pipes: Array of DML pipes + * @param [in]: is_pipe_split_expected: Array indicating pipe split prediction for each pipe + * + * @return: void + * + * ******************************************************************************************* + */ +void dcn32_determine_det_override(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + uint8_t *is_pipe_split_expected) { - int i, j, count, stream_segments, pipe_segments[MAX_PIPES]; + uint8_t i, j, pipe_plane_count, stream_segments, plane_segments, pipe_segments[MAX_PIPES] = {0}; + uint8_t pipe_counted[MAX_PIPES] = {0}; + uint8_t pipe_cnt = 0; + struct dc_plane_state *current_plane = NULL; + struct pipe_ctx *next_odm_pipe = NULL; + struct pipe_ctx *bottom_pipe = NULL; if (context->stream_count > 0) { stream_segments = 18 / context->stream_count; for (i = 0; i < context->stream_count; i++) { - count = 0; - for (j = 0; j < pipe_cnt; 
j++) { - if (context->res_ctx.pipe_ctx[j].stream == context->streams[i]) { - count++; - if (is_pipe_split_expected[j]) - count++; + if (context->stream_status[i].plane_count > 0) + plane_segments = stream_segments / context->stream_status[i].plane_count; + else + plane_segments = stream_segments; + for (j = 0; j < dc->res_pool->pipe_count; j++) { + pipe_plane_count = 0; + if (context->res_ctx.pipe_ctx[j].stream == context->streams[i] && + pipe_counted[j] != 1) { + /* Note: pipe_plane_count indicates the number of pipes to be used for a + * given plane. e.g. pipe_plane_count = 1 means single pipe (i.e. not split), + * pipe_plane_count = 2 means 2:1 split, etc. + */ + pipe_plane_count++; + pipe_counted[j] = 1; + current_plane = context->res_ctx.pipe_ctx[j].plane_state; + if (is_pipe_split_expected[j] != 0) { + pipe_plane_count += is_pipe_split_expected[j]; + + next_odm_pipe = context->res_ctx.pipe_ctx[j].next_odm_pipe; + bottom_pipe = context->res_ctx.pipe_ctx[j].bottom_pipe; + + /* If pipe already happens to be split in context, mark as already + * counted so we don't double count the pipe split. 
+ */ + while (next_odm_pipe) { + if (next_odm_pipe->plane_state == current_plane) { + pipe_counted[next_odm_pipe->pipe_idx] = 1; + pipe_segments[next_odm_pipe->pipe_idx] = plane_segments / pipe_plane_count; + } + next_odm_pipe = next_odm_pipe->next_odm_pipe; + } + + while (bottom_pipe) { + if (bottom_pipe->plane_state == current_plane) { + pipe_counted[bottom_pipe->pipe_idx] = 1; + pipe_segments[bottom_pipe->pipe_idx] = plane_segments / pipe_plane_count; + } + bottom_pipe = bottom_pipe->bottom_pipe; + } + } + pipe_segments[j] = plane_segments / pipe_plane_count; } } - pipe_segments[i] = stream_segments / count; } - for (i = 0; i < pipe_cnt; i++) { - pipes[i].pipe.src.det_size_override = 0; - for (j = 0; j < context->stream_count; j++) { - if (context->res_ctx.pipe_ctx[i].stream == context->streams[j]) { - pipes[i].pipe.src.det_size_override = pipe_segments[j] * DCN3_2_DET_SEG_SIZE; - break; - } - } + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + pipes[pipe_cnt].pipe.src.det_size_override = pipe_segments[i] * DCN3_2_DET_SEG_SIZE; + pipe_cnt++; } } else { - for (i = 0; i < pipe_cnt; i++) + for (i = 0; i < dc->res_pool->pipe_count; i++) pipes[i].pipe.src.det_size_override = 4 * DCN3_2_DET_SEG_SIZE; //DCN3_2_DEFAULT_DET_SIZE } } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 7f6c977c4981..2b3ffa300f25 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -286,41 +286,92 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, } } -bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index) +/** + * ******************************************************************************************* + * dcn32_predict_pipe_split: Predict if pipe split will occur for a given DML pipe + * + * This function takes in 
a DML pipe (pipe_e2e) and predicts if pipe split is required (both + * ODM and MPC). For pipe split, ODM combine is determined by the ODM mode, and MPC combine is + * determined by DPPClk requirements. + * + * This function follows the same policy as DML: + * - Check for ODM combine requirements / policy first + * - MPC combine is only chosen if there is no ODM combine requirement / policy in place, and + * MPC is required + * + * @param [in]: context: New DC state to be programmed + * @param [in]: pipe_e2e: DML pipe end to end context + * + * @return: Number of splits expected (1 for 2:1 split, 3 for 4:1 split, 0 for no splits). + * + * ******************************************************************************************* + */ +uint8_t dcn32_predict_pipe_split(struct dc_state *context, + display_e2e_pipe_params_st *pipe_e2e) { double pscl_throughput; double pscl_throughput_chroma; double dpp_clk_single_dpp, clock; double clk_frequency = 0.0; double vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz; + bool total_available_pipes_support = false; + uint32_t number_of_dpp = 0; + enum odm_combine_mode odm_mode = dm_odm_combine_mode_disabled; + double req_dispclk_per_surface = 0; + uint8_t num_splits = 0; dc_assert_fp_enabled(); - dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio, - pipe.scale_ratio_depth.hscl_ratio_c, - pipe.scale_ratio_depth.vscl_ratio, - pipe.scale_ratio_depth.vscl_ratio_c, - context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk, - context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk, - pipe.dest.pixel_rate_mhz, - pipe.src.source_format, - pipe.scale_taps.htaps, - pipe.scale_taps.htaps_c, - pipe.scale_taps.vtaps, - pipe.scale_taps.vtaps_c, - /* Output */ - &pscl_throughput, &pscl_throughput_chroma, - &dpp_clk_single_dpp); + dml32_CalculateODMMode(context->bw_ctx.dml.ip.maximum_pixels_per_line_per_dsc_unit, + pipe_e2e->pipe.dest.hactive, + pipe_e2e->dout.output_format, + pipe_e2e->dout.output_type, + 
pipe_e2e->pipe.dest.odm_combine_policy, + context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dispclk_mhz, + context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dispclk_mhz, + pipe_e2e->dout.dsc_enable != 0, + 0, /* TotalNumberOfActiveDPP can be 0 since we're predicting pipe split requirement */ + context->bw_ctx.dml.ip.max_num_dpp, + pipe_e2e->pipe.dest.pixel_rate_mhz, + context->bw_ctx.dml.soc.dcn_downspread_percent, + context->bw_ctx.dml.ip.dispclk_ramp_margin_percent, + context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz, + pipe_e2e->dout.dsc_slices, + /* Output */ + &total_available_pipes_support, + &number_of_dpp, + &odm_mode, + &req_dispclk_per_surface); + + dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe_e2e->pipe.scale_ratio_depth.hscl_ratio, + pipe_e2e->pipe.scale_ratio_depth.hscl_ratio_c, + pipe_e2e->pipe.scale_ratio_depth.vscl_ratio, + pipe_e2e->pipe.scale_ratio_depth.vscl_ratio_c, + context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk, + context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk, + pipe_e2e->pipe.dest.pixel_rate_mhz, + pipe_e2e->pipe.src.source_format, + pipe_e2e->pipe.scale_taps.htaps, + pipe_e2e->pipe.scale_taps.htaps_c, + pipe_e2e->pipe.scale_taps.vtaps, + pipe_e2e->pipe.scale_taps.vtaps_c, + /* Output */ + &pscl_throughput, &pscl_throughput_chroma, + &dpp_clk_single_dpp); clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100); if (clock > 0) - clk_frequency = vco_speed * 4.0 / ((int)(vco_speed * 4.0)); + clk_frequency = vco_speed * 4.0 / ((int)(vco_speed * 4.0) / clock); - if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz) - return true; - else - return false; + if (odm_mode == dm_odm_combine_mode_2to1) + num_splits = 1; + else if (odm_mode == dm_odm_combine_mode_4to1) + num_splits = 3; + else if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dppclk_mhz) + num_splits = 1; + + return 
num_splits; } static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 3ed06ab855be..ce4c33e64186 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -41,9 +41,8 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, display_e2e_pipe_params_st *pipes, int pipe_cnt); -bool dcn32_predict_pipe_split(struct dc_state *context, - display_pipe_params_st pipe, - int index); +uint8_t dcn32_predict_pipe_split(struct dc_state *context, + display_e2e_pipe_params_st *pipe_e2e); void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,