Merge tag 'drm-misc-next-fixes-2021-09-09' of git://anongit.freedesktop.org/drm/drm...
[linux-2.6-microblaze.git] / drivers / gpu / drm / tegra / dc.c
index 51bbbc4..16c7aab 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/iommu.h>
+#include <linux/interconnect.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/pm_runtime.h>
@@ -618,9 +619,14 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
        struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc);
        int err;
 
+       plane_state->peak_memory_bandwidth = 0;
+       plane_state->avg_memory_bandwidth = 0;
+
        /* no need for further checks if the plane is being disabled */
-       if (!new_plane_state->crtc)
+       if (!new_plane_state->crtc) {
+               plane_state->total_peak_memory_bandwidth = 0;
                return 0;
+       }
 
        err = tegra_plane_format(new_plane_state->fb->format->format,
                                 &plane_state->format,
@@ -808,6 +814,12 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm,
        formats = dc->soc->primary_formats;
        modifiers = dc->soc->modifiers;
 
+       err = tegra_plane_interconnect_init(plane);
+       if (err) {
+               kfree(plane);
+               return ERR_PTR(err);
+       }
+
        err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
                                       &tegra_plane_funcs, formats,
                                       num_formats, modifiers, type, NULL);
@@ -845,12 +857,18 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane,
 {
        struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
                                                                                 plane);
+       struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state);
        struct tegra_plane *tegra = to_tegra_plane(plane);
        int err;
 
+       plane_state->peak_memory_bandwidth = 0;
+       plane_state->avg_memory_bandwidth = 0;
+
        /* no need for further checks if the plane is being disabled */
-       if (!new_plane_state->crtc)
+       if (!new_plane_state->crtc) {
+               plane_state->total_peak_memory_bandwidth = 0;
                return 0;
+       }
 
        /* scaling not supported for cursor */
        if ((new_plane_state->src_w >> 16 != new_plane_state->crtc_w) ||
@@ -1030,6 +1048,12 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
        if (!dc->soc->has_nvdisplay) {
                num_formats = ARRAY_SIZE(tegra_legacy_cursor_plane_formats);
                formats = tegra_legacy_cursor_plane_formats;
+
+               err = tegra_plane_interconnect_init(plane);
+               if (err) {
+                       kfree(plane);
+                       return ERR_PTR(err);
+               }
        } else {
                num_formats = ARRAY_SIZE(tegra_cursor_plane_formats);
                formats = tegra_cursor_plane_formats;
@@ -1149,6 +1173,12 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
        num_formats = dc->soc->num_overlay_formats;
        formats = dc->soc->overlay_formats;
 
+       err = tegra_plane_interconnect_init(plane);
+       if (err) {
+               kfree(plane);
+               return ERR_PTR(err);
+       }
+
        if (!cursor)
                type = DRM_PLANE_TYPE_OVERLAY;
        else
@@ -1572,6 +1602,11 @@ static int tegra_dc_show_stats(struct seq_file *s, void *data)
        seq_printf(s, "underflow: %lu\n", dc->stats.underflow);
        seq_printf(s, "overflow: %lu\n", dc->stats.overflow);
 
+       seq_printf(s, "frames total: %lu\n", dc->stats.frames_total);
+       seq_printf(s, "vblank total: %lu\n", dc->stats.vblank_total);
+       seq_printf(s, "underflow total: %lu\n", dc->stats.underflow_total);
+       seq_printf(s, "overflow total: %lu\n", dc->stats.overflow_total);
+
        return 0;
 }
 
@@ -1804,6 +1839,106 @@ static int tegra_dc_wait_idle(struct tegra_dc *dc, unsigned long timeout)
        return -ETIMEDOUT;
 }
 
+/*
+ * Propagate the memory bandwidth of the CRTC's planes to their interconnect
+ * paths.
+ *
+ * @crtc: CRTC whose planes are being updated
+ * @state: atomic state holding the old CRTC and plane states
+ * @prepare_bandwidth_transition: true when called ahead of the hardware
+ *     update (atomic_begin), false when called after it (post_commit)
+ *
+ * Does nothing on SoCs that have has_nvdisplay set.
+ */
+static void
+tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc,
+                                  struct drm_atomic_state *state,
+                                  bool prepare_bandwidth_transition)
+{
+       const struct tegra_plane_state *old_tegra_state, *new_tegra_state;
+       u32 i, new_avg_bw, old_avg_bw, new_peak_bw, old_peak_bw;
+       const struct drm_plane_state *old_plane_state;
+       const struct drm_crtc_state *old_crtc_state;
+       struct tegra_dc_window window, old_window;
+       struct tegra_dc *dc = to_tegra_dc(crtc);
+       struct tegra_plane *tegra;
+       struct drm_plane *plane;
+
+       if (dc->soc->has_nvdisplay)
+               return;
+
+       old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc);
+
+       if (!crtc->state->active) {
+               /* CRTC was already inactive, nothing to drop */
+               if (!old_crtc_state->active)
+                       return;
+
+               /*
+                * When CRTC is disabled on DPMS, the state of attached planes
+                * is kept unchanged. Hence we need to enforce removal of the
+                * bandwidths from the ICC paths.
+                */
+               drm_atomic_crtc_for_each_plane(plane, crtc) {
+                       tegra = to_tegra_plane(plane);
+
+                       icc_set_bw(tegra->icc_mem, 0, 0);
+                       icc_set_bw(tegra->icc_mem_vfilter, 0, 0);
+               }
+
+               return;
+       }
+
+       for_each_old_plane_in_state(old_crtc_state->state, plane,
+                                   old_plane_state, i) {
+               old_tegra_state = to_const_tegra_plane_state(old_plane_state);
+               new_tegra_state = to_const_tegra_plane_state(plane->state);
+               tegra = to_tegra_plane(plane);
+
+               /*
+                * We're iterating over the global atomic state and it contains
+                * planes from another CRTC, hence we need to filter out the
+                * planes unrelated to this CRTC.
+                */
+               if (tegra->dc != dc)
+                       continue;
+
+               new_avg_bw = new_tegra_state->avg_memory_bandwidth;
+               old_avg_bw = old_tegra_state->avg_memory_bandwidth;
+
+               new_peak_bw = new_tegra_state->total_peak_memory_bandwidth;
+               old_peak_bw = old_tegra_state->total_peak_memory_bandwidth;
+
+               /*
+                * See the comment related to !crtc->state->active above,
+                * which explains why bandwidths need to be updated when
+                * CRTC is turning ON.
+                */
+               if (new_avg_bw == old_avg_bw && new_peak_bw == old_peak_bw &&
+                   old_crtc_state->active)
+                       continue;
+
+               /*
+                * Only the source and destination heights are filled in
+                * before the windows are handed to
+                * tegra_plane_use_vertical_filtering().
+                * NOTE(review): assumes the helper reads no other window
+                * field -- confirm against its definition.
+                */
+               window.src.h = drm_rect_height(&plane->state->src) >> 16;
+               window.dst.h = drm_rect_height(&plane->state->dst);
+
+               old_window.src.h = drm_rect_height(&old_plane_state->src) >> 16;
+               old_window.dst.h = drm_rect_height(&old_plane_state->dst);
+
+               /*
+                * During the preparation phase (atomic_begin), the memory
+                * freq should go high before the DC changes are committed
+                * if bandwidth requirement goes up, otherwise memory freq
+                * should stay high if BW requirement goes down.  The
+                * opposite applies to the completion phase (post_commit).
+                */
+               if (prepare_bandwidth_transition) {
+                       new_avg_bw = max(old_avg_bw, new_avg_bw);
+                       new_peak_bw = max(old_peak_bw, new_peak_bw);
+
+                       /* keep the vfilter path powered if the old state used it */
+                       if (tegra_plane_use_vertical_filtering(tegra, &old_window))
+                               window = old_window;
+               }
+
+               icc_set_bw(tegra->icc_mem, new_avg_bw, new_peak_bw);
+
+               if (tegra_plane_use_vertical_filtering(tegra, &window))
+                       icc_set_bw(tegra->icc_mem_vfilter, new_avg_bw, new_peak_bw);
+               else
+                       icc_set_bw(tegra->icc_mem_vfilter, 0, 0);
+       }
+}
+
 static void tegra_crtc_atomic_disable(struct drm_crtc *crtc,
                                      struct drm_atomic_state *state)
 {
@@ -1985,6 +2120,8 @@ static void tegra_crtc_atomic_begin(struct drm_crtc *crtc,
 {
        unsigned long flags;
 
+       tegra_crtc_update_memory_bandwidth(crtc, state, true);
+
        if (crtc->state->event) {
                spin_lock_irqsave(&crtc->dev->event_lock, flags);
 
@@ -2017,7 +2154,207 @@ static void tegra_crtc_atomic_flush(struct drm_crtc *crtc,
        value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
 }
 
+/*
+ * Tell whether a plane state should be treated as a cursor.
+ *
+ * The plane must be of type DRM_PLANE_TYPE_CURSOR.  On SoCs with
+ * supports_cursor set that alone is enough; otherwise the plane additionally
+ * has to be unscaled in width, use a single-plane format and have a source
+ * row of at most 256 bytes.
+ *
+ * The caller must pass a state with both a CRTC and a framebuffer attached,
+ * since both are dereferenced unconditionally.
+ */
+static bool tegra_plane_is_cursor(const struct drm_plane_state *state)
+{
+       const struct tegra_dc_soc_info *soc_info = to_tegra_dc(state->crtc)->soc;
+       const struct drm_format_info *format = state->fb->format;
+       unsigned int src_width = drm_rect_width(&state->src) >> 16;
+       unsigned int dst_width = drm_rect_width(&state->dst);
+
+       if (state->plane->type != DRM_PLANE_TYPE_CURSOR)
+               return false;
+
+       if (soc_info->supports_cursor)
+               return true;
+
+       return src_width == dst_width && format->num_planes == 1 &&
+              src_width * format->cpp[0] <= 256;
+}
+
+/*
+ * Build a bitmask of the planes on a CRTC whose destination rectangle
+ * intersects the destination rectangle of @plane_state.
+ *
+ * @state: CRTC state whose planes are examined
+ * @plane_state: plane state to test the other planes against
+ *
+ * Bits are indexed by tegra_plane::index.  Returns 0 when @plane_state is
+ * invisible, has no framebuffer, or qualifies as a cursor.
+ */
+static unsigned long
+tegra_plane_overlap_mask(struct drm_crtc_state *state,
+                        const struct drm_plane_state *plane_state)
+{
+       const struct drm_plane_state *other;
+       unsigned long mask = 0;
+       struct drm_plane *plane;
+
+       if (!plane_state->visible || !plane_state->fb)
+               return 0;
+
+       /*
+        * A cursor plane is served well enough by the data-prefetch FIFO
+        * when something overlaps it, so it is reported as non-overlapping.
+        */
+       if (tegra_plane_is_cursor(plane_state))
+               return 0;
+
+       drm_atomic_crtc_state_for_each_plane_state(plane, other, state) {
+               struct drm_rect clip;
+
+               if (!other->visible || !other->fb)
+                       continue;
+
+               /*
+                * Cursor planes are narrow, so counting their bandwidth in
+                * the overlapping area would not be practical.
+                */
+               if (tegra_plane_is_cursor(other))
+                       continue;
+
+               /*
+                * Work on a fresh copy for every plane: drm_rect_intersect()
+                * is handed a writable rectangle and may clip it.
+                */
+               clip = plane_state->dst;
+
+               if (drm_rect_intersect(&clip, &other->dst))
+                       mask |= BIT(to_tegra_plane(other->plane)->index);
+       }
+
+       return mask;
+}
+
+/*
+ * Compute total_peak_memory_bandwidth for every plane of the CRTC, taking
+ * into account the bandwidth of the planes that overlap it.
+ *
+ * @crtc: CRTC being checked
+ * @state: atomic state under check
+ *
+ * Planes whose total peak bandwidth changed are pulled into @state so that
+ * the new value is stored in their new plane state.
+ *
+ * Returns 0 on success (and immediately on SoCs with has_nvdisplay set),
+ * -EINVAL if a plane index is not below TEGRA_DC_LEGACY_PLANES_NUM, or the
+ * error reported by drm_atomic_get_plane_state().
+ */
+static int tegra_crtc_calculate_memory_bandwidth(struct drm_crtc *crtc,
+                                                struct drm_atomic_state *state)
+{
+       ulong overlap_mask[TEGRA_DC_LEGACY_PLANES_NUM] = {}, mask;
+       u32 plane_peak_bw[TEGRA_DC_LEGACY_PLANES_NUM] = {};
+       bool all_planes_overlap_simultaneously = true;
+       const struct tegra_plane_state *tegra_state;
+       const struct drm_plane_state *plane_state;
+       struct tegra_dc *dc = to_tegra_dc(crtc);
+       struct drm_crtc_state *new_state;
+       struct tegra_plane *tegra;
+       struct drm_plane *plane;
+
+       /*
+        * The nv-display uses shared planes.  The algorithm below assumes
+        * maximum 3 planes per-CRTC, this assumption isn't applicable to
+        * the nv-display.  Note that T124 support has additional windows,
+        * but currently they aren't supported by the driver.
+        */
+       if (dc->soc->has_nvdisplay)
+               return 0;
+
+       new_state = drm_atomic_get_new_crtc_state(state, crtc);
+
+       /*
+        * For overlapping planes pixel's data is fetched for each plane at
+        * the same time, hence bandwidths are accumulated in this case.
+        * This needs to be taken into account for calculating total bandwidth
+        * consumed by all planes.
+        *
+        * Here we get the overlapping state of each plane, which is a
+        * bitmask of plane indices telling with what planes there is an
+        * overlap. Note that bitmask[plane] includes BIT(plane) in order
+        * to make further code nicer and simpler.
+        */
+       drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) {
+               tegra_state = to_const_tegra_plane_state(plane_state);
+               tegra = to_tegra_plane(plane);
+
+               /* the per-plane arrays above are indexed by tegra->index */
+               if (WARN_ON_ONCE(tegra->index >= TEGRA_DC_LEGACY_PLANES_NUM))
+                       return -EINVAL;
+
+               plane_peak_bw[tegra->index] = tegra_state->peak_memory_bandwidth;
+               mask = tegra_plane_overlap_mask(new_state, plane_state);
+               overlap_mask[tegra->index] = mask;
+
+               /* 3 set bits: this plane overlaps with all three planes */
+               if (hweight_long(mask) != 3)
+                       all_planes_overlap_simultaneously = false;
+       }
+
+       /*
+        * Then we calculate maximum bandwidth of each plane state.
+        * The bandwidth includes the plane BW + BW of the "simultaneously"
+        * overlapping planes, where "simultaneously" means areas where DC
+        * fetches from the planes simultaneously during the scan-out process.
+        *
+        * For example, if plane A overlaps with planes B and C, but B and C
+        * don't overlap, then the peak bandwidth will be either in area where
+        * A-and-B or A-and-C planes overlap.
+        *
+        * The plane_peak_bw[] contains peak memory bandwidth values of
+        * each plane, this information is needed by interconnect provider
+        * in order to set up latency allowance based on the peak BW, see
+        * tegra_crtc_update_memory_bandwidth().
+        */
+       drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) {
+               u32 i, old_peak_bw, new_peak_bw, overlap_bw = 0;
+
+               /*
+                * Note that plane's atomic check doesn't touch the
+                * total_peak_memory_bandwidth of enabled plane, hence the
+                * current state contains the old bandwidth state from the
+                * previous CRTC commit.
+                */
+               tegra_state = to_const_tegra_plane_state(plane_state);
+               tegra = to_tegra_plane(plane);
+
+               for_each_set_bit(i, &overlap_mask[tegra->index], 3) {
+                       /* the plane's own bandwidth is added separately below */
+                       if (i == tegra->index)
+                               continue;
+
+                       if (all_planes_overlap_simultaneously)
+                               overlap_bw += plane_peak_bw[i];
+                       else
+                               overlap_bw = max(overlap_bw, plane_peak_bw[i]);
+               }
+
+               new_peak_bw = plane_peak_bw[tegra->index] + overlap_bw;
+               old_peak_bw = tegra_state->total_peak_memory_bandwidth;
+
+               /*
+                * If plane's peak bandwidth changed (for example plane isn't
+                * overlapped anymore) and plane isn't in the atomic state,
+                * then add plane to the state in order to have the bandwidth
+                * updated.
+                */
+               if (old_peak_bw != new_peak_bw) {
+                       struct tegra_plane_state *new_tegra_state;
+                       struct drm_plane_state *new_plane_state;
+
+                       new_plane_state = drm_atomic_get_plane_state(state, plane);
+                       if (IS_ERR(new_plane_state))
+                               return PTR_ERR(new_plane_state);
+
+                       new_tegra_state = to_tegra_plane_state(new_plane_state);
+                       new_tegra_state->total_peak_memory_bandwidth = new_peak_bw;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * CRTC atomic_check helper.  The memory bandwidth calculation is the only
+ * check performed; its result (0 or a negative error code) is handed back
+ * to the caller unchanged.
+ */
+static int tegra_crtc_atomic_check(struct drm_crtc *crtc,
+                                  struct drm_atomic_state *state)
+{
+       return tegra_crtc_calculate_memory_bandwidth(crtc, state);
+}
+
+/*
+ * Completion-phase counterpart of the bandwidth update done in
+ * tegra_crtc_atomic_begin(): runs tegra_crtc_update_memory_bandwidth()
+ * with prepare_bandwidth_transition set to false.
+ *
+ * @crtc: CRTC whose plane bandwidths are updated
+ * @state: atomic state that was committed
+ *
+ * NOTE(review): not static, so presumably declared in a header and invoked
+ * from outside this file once the commit has completed -- confirm.
+ */
+void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
+                                  struct drm_atomic_state *state)
+{
+       /*
+        * Display bandwidth is allowed to go down only once hardware state
+        * is known to be armed, i.e. state was committed and VBLANK event
+        * received.
+        */
+       tegra_crtc_update_memory_bandwidth(crtc, state, false);
+}
+
 static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = {
+       .atomic_check = tegra_crtc_atomic_check,
        .atomic_begin = tegra_crtc_atomic_begin,
        .atomic_flush = tegra_crtc_atomic_flush,
        .atomic_enable = tegra_crtc_atomic_enable,
@@ -2036,6 +2373,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
                /*
                dev_dbg(dc->dev, "%s(): frame end\n", __func__);
                */
+               dc->stats.frames_total++;
                dc->stats.frames++;
        }
 
@@ -2044,6 +2382,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
                dev_dbg(dc->dev, "%s(): vertical blank\n", __func__);
                */
                drm_crtc_handle_vblank(&dc->base);
+               dc->stats.vblank_total++;
                dc->stats.vblank++;
        }
 
@@ -2051,6 +2390,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
                /*
                dev_dbg(dc->dev, "%s(): underflow\n", __func__);
                */
+               dc->stats.underflow_total++;
                dc->stats.underflow++;
        }
 
@@ -2058,11 +2398,13 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
                /*
                dev_dbg(dc->dev, "%s(): overflow\n", __func__);
                */
+               dc->stats.overflow_total++;
                dc->stats.overflow++;
        }
 
        if (status & HEAD_UF_INT) {
                dev_dbg_ratelimited(dc->dev, "%s(): head underflow\n", __func__);
+               dc->stats.underflow_total++;
                dc->stats.underflow++;
        }
 
@@ -2343,7 +2685,9 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
        .overlay_formats = tegra20_overlay_formats,
        .modifiers = tegra20_modifiers,
        .has_win_a_without_filters = true,
+       .has_win_b_vfilter_mem_client = true,
        .has_win_c_without_vert_filter = true,
+       .plane_tiled_memory_bandwidth_x2 = false,
 };
 
 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
@@ -2363,7 +2707,9 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
        .overlay_formats = tegra20_overlay_formats,
        .modifiers = tegra20_modifiers,
        .has_win_a_without_filters = false,
+       .has_win_b_vfilter_mem_client = true,
        .has_win_c_without_vert_filter = false,
+       .plane_tiled_memory_bandwidth_x2 = true,
 };
 
 static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
@@ -2383,7 +2729,9 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
        .overlay_formats = tegra114_overlay_formats,
        .modifiers = tegra20_modifiers,
        .has_win_a_without_filters = false,
+       .has_win_b_vfilter_mem_client = false,
        .has_win_c_without_vert_filter = false,
+       .plane_tiled_memory_bandwidth_x2 = true,
 };
 
 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
@@ -2403,7 +2751,9 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
        .overlay_formats = tegra124_overlay_formats,
        .modifiers = tegra124_modifiers,
        .has_win_a_without_filters = false,
+       .has_win_b_vfilter_mem_client = false,
        .has_win_c_without_vert_filter = false,
+       .plane_tiled_memory_bandwidth_x2 = false,
 };
 
 static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
@@ -2423,7 +2773,9 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
        .overlay_formats = tegra114_overlay_formats,
        .modifiers = tegra124_modifiers,
        .has_win_a_without_filters = false,
+       .has_win_b_vfilter_mem_client = false,
        .has_win_c_without_vert_filter = false,
+       .plane_tiled_memory_bandwidth_x2 = false,
 };
 
 static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
@@ -2473,6 +2825,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = {
        .has_nvdisplay = true,
        .wgrps = tegra186_dc_wgrps,
        .num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
+       .plane_tiled_memory_bandwidth_x2 = false,
 };
 
 static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = {
@@ -2522,6 +2875,7 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = {
        .has_nvdisplay = true,
        .wgrps = tegra194_dc_wgrps,
        .num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps),
+       .plane_tiled_memory_bandwidth_x2 = false,
 };
 
 static const struct of_device_id tegra_dc_of_match[] = {