89543fa8ca4de2ad6bc2641a003763c91bdfb3b6
[linux-2.6-microblaze.git] / drivers / gpu / drm / vc4 / vc4_plane.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Broadcom
4  */
5
6 /**
7  * DOC: VC4 plane module
8  *
9  * Each DRM plane is a layer of pixels being scanned out by the HVS.
10  *
11  * At atomic modeset check time, we compute the HVS display element
12  * state that would be necessary for displaying the plane (giving us a
13  * chance to figure out if a plane configuration is invalid), then at
14  * atomic flush time the CRTC will ask us to write our element state
15  * into the region of the HVS that it has allocated for us.
16  */
17
18 #include <drm/drm_atomic.h>
19 #include <drm/drm_atomic_helper.h>
20 #include <drm/drm_atomic_uapi.h>
21 #include <drm/drm_fb_cma_helper.h>
22 #include <drm/drm_fourcc.h>
23 #include <drm/drm_gem_framebuffer_helper.h>
24 #include <drm/drm_plane_helper.h>
25
26 #include "uapi/drm/vc4_drm.h"
27
28 #include "vc4_drv.h"
29 #include "vc4_regs.h"
30
/* Table describing how each supported DRM pixel format is programmed into
 * the HVS: the HVS pixel format code plus the component order to use.
 *
 * Entries that do not name .pixel_order_hvs5 leave it zero-initialized.
 */
static const struct hvs_format {
        u32 drm; /* DRM_FORMAT_* */
        u32 hvs; /* HVS_FORMAT_* */
        u32 pixel_order;
        /* NOTE(review): presumably the component order to use on HVS5
         * hardware instead of pixel_order - confirm against the users of
         * this field.
         */
        u32 pixel_order_hvs5;
} hvs_formats[] = {
        /* 32-bit RGB formats; the only entries that set pixel_order_hvs5. */
        {
                .drm = DRM_FORMAT_XRGB8888,
                .hvs = HVS_PIXEL_FORMAT_RGBA8888,
                .pixel_order = HVS_PIXEL_ORDER_ABGR,
                .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
        },
        {
                .drm = DRM_FORMAT_ARGB8888,
                .hvs = HVS_PIXEL_FORMAT_RGBA8888,
                .pixel_order = HVS_PIXEL_ORDER_ABGR,
                .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
        },
        {
                .drm = DRM_FORMAT_ABGR8888,
                .hvs = HVS_PIXEL_FORMAT_RGBA8888,
                .pixel_order = HVS_PIXEL_ORDER_ARGB,
                .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
        },
        {
                .drm = DRM_FORMAT_XBGR8888,
                .hvs = HVS_PIXEL_FORMAT_RGBA8888,
                .pixel_order = HVS_PIXEL_ORDER_ARGB,
                .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
        },
        /* 16-bit RGB formats. */
        {
                .drm = DRM_FORMAT_RGB565,
                .hvs = HVS_PIXEL_FORMAT_RGB565,
                .pixel_order = HVS_PIXEL_ORDER_XRGB,
        },
        {
                .drm = DRM_FORMAT_BGR565,
                .hvs = HVS_PIXEL_FORMAT_RGB565,
                .pixel_order = HVS_PIXEL_ORDER_XBGR,
        },
        {
                .drm = DRM_FORMAT_ARGB1555,
                .hvs = HVS_PIXEL_FORMAT_RGBA5551,
                .pixel_order = HVS_PIXEL_ORDER_ABGR,
        },
        {
                .drm = DRM_FORMAT_XRGB1555,
                .hvs = HVS_PIXEL_FORMAT_RGBA5551,
                .pixel_order = HVS_PIXEL_ORDER_ABGR,
        },
        /* 24-bit RGB formats. */
        {
                .drm = DRM_FORMAT_RGB888,
                .hvs = HVS_PIXEL_FORMAT_RGB888,
                .pixel_order = HVS_PIXEL_ORDER_XRGB,
        },
        {
                .drm = DRM_FORMAT_BGR888,
                .hvs = HVS_PIXEL_FORMAT_RGB888,
                .pixel_order = HVS_PIXEL_ORDER_XBGR,
        },
        /* YUV formats mapped to the 3-plane HVS formats; the Cb/Cr-swapped
         * variants share the HVS format and differ only in pixel_order.
         */
        {
                .drm = DRM_FORMAT_YUV422,
                .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
                .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
        },
        {
                .drm = DRM_FORMAT_YVU422,
                .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
                .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
        },
        {
                .drm = DRM_FORMAT_YUV420,
                .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
                .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
        },
        {
                .drm = DRM_FORMAT_YVU420,
                .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
                .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
        },
        /* YUV formats mapped to the 2-plane HVS formats. */
        {
                .drm = DRM_FORMAT_NV12,
                .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
                .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
        },
        {
                .drm = DRM_FORMAT_NV21,
                .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
                .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
        },
        {
                .drm = DRM_FORMAT_NV16,
                .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
                .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
        },
        {
                .drm = DRM_FORMAT_NV61,
                .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
                .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
        },
};
132
133 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
134 {
135         unsigned i;
136
137         for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
138                 if (hvs_formats[i].drm == drm_format)
139                         return &hvs_formats[i];
140         }
141
142         return NULL;
143 }
144
145 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
146 {
147         if (dst == src)
148                 return VC4_SCALING_NONE;
149         if (3 * dst >= 2 * src)
150                 return VC4_SCALING_PPF;
151         else
152                 return VC4_SCALING_TPZ;
153 }
154
155 static bool plane_enabled(struct drm_plane_state *state)
156 {
157         return state->fb && !WARN_ON(!state->crtc);
158 }
159
160 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
161 {
162         struct vc4_plane_state *vc4_state;
163
164         if (WARN_ON(!plane->state))
165                 return NULL;
166
167         vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
168         if (!vc4_state)
169                 return NULL;
170
171         memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
172         vc4_state->dlist_initialized = 0;
173
174         __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
175
176         if (vc4_state->dlist) {
177                 vc4_state->dlist = kmemdup(vc4_state->dlist,
178                                            vc4_state->dlist_count * 4,
179                                            GFP_KERNEL);
180                 if (!vc4_state->dlist) {
181                         kfree(vc4_state);
182                         return NULL;
183                 }
184                 vc4_state->dlist_size = vc4_state->dlist_count;
185         }
186
187         return &vc4_state->base;
188 }
189
190 static void vc4_plane_destroy_state(struct drm_plane *plane,
191                                     struct drm_plane_state *state)
192 {
193         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
194         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
195
196         if (drm_mm_node_allocated(&vc4_state->lbm)) {
197                 unsigned long irqflags;
198
199                 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
200                 drm_mm_remove_node(&vc4_state->lbm);
201                 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
202         }
203
204         kfree(vc4_state->dlist);
205         __drm_atomic_helper_plane_destroy_state(&vc4_state->base);
206         kfree(state);
207 }
208
209 /* Called during init to allocate the plane's atomic state. */
210 static void vc4_plane_reset(struct drm_plane *plane)
211 {
212         struct vc4_plane_state *vc4_state;
213
214         WARN_ON(plane->state);
215
216         vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
217         if (!vc4_state)
218                 return;
219
220         __drm_atomic_helper_plane_reset(plane, &vc4_state->base);
221 }
222
223 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
224 {
225         if (vc4_state->dlist_count == vc4_state->dlist_size) {
226                 u32 new_size = max(4u, vc4_state->dlist_count * 2);
227                 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);
228
229                 if (!new_dlist)
230                         return;
231                 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);
232
233                 kfree(vc4_state->dlist);
234                 vc4_state->dlist = new_dlist;
235                 vc4_state->dlist_size = new_size;
236         }
237
238         vc4_state->dlist[vc4_state->dlist_count++] = val;
239 }
240
241 /* Returns the scl0/scl1 field based on whether the dimensions need to
242  * be up/down/non-scaled.
243  *
244  * This is a replication of a table from the spec.
245  */
246 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
247 {
248         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
249
250         switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
251         case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
252                 return SCALER_CTL0_SCL_H_PPF_V_PPF;
253         case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
254                 return SCALER_CTL0_SCL_H_TPZ_V_PPF;
255         case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
256                 return SCALER_CTL0_SCL_H_PPF_V_TPZ;
257         case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
258                 return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
259         case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
260                 return SCALER_CTL0_SCL_H_PPF_V_NONE;
261         case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
262                 return SCALER_CTL0_SCL_H_NONE_V_PPF;
263         case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
264                 return SCALER_CTL0_SCL_H_NONE_V_TPZ;
265         case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
266                 return SCALER_CTL0_SCL_H_TPZ_V_NONE;
267         default:
268         case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
269                 /* The unity case is independently handled by
270                  * SCALER_CTL0_UNITY.
271                  */
272                 return 0;
273         }
274 }
275
276 static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
277 {
278         struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
279         unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
280         struct drm_crtc_state *crtc_state;
281
282         crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
283                                                    pstate->crtc);
284
285         vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
286         if (!left && !right && !top && !bottom)
287                 return 0;
288
289         if (left + right >= crtc_state->mode.hdisplay ||
290             top + bottom >= crtc_state->mode.vdisplay)
291                 return -EINVAL;
292
293         adjhdisplay = crtc_state->mode.hdisplay - (left + right);
294         vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
295                                                adjhdisplay,
296                                                crtc_state->mode.hdisplay);
297         vc4_pstate->crtc_x += left;
298         if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - left)
299                 vc4_pstate->crtc_x = crtc_state->mode.hdisplay - left;
300
301         adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
302         vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
303                                                adjvdisplay,
304                                                crtc_state->mode.vdisplay);
305         vc4_pstate->crtc_y += top;
306         if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - top)
307                 vc4_pstate->crtc_y = crtc_state->mode.vdisplay - top;
308
309         vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
310                                                adjhdisplay,
311                                                crtc_state->mode.hdisplay);
312         vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
313                                                adjvdisplay,
314                                                crtc_state->mode.vdisplay);
315
316         if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
317                 return -EINVAL;
318
319         return 0;
320 }
321
322 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
323 {
324         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
325         struct drm_framebuffer *fb = state->fb;
326         struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
327         u32 subpixel_src_mask = (1 << 16) - 1;
328         int num_planes = fb->format->num_planes;
329         struct drm_crtc_state *crtc_state;
330         u32 h_subsample = fb->format->hsub;
331         u32 v_subsample = fb->format->vsub;
332         int i, ret;
333
334         crtc_state = drm_atomic_get_existing_crtc_state(state->state,
335                                                         state->crtc);
336         if (!crtc_state) {
337                 DRM_DEBUG_KMS("Invalid crtc state\n");
338                 return -EINVAL;
339         }
340
341         ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
342                                                   INT_MAX, true, true);
343         if (ret)
344                 return ret;
345
346         for (i = 0; i < num_planes; i++)
347                 vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
348
349         /* We don't support subpixel source positioning for scaling. */
350         if ((state->src.x1 & subpixel_src_mask) ||
351             (state->src.x2 & subpixel_src_mask) ||
352             (state->src.y1 & subpixel_src_mask) ||
353             (state->src.y2 & subpixel_src_mask)) {
354                 return -EINVAL;
355         }
356
357         vc4_state->src_x = state->src.x1 >> 16;
358         vc4_state->src_y = state->src.y1 >> 16;
359         vc4_state->src_w[0] = (state->src.x2 - state->src.x1) >> 16;
360         vc4_state->src_h[0] = (state->src.y2 - state->src.y1) >> 16;
361
362         vc4_state->crtc_x = state->dst.x1;
363         vc4_state->crtc_y = state->dst.y1;
364         vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
365         vc4_state->crtc_h = state->dst.y2 - state->dst.y1;
366
367         ret = vc4_plane_margins_adj(state);
368         if (ret)
369                 return ret;
370
371         vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
372                                                        vc4_state->crtc_w);
373         vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
374                                                        vc4_state->crtc_h);
375
376         vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
377                                vc4_state->y_scaling[0] == VC4_SCALING_NONE);
378
379         if (num_planes > 1) {
380                 vc4_state->is_yuv = true;
381
382                 vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
383                 vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
384
385                 vc4_state->x_scaling[1] =
386                         vc4_get_scaling_mode(vc4_state->src_w[1],
387                                              vc4_state->crtc_w);
388                 vc4_state->y_scaling[1] =
389                         vc4_get_scaling_mode(vc4_state->src_h[1],
390                                              vc4_state->crtc_h);
391
392                 /* YUV conversion requires that horizontal scaling be enabled
393                  * on the UV plane even if vc4_get_scaling_mode() returned
394                  * VC4_SCALING_NONE (which can happen when the down-scaling
395                  * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
396                  * case.
397                  */
398                 if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
399                         vc4_state->x_scaling[1] = VC4_SCALING_PPF;
400         } else {
401                 vc4_state->is_yuv = false;
402                 vc4_state->x_scaling[1] = VC4_SCALING_NONE;
403                 vc4_state->y_scaling[1] = VC4_SCALING_NONE;
404         }
405
406         return 0;
407 }
408
409 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
410 {
411         u32 scale, recip;
412
413         scale = (1 << 16) * src / dst;
414
415         /* The specs note that while the reciprocal would be defined
416          * as (1<<32)/scale, ~0 is close enough.
417          */
418         recip = ~0 / scale;
419
420         vc4_dlist_write(vc4_state,
421                         VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
422                         VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
423         vc4_dlist_write(vc4_state,
424                         VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
425 }
426
427 static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
428 {
429         u32 scale = (1 << 16) * src / dst;
430
431         vc4_dlist_write(vc4_state,
432                         SCALER_PPF_AGC |
433                         VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
434                         VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
435 }
436
437 static u32 vc4_lbm_size(struct drm_plane_state *state)
438 {
439         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
440         u32 pix_per_line;
441         u32 lbm;
442
443         /* LBM is not needed when there's no vertical scaling. */
444         if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
445             vc4_state->y_scaling[1] == VC4_SCALING_NONE)
446                 return 0;
447
448         /*
449          * This can be further optimized in the RGB/YUV444 case if the PPF
450          * decimation factor is between 0.5 and 1.0 by using crtc_w.
451          *
452          * It's not an issue though, since in that case since src_w[0] is going
453          * to be greater than or equal to crtc_w.
454          */
455         if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
456                 pix_per_line = vc4_state->crtc_w;
457         else
458                 pix_per_line = vc4_state->src_w[0];
459
460         if (!vc4_state->is_yuv) {
461                 if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
462                         lbm = pix_per_line * 8;
463                 else {
464                         /* In special cases, this multiplier might be 12. */
465                         lbm = pix_per_line * 16;
466                 }
467         } else {
468                 /* There are cases for this going down to a multiplier
469                  * of 2, but according to the firmware source, the
470                  * table in the docs is somewhat wrong.
471                  */
472                 lbm = pix_per_line * 16;
473         }
474
475         lbm = roundup(lbm, 32);
476
477         return lbm;
478 }
479
480 static void vc4_write_scaling_parameters(struct drm_plane_state *state,
481                                          int channel)
482 {
483         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
484
485         /* Ch0 H-PPF Word 0: Scaling Parameters */
486         if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
487                 vc4_write_ppf(vc4_state,
488                               vc4_state->src_w[channel], vc4_state->crtc_w);
489         }
490
491         /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
492         if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
493                 vc4_write_ppf(vc4_state,
494                               vc4_state->src_h[channel], vc4_state->crtc_h);
495                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
496         }
497
498         /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
499         if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
500                 vc4_write_tpz(vc4_state,
501                               vc4_state->src_w[channel], vc4_state->crtc_w);
502         }
503
504         /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
505         if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
506                 vc4_write_tpz(vc4_state,
507                               vc4_state->src_h[channel], vc4_state->crtc_h);
508                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
509         }
510 }
511
512 static void vc4_plane_calc_load(struct drm_plane_state *state)
513 {
514         unsigned int hvs_load_shift, vrefresh, i;
515         struct drm_framebuffer *fb = state->fb;
516         struct vc4_plane_state *vc4_state;
517         struct drm_crtc_state *crtc_state;
518         unsigned int vscale_factor;
519         struct vc4_dev *vc4;
520
521         vc4 = to_vc4_dev(state->plane->dev);
522         if (!vc4->load_tracker_available)
523                 return;
524
525         vc4_state = to_vc4_plane_state(state);
526         crtc_state = drm_atomic_get_existing_crtc_state(state->state,
527                                                         state->crtc);
528         vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
529
530         /* The HVS is able to process 2 pixels/cycle when scaling the source,
531          * 4 pixels/cycle otherwise.
532          * Alpha blending step seems to be pipelined and it's always operating
533          * at 4 pixels/cycle, so the limiting aspect here seems to be the
534          * scaler block.
535          * HVS load is expressed in clk-cycles/sec (AKA Hz).
536          */
537         if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
538             vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
539             vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
540             vc4_state->y_scaling[1] != VC4_SCALING_NONE)
541                 hvs_load_shift = 1;
542         else
543                 hvs_load_shift = 2;
544
545         vc4_state->membus_load = 0;
546         vc4_state->hvs_load = 0;
547         for (i = 0; i < fb->format->num_planes; i++) {
548                 /* Even if the bandwidth/plane required for a single frame is
549                  *
550                  * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
551                  *
552                  * when downscaling, we have to read more pixels per line in
553                  * the time frame reserved for a single line, so the bandwidth
554                  * demand can be punctually higher. To account for that, we
555                  * calculate the down-scaling factor and multiply the plane
556                  * load by this number. We're likely over-estimating the read
557                  * demand, but that's better than under-estimating it.
558                  */
559                 vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
560                                              vc4_state->crtc_h);
561                 vc4_state->membus_load += vc4_state->src_w[i] *
562                                           vc4_state->src_h[i] * vscale_factor *
563                                           fb->format->cpp[i];
564                 vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
565         }
566
567         vc4_state->hvs_load *= vrefresh;
568         vc4_state->hvs_load >>= hvs_load_shift;
569         vc4_state->membus_load *= vrefresh;
570 }
571
572 static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
573 {
574         struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
575         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
576         unsigned long irqflags;
577         u32 lbm_size;
578
579         lbm_size = vc4_lbm_size(state);
580         if (!lbm_size)
581                 return 0;
582
583         if (WARN_ON(!vc4_state->lbm_offset))
584                 return -EINVAL;
585
586         /* Allocate the LBM memory that the HVS will use for temporary
587          * storage due to our scaling/format conversion.
588          */
589         if (!drm_mm_node_allocated(&vc4_state->lbm)) {
590                 int ret;
591
592                 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
593                 ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
594                                                  &vc4_state->lbm,
595                                                  lbm_size,
596                                                  vc4->hvs->hvs5 ? 64 : 32,
597                                                  0, 0);
598                 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
599
600                 if (ret)
601                         return ret;
602         } else {
603                 WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
604         }
605
606         vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;
607
608         return 0;
609 }
610
611 /* Writes out a full display list for an active plane to the plane's
612  * private dlist state.
613  */
614 static int vc4_plane_mode_set(struct drm_plane *plane,
615                               struct drm_plane_state *state)
616 {
617         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
618         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
619         struct drm_framebuffer *fb = state->fb;
620         u32 ctl0_offset = vc4_state->dlist_count;
621         const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
622         u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
623         int num_planes = fb->format->num_planes;
624         u32 h_subsample = fb->format->hsub;
625         u32 v_subsample = fb->format->vsub;
626         bool mix_plane_alpha;
627         bool covers_screen;
628         u32 scl0, scl1, pitch0;
629         u32 tiling, src_y;
630         u32 hvs_format = format->hvs;
631         unsigned int rotation;
632         int ret, i;
633
634         if (vc4_state->dlist_initialized)
635                 return 0;
636
637         ret = vc4_plane_setup_clipping_and_scaling(state);
638         if (ret)
639                 return ret;
640
641         /* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
642          * and 4:4:4, scl1 should be set to scl0 so both channels of
643          * the scaler do the same thing.  For YUV, the Y plane needs
644          * to be put in channel 1 and Cb/Cr in channel 0, so we swap
645          * the scl fields here.
646          */
647         if (num_planes == 1) {
648                 scl0 = vc4_get_scl_field(state, 0);
649                 scl1 = scl0;
650         } else {
651                 scl0 = vc4_get_scl_field(state, 1);
652                 scl1 = vc4_get_scl_field(state, 0);
653         }
654
655         rotation = drm_rotation_simplify(state->rotation,
656                                          DRM_MODE_ROTATE_0 |
657                                          DRM_MODE_REFLECT_X |
658                                          DRM_MODE_REFLECT_Y);
659
660         /* We must point to the last line when Y reflection is enabled. */
661         src_y = vc4_state->src_y;
662         if (rotation & DRM_MODE_REFLECT_Y)
663                 src_y += vc4_state->src_h[0] - 1;
664
665         switch (base_format_mod) {
666         case DRM_FORMAT_MOD_LINEAR:
667                 tiling = SCALER_CTL0_TILING_LINEAR;
668                 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
669
670                 /* Adjust the base pointer to the first pixel to be scanned
671                  * out.
672                  */
673                 for (i = 0; i < num_planes; i++) {
674                         vc4_state->offsets[i] += src_y /
675                                                  (i ? v_subsample : 1) *
676                                                  fb->pitches[i];
677
678                         vc4_state->offsets[i] += vc4_state->src_x /
679                                                  (i ? h_subsample : 1) *
680                                                  fb->format->cpp[i];
681                 }
682
683                 break;
684
685         case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
686                 u32 tile_size_shift = 12; /* T tiles are 4kb */
687                 /* Whole-tile offsets, mostly for setting the pitch. */
688                 u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
689                 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
690                 u32 tile_w_mask = (1 << tile_w_shift) - 1;
691                 /* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
692                  * the height (in pixels) of a 4k tile.
693                  */
694                 u32 tile_h_mask = (2 << tile_h_shift) - 1;
695                 /* For T-tiled, the FB pitch is "how many bytes from one row to
696                  * the next, such that
697                  *
698                  *      pitch * tile_h == tile_size * tiles_per_row
699                  */
700                 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
701                 u32 tiles_l = vc4_state->src_x >> tile_w_shift;
702                 u32 tiles_r = tiles_w - tiles_l;
703                 u32 tiles_t = src_y >> tile_h_shift;
704                 /* Intra-tile offsets, which modify the base address (the
705                  * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
706                  * base address).
707                  */
708                 u32 tile_y = (src_y >> 4) & 1;
709                 u32 subtile_y = (src_y >> 2) & 3;
710                 u32 utile_y = src_y & 3;
711                 u32 x_off = vc4_state->src_x & tile_w_mask;
712                 u32 y_off = src_y & tile_h_mask;
713
714                 /* When Y reflection is requested we must set the
715                  * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
716                  * after the initial one should be fetched in descending order,
717                  * which makes sense since we start from the last line and go
718                  * backward.
719                  * Don't know why we need y_off = max_y_off - y_off, but it's
720                  * definitely required (I guess it's also related to the "going
721                  * backward" situation).
722                  */
723                 if (rotation & DRM_MODE_REFLECT_Y) {
724                         y_off = tile_h_mask - y_off;
725                         pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
726                 } else {
727                         pitch0 = 0;
728                 }
729
730                 tiling = SCALER_CTL0_TILING_256B_OR_T;
731                 pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
732                            VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
733                            VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
734                            VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
735                 vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift);
736                 vc4_state->offsets[0] += subtile_y << 8;
737                 vc4_state->offsets[0] += utile_y << 4;
738
739                 /* Rows of tiles alternate left-to-right and right-to-left. */
740                 if (tiles_t & 1) {
741                         pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
742                         vc4_state->offsets[0] += (tiles_w - tiles_l) <<
743                                                  tile_size_shift;
744                         vc4_state->offsets[0] -= (1 + !tile_y) << 10;
745                 } else {
746                         vc4_state->offsets[0] += tiles_l << tile_size_shift;
747                         vc4_state->offsets[0] += tile_y << 10;
748                 }
749
750                 break;
751         }
752
753         case DRM_FORMAT_MOD_BROADCOM_SAND64:
754         case DRM_FORMAT_MOD_BROADCOM_SAND128:
755         case DRM_FORMAT_MOD_BROADCOM_SAND256: {
756                 uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
757                 u32 tile_w, tile, x_off, pix_per_tile;
758
759                 hvs_format = HVS_PIXEL_FORMAT_H264;
760
761                 switch (base_format_mod) {
762                 case DRM_FORMAT_MOD_BROADCOM_SAND64:
763                         tiling = SCALER_CTL0_TILING_64B;
764                         tile_w = 64;
765                         break;
766                 case DRM_FORMAT_MOD_BROADCOM_SAND128:
767                         tiling = SCALER_CTL0_TILING_128B;
768                         tile_w = 128;
769                         break;
770                 case DRM_FORMAT_MOD_BROADCOM_SAND256:
771                         tiling = SCALER_CTL0_TILING_256B_OR_T;
772                         tile_w = 256;
773                         break;
774                 default:
775                         break;
776                 }
777
778                 if (param > SCALER_TILE_HEIGHT_MASK) {
779                         DRM_DEBUG_KMS("SAND height too large (%d)\n", param);
780                         return -EINVAL;
781                 }
782
783                 pix_per_tile = tile_w / fb->format->cpp[0];
784                 tile = vc4_state->src_x / pix_per_tile;
785                 x_off = vc4_state->src_x % pix_per_tile;
786
787                 /* Adjust the base pointer to the first pixel to be scanned
788                  * out.
789                  */
790                 for (i = 0; i < num_planes; i++) {
791                         vc4_state->offsets[i] += param * tile_w * tile;
792                         vc4_state->offsets[i] += src_y /
793                                                  (i ? v_subsample : 1) *
794                                                  tile_w;
795                         vc4_state->offsets[i] += x_off /
796                                                  (i ? h_subsample : 1) *
797                                                  fb->format->cpp[i];
798                 }
799
800                 pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
801                 break;
802         }
803
804         default:
805                 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
806                               (long long)fb->modifier);
807                 return -EINVAL;
808         }
809
810         /* Don't waste cycles mixing with plane alpha if the set alpha
811          * is opaque or there is no per-pixel alpha information.
812          * In any case we use the alpha property value as the fixed alpha.
813          */
814         mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
815                           fb->format->has_alpha;
816
817         if (!vc4->hvs->hvs5) {
818         /* Control word */
819                 vc4_dlist_write(vc4_state,
820                                 SCALER_CTL0_VALID |
821                                 (rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
822                                 (rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
823                                 VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
824                                 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
825                                 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
826                                 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
827                                 (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
828                                 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
829                                 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
830
831                 /* Position Word 0: Image Positions and Alpha Value */
832                 vc4_state->pos0_offset = vc4_state->dlist_count;
833                 vc4_dlist_write(vc4_state,
834                                 VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
835                                 VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
836                                 VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
837
838                 /* Position Word 1: Scaled Image Dimensions. */
839                 if (!vc4_state->is_unity) {
840                         vc4_dlist_write(vc4_state,
841                                         VC4_SET_FIELD(vc4_state->crtc_w,
842                                                       SCALER_POS1_SCL_WIDTH) |
843                                         VC4_SET_FIELD(vc4_state->crtc_h,
844                                                       SCALER_POS1_SCL_HEIGHT));
845                 }
846
847                 /* Position Word 2: Source Image Size, Alpha */
848                 vc4_state->pos2_offset = vc4_state->dlist_count;
849                 vc4_dlist_write(vc4_state,
850                                 VC4_SET_FIELD(fb->format->has_alpha ?
851                                               SCALER_POS2_ALPHA_MODE_PIPELINE :
852                                               SCALER_POS2_ALPHA_MODE_FIXED,
853                                               SCALER_POS2_ALPHA_MODE) |
854                                 (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
855                                 (fb->format->has_alpha ?
856                                                 SCALER_POS2_ALPHA_PREMULT : 0) |
857                                 VC4_SET_FIELD(vc4_state->src_w[0],
858                                               SCALER_POS2_WIDTH) |
859                                 VC4_SET_FIELD(vc4_state->src_h[0],
860                                               SCALER_POS2_HEIGHT));
861
862                 /* Position Word 3: Context.  Written by the HVS. */
863                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
864
865         } else {
866                 u32 hvs_pixel_order = format->pixel_order;
867
868                 if (format->pixel_order_hvs5)
869                         hvs_pixel_order = format->pixel_order_hvs5;
870
871                 /* Control word */
872                 vc4_dlist_write(vc4_state,
873                                 SCALER_CTL0_VALID |
874                                 (hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) |
875                                 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
876                                 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
877                                 (vc4_state->is_unity ?
878                                                 SCALER5_CTL0_UNITY : 0) |
879                                 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
880                                 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
881                                 SCALER5_CTL0_ALPHA_EXPAND |
882                                 SCALER5_CTL0_RGB_EXPAND);
883
884                 /* Position Word 0: Image Positions and Alpha Value */
885                 vc4_state->pos0_offset = vc4_state->dlist_count;
886                 vc4_dlist_write(vc4_state,
887                                 (rotation & DRM_MODE_REFLECT_Y ?
888                                                 SCALER5_POS0_VFLIP : 0) |
889                                 VC4_SET_FIELD(vc4_state->crtc_x,
890                                               SCALER_POS0_START_X) |
891                                 (rotation & DRM_MODE_REFLECT_X ?
892                                               SCALER5_POS0_HFLIP : 0) |
893                                 VC4_SET_FIELD(vc4_state->crtc_y,
894                                               SCALER5_POS0_START_Y)
895                                );
896
897                 /* Control Word 2 */
898                 vc4_dlist_write(vc4_state,
899                                 VC4_SET_FIELD(state->alpha >> 4,
900                                               SCALER5_CTL2_ALPHA) |
901                                 (fb->format->has_alpha ?
902                                         SCALER5_CTL2_ALPHA_PREMULT : 0) |
903                                 (mix_plane_alpha ?
904                                         SCALER5_CTL2_ALPHA_MIX : 0) |
905                                 VC4_SET_FIELD(fb->format->has_alpha ?
906                                       SCALER5_CTL2_ALPHA_MODE_PIPELINE :
907                                       SCALER5_CTL2_ALPHA_MODE_FIXED,
908                                       SCALER5_CTL2_ALPHA_MODE)
909                                );
910
911                 /* Position Word 1: Scaled Image Dimensions. */
912                 if (!vc4_state->is_unity) {
913                         vc4_dlist_write(vc4_state,
914                                         VC4_SET_FIELD(vc4_state->crtc_w,
915                                                       SCALER_POS1_SCL_WIDTH) |
916                                         VC4_SET_FIELD(vc4_state->crtc_h,
917                                                       SCALER_POS1_SCL_HEIGHT));
918                 }
919
920                 /* Position Word 2: Source Image Size */
921                 vc4_state->pos2_offset = vc4_state->dlist_count;
922                 vc4_dlist_write(vc4_state,
923                                 VC4_SET_FIELD(vc4_state->src_w[0],
924                                               SCALER5_POS2_WIDTH) |
925                                 VC4_SET_FIELD(vc4_state->src_h[0],
926                                               SCALER5_POS2_HEIGHT));
927
928                 /* Position Word 3: Context.  Written by the HVS. */
929                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
930         }
931
932
933         /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
934          *
935          * The pointers may be any byte address.
936          */
937         vc4_state->ptr0_offset = vc4_state->dlist_count;
938         for (i = 0; i < num_planes; i++)
939                 vc4_dlist_write(vc4_state, vc4_state->offsets[i]);
940
941         /* Pointer Context Word 0/1/2: Written by the HVS */
942         for (i = 0; i < num_planes; i++)
943                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
944
945         /* Pitch word 0 */
946         vc4_dlist_write(vc4_state, pitch0);
947
948         /* Pitch word 1/2 */
949         for (i = 1; i < num_planes; i++) {
950                 if (hvs_format != HVS_PIXEL_FORMAT_H264) {
951                         vc4_dlist_write(vc4_state,
952                                         VC4_SET_FIELD(fb->pitches[i],
953                                                       SCALER_SRC_PITCH));
954                 } else {
955                         vc4_dlist_write(vc4_state, pitch0);
956                 }
957         }
958
959         /* Colorspace conversion words */
960         if (vc4_state->is_yuv) {
961                 vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
962                 vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
963                 vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
964         }
965
966         vc4_state->lbm_offset = 0;
967
968         if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
969             vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
970             vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
971             vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
972                 /* Reserve a slot for the LBM Base Address. The real value will
973                  * be set when calling vc4_plane_allocate_lbm().
974                  */
975                 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
976                     vc4_state->y_scaling[1] != VC4_SCALING_NONE)
977                         vc4_state->lbm_offset = vc4_state->dlist_count++;
978
979                 if (num_planes > 1) {
980                         /* Emit Cb/Cr as channel 0 and Y as channel
981                          * 1. This matches how we set up scl0/scl1
982                          * above.
983                          */
984                         vc4_write_scaling_parameters(state, 1);
985                 }
986                 vc4_write_scaling_parameters(state, 0);
987
988                 /* If any PPF setup was done, then all the kernel
989                  * pointers get uploaded.
990                  */
991                 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
992                     vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
993                     vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
994                     vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
995                         u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
996                                                    SCALER_PPF_KERNEL_OFFSET);
997
998                         /* HPPF plane 0 */
999                         vc4_dlist_write(vc4_state, kernel);
1000                         /* VPPF plane 0 */
1001                         vc4_dlist_write(vc4_state, kernel);
1002                         /* HPPF plane 1 */
1003                         vc4_dlist_write(vc4_state, kernel);
1004                         /* VPPF plane 1 */
1005                         vc4_dlist_write(vc4_state, kernel);
1006                 }
1007         }
1008
1009         vc4_state->dlist[ctl0_offset] |=
1010                 VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
1011
1012         /* crtc_* are already clipped coordinates. */
1013         covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
1014                         vc4_state->crtc_w == state->crtc->mode.hdisplay &&
1015                         vc4_state->crtc_h == state->crtc->mode.vdisplay;
1016         /* Background fill might be necessary when the plane has per-pixel
1017          * alpha content or a non-opaque plane alpha and could blend from the
1018          * background or does not cover the entire screen.
1019          */
1020         vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
1021                                    state->alpha != DRM_BLEND_ALPHA_OPAQUE;
1022
1023         /* Flag the dlist as initialized to avoid checking it twice in case
1024          * the async update check already called vc4_plane_mode_set() and
1025          * decided to fallback to sync update because async update was not
1026          * possible.
1027          */
1028         vc4_state->dlist_initialized = 1;
1029
1030         vc4_plane_calc_load(state);
1031
1032         return 0;
1033 }
1034
1035 /* If a modeset involves changing the setup of a plane, the atomic
1036  * infrastructure will call this to validate a proposed plane setup.
1037  * However, if a plane isn't getting updated, this (and the
1038  * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
1039  * compute the dlist here and have all active plane dlists get updated
1040  * in the CRTC's flush.
1041  */
1042 static int vc4_plane_atomic_check(struct drm_plane *plane,
1043                                   struct drm_plane_state *state)
1044 {
1045         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1046         int ret;
1047
1048         vc4_state->dlist_count = 0;
1049
1050         if (!plane_enabled(state))
1051                 return 0;
1052
1053         ret = vc4_plane_mode_set(plane, state);
1054         if (ret)
1055                 return ret;
1056
1057         return vc4_plane_allocate_lbm(state);
1058 }
1059
/* Intentionally empty atomic_update hook.
 *
 * The plane does not know where in the CRTC's dlist it will be stored,
 * so nothing is written from here.  The plane's dlist is uploaded to
 * the hardware with vc4_plane_write_dlist() at CRTC atomic_flush time.
 */
static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_plane_state *old_state)
{
}
1069
1070 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
1071 {
1072         struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
1073         int i;
1074
1075         vc4_state->hw_dlist = dlist;
1076
1077         /* Can't memcpy_toio() because it needs to be 32-bit writes. */
1078         for (i = 0; i < vc4_state->dlist_count; i++)
1079                 writel(vc4_state->dlist[i], &dlist[i]);
1080
1081         return vc4_state->dlist_count;
1082 }
1083
1084 u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
1085 {
1086         const struct vc4_plane_state *vc4_state =
1087                 container_of(state, typeof(*vc4_state), base);
1088
1089         return vc4_state->dlist_count;
1090 }
1091
1092 /* Updates the plane to immediately (well, once the FIFO needs
1093  * refilling) scan out from at a new framebuffer.
1094  */
1095 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
1096 {
1097         struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
1098         struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
1099         uint32_t addr;
1100
1101         /* We're skipping the address adjustment for negative origin,
1102          * because this is only called on the primary plane.
1103          */
1104         WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
1105         addr = bo->paddr + fb->offsets[0];
1106
1107         /* Write the new address into the hardware immediately.  The
1108          * scanout will start from this address as soon as the FIFO
1109          * needs to refill with pixels.
1110          */
1111         writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
1112
1113         /* Also update the CPU-side dlist copy, so that any later
1114          * atomic updates that don't do a new modeset on our plane
1115          * also use our updated address.
1116          */
1117         vc4_state->dlist[vc4_state->ptr0_offset] = addr;
1118 }
1119
1120 static void vc4_plane_atomic_async_update(struct drm_plane *plane,
1121                                           struct drm_plane_state *state)
1122 {
1123         struct vc4_plane_state *vc4_state, *new_vc4_state;
1124
1125         swap(plane->state->fb, state->fb);
1126         plane->state->crtc_x = state->crtc_x;
1127         plane->state->crtc_y = state->crtc_y;
1128         plane->state->crtc_w = state->crtc_w;
1129         plane->state->crtc_h = state->crtc_h;
1130         plane->state->src_x = state->src_x;
1131         plane->state->src_y = state->src_y;
1132         plane->state->src_w = state->src_w;
1133         plane->state->src_h = state->src_h;
1134         plane->state->src_h = state->src_h;
1135         plane->state->alpha = state->alpha;
1136         plane->state->pixel_blend_mode = state->pixel_blend_mode;
1137         plane->state->rotation = state->rotation;
1138         plane->state->zpos = state->zpos;
1139         plane->state->normalized_zpos = state->normalized_zpos;
1140         plane->state->color_encoding = state->color_encoding;
1141         plane->state->color_range = state->color_range;
1142         plane->state->src = state->src;
1143         plane->state->dst = state->dst;
1144         plane->state->visible = state->visible;
1145
1146         new_vc4_state = to_vc4_plane_state(state);
1147         vc4_state = to_vc4_plane_state(plane->state);
1148
1149         vc4_state->crtc_x = new_vc4_state->crtc_x;
1150         vc4_state->crtc_y = new_vc4_state->crtc_y;
1151         vc4_state->crtc_h = new_vc4_state->crtc_h;
1152         vc4_state->crtc_w = new_vc4_state->crtc_w;
1153         vc4_state->src_x = new_vc4_state->src_x;
1154         vc4_state->src_y = new_vc4_state->src_y;
1155         memcpy(vc4_state->src_w, new_vc4_state->src_w,
1156                sizeof(vc4_state->src_w));
1157         memcpy(vc4_state->src_h, new_vc4_state->src_h,
1158                sizeof(vc4_state->src_h));
1159         memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
1160                sizeof(vc4_state->x_scaling));
1161         memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
1162                sizeof(vc4_state->y_scaling));
1163         vc4_state->is_unity = new_vc4_state->is_unity;
1164         vc4_state->is_yuv = new_vc4_state->is_yuv;
1165         memcpy(vc4_state->offsets, new_vc4_state->offsets,
1166                sizeof(vc4_state->offsets));
1167         vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;
1168
1169         /* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
1170         vc4_state->dlist[vc4_state->pos0_offset] =
1171                 new_vc4_state->dlist[vc4_state->pos0_offset];
1172         vc4_state->dlist[vc4_state->pos2_offset] =
1173                 new_vc4_state->dlist[vc4_state->pos2_offset];
1174         vc4_state->dlist[vc4_state->ptr0_offset] =
1175                 new_vc4_state->dlist[vc4_state->ptr0_offset];
1176
1177         /* Note that we can't just call vc4_plane_write_dlist()
1178          * because that would smash the context data that the HVS is
1179          * currently using.
1180          */
1181         writel(vc4_state->dlist[vc4_state->pos0_offset],
1182                &vc4_state->hw_dlist[vc4_state->pos0_offset]);
1183         writel(vc4_state->dlist[vc4_state->pos2_offset],
1184                &vc4_state->hw_dlist[vc4_state->pos2_offset]);
1185         writel(vc4_state->dlist[vc4_state->ptr0_offset],
1186                &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
1187 }
1188
1189 static int vc4_plane_atomic_async_check(struct drm_plane *plane,
1190                                         struct drm_plane_state *state)
1191 {
1192         struct vc4_plane_state *old_vc4_state, *new_vc4_state;
1193         int ret;
1194         u32 i;
1195
1196         ret = vc4_plane_mode_set(plane, state);
1197         if (ret)
1198                 return ret;
1199
1200         old_vc4_state = to_vc4_plane_state(plane->state);
1201         new_vc4_state = to_vc4_plane_state(state);
1202         if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
1203             old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
1204             old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
1205             old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset ||
1206             vc4_lbm_size(plane->state) != vc4_lbm_size(state))
1207                 return -EINVAL;
1208
1209         /* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
1210          * if anything else has changed, fallback to a sync update.
1211          */
1212         for (i = 0; i < new_vc4_state->dlist_count; i++) {
1213                 if (i == new_vc4_state->pos0_offset ||
1214                     i == new_vc4_state->pos2_offset ||
1215                     i == new_vc4_state->ptr0_offset ||
1216                     (new_vc4_state->lbm_offset &&
1217                      i == new_vc4_state->lbm_offset))
1218                         continue;
1219
1220                 if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
1221                         return -EINVAL;
1222         }
1223
1224         return 0;
1225 }
1226
1227 static int vc4_prepare_fb(struct drm_plane *plane,
1228                           struct drm_plane_state *state)
1229 {
1230         struct vc4_bo *bo;
1231         int ret;
1232
1233         if (!state->fb)
1234                 return 0;
1235
1236         bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
1237
1238         drm_gem_fb_prepare_fb(plane, state);
1239
1240         if (plane->state->fb == state->fb)
1241                 return 0;
1242
1243         ret = vc4_bo_inc_usecnt(bo);
1244         if (ret)
1245                 return ret;
1246
1247         return 0;
1248 }
1249
1250 static void vc4_cleanup_fb(struct drm_plane *plane,
1251                            struct drm_plane_state *state)
1252 {
1253         struct vc4_bo *bo;
1254
1255         if (plane->state->fb == state->fb || !state->fb)
1256                 return;
1257
1258         bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
1259         vc4_bo_dec_usecnt(bo);
1260 }
1261
1262 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
1263         .atomic_check = vc4_plane_atomic_check,
1264         .atomic_update = vc4_plane_atomic_update,
1265         .prepare_fb = vc4_prepare_fb,
1266         .cleanup_fb = vc4_cleanup_fb,
1267         .atomic_async_check = vc4_plane_atomic_async_check,
1268         .atomic_async_update = vc4_plane_atomic_async_update,
1269 };
1270
/* Plane destroy hook: hands the plane to drm_plane_cleanup(). */
static void vc4_plane_destroy(struct drm_plane *plane)
{
	drm_plane_cleanup(plane);
}
1275
1276 static bool vc4_format_mod_supported(struct drm_plane *plane,
1277                                      uint32_t format,
1278                                      uint64_t modifier)
1279 {
1280         /* Support T_TILING for RGB formats only. */
1281         switch (format) {
1282         case DRM_FORMAT_XRGB8888:
1283         case DRM_FORMAT_ARGB8888:
1284         case DRM_FORMAT_ABGR8888:
1285         case DRM_FORMAT_XBGR8888:
1286         case DRM_FORMAT_RGB565:
1287         case DRM_FORMAT_BGR565:
1288         case DRM_FORMAT_ARGB1555:
1289         case DRM_FORMAT_XRGB1555:
1290                 switch (fourcc_mod_broadcom_mod(modifier)) {
1291                 case DRM_FORMAT_MOD_LINEAR:
1292                 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
1293                         return true;
1294                 default:
1295                         return false;
1296                 }
1297         case DRM_FORMAT_NV12:
1298         case DRM_FORMAT_NV21:
1299                 switch (fourcc_mod_broadcom_mod(modifier)) {
1300                 case DRM_FORMAT_MOD_LINEAR:
1301                 case DRM_FORMAT_MOD_BROADCOM_SAND64:
1302                 case DRM_FORMAT_MOD_BROADCOM_SAND128:
1303                 case DRM_FORMAT_MOD_BROADCOM_SAND256:
1304                         return true;
1305                 default:
1306                         return false;
1307                 }
1308         case DRM_FORMAT_RGBX1010102:
1309         case DRM_FORMAT_BGRX1010102:
1310         case DRM_FORMAT_RGBA1010102:
1311         case DRM_FORMAT_BGRA1010102:
1312         case DRM_FORMAT_YUV422:
1313         case DRM_FORMAT_YVU422:
1314         case DRM_FORMAT_YUV420:
1315         case DRM_FORMAT_YVU420:
1316         case DRM_FORMAT_NV16:
1317         case DRM_FORMAT_NV61:
1318         default:
1319                 return (modifier == DRM_FORMAT_MOD_LINEAR);
1320         }
1321 }
1322
1323 static const struct drm_plane_funcs vc4_plane_funcs = {
1324         .update_plane = drm_atomic_helper_update_plane,
1325         .disable_plane = drm_atomic_helper_disable_plane,
1326         .destroy = vc4_plane_destroy,
1327         .set_property = NULL,
1328         .reset = vc4_plane_reset,
1329         .atomic_duplicate_state = vc4_plane_duplicate_state,
1330         .atomic_destroy_state = vc4_plane_destroy_state,
1331         .format_mod_supported = vc4_format_mod_supported,
1332 };
1333
1334 struct drm_plane *vc4_plane_init(struct drm_device *dev,
1335                                  enum drm_plane_type type)
1336 {
1337         struct drm_plane *plane = NULL;
1338         struct vc4_plane *vc4_plane;
1339         u32 formats[ARRAY_SIZE(hvs_formats)];
1340         int ret = 0;
1341         unsigned i;
1342         static const uint64_t modifiers[] = {
1343                 DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
1344                 DRM_FORMAT_MOD_BROADCOM_SAND128,
1345                 DRM_FORMAT_MOD_BROADCOM_SAND64,
1346                 DRM_FORMAT_MOD_BROADCOM_SAND256,
1347                 DRM_FORMAT_MOD_LINEAR,
1348                 DRM_FORMAT_MOD_INVALID
1349         };
1350
1351         vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane),
1352                                  GFP_KERNEL);
1353         if (!vc4_plane)
1354                 return ERR_PTR(-ENOMEM);
1355
1356         for (i = 0; i < ARRAY_SIZE(hvs_formats); i++)
1357                 formats[i] = hvs_formats[i].drm;
1358
1359         plane = &vc4_plane->base;
1360         ret = drm_universal_plane_init(dev, plane, 0,
1361                                        &vc4_plane_funcs,
1362                                        formats, ARRAY_SIZE(formats),
1363                                        modifiers, type, NULL);
1364
1365         drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
1366
1367         drm_plane_create_alpha_property(plane);
1368         drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
1369                                            DRM_MODE_ROTATE_0 |
1370                                            DRM_MODE_ROTATE_180 |
1371                                            DRM_MODE_REFLECT_X |
1372                                            DRM_MODE_REFLECT_Y);
1373
1374         return plane;
1375 }
1376
1377 int vc4_plane_create_additional_planes(struct drm_device *drm)
1378 {
1379         struct drm_plane *cursor_plane;
1380         struct drm_crtc *crtc;
1381         unsigned int i;
1382
1383         /* Set up some arbitrary number of planes.  We're not limited
1384          * by a set number of physical registers, just the space in
1385          * the HVS (16k) and how small an plane can be (28 bytes).
1386          * However, each plane we set up takes up some memory, and
1387          * increases the cost of looping over planes, which atomic
1388          * modesetting does quite a bit.  As a result, we pick a
1389          * modest number of planes to expose, that should hopefully
1390          * still cover any sane usecase.
1391          */
1392         for (i = 0; i < 16; i++) {
1393                 struct drm_plane *plane =
1394                         vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);
1395
1396                 if (IS_ERR(plane))
1397                         continue;
1398
1399                 plane->possible_crtcs =
1400                         GENMASK(drm->mode_config.num_crtc - 1, 0);
1401         }
1402
1403         drm_for_each_crtc(crtc, drm) {
1404                 /* Set up the legacy cursor after overlay initialization,
1405                  * since we overlay planes on the CRTC in the order they were
1406                  * initialized.
1407                  */
1408                 cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
1409                 if (!IS_ERR(cursor_plane)) {
1410                         cursor_plane->possible_crtcs = drm_crtc_mask(crtc);
1411                         crtc->cursor = cursor_plane;
1412                 }
1413         }
1414
1415         return 0;
1416 }