media: uapi: h264: Drop SLICE_PARAMS 'size' field
[linux-2.6-microblaze.git] / drivers / staging / media / sunxi / cedrus / cedrus_h264.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Cedrus VPU driver
4  *
5  * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com>
6  * Copyright (c) 2018 Bootlin
7  */
8
9 #include <linux/delay.h>
10 #include <linux/types.h>
11
12 #include <media/videobuf2-dma-contig.h>
13
14 #include "cedrus.h"
15 #include "cedrus_hw.h"
16 #include "cedrus_regs.h"
17
/*
 * Offsets of the H.264 tables inside the VPU SRAM.
 *
 * cedrus_h264_write_sram() shifts these values left by two before writing
 * them to VE_AVC_SRAM_PORT_OFFSET.
 */
enum cedrus_h264_sram_off {
	CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE = 0x000,
	CEDRUS_SRAM_H264_FRAMEBUFFER_LIST = 0x100,
	CEDRUS_SRAM_H264_REF_LIST_0 = 0x190,
	CEDRUS_SRAM_H264_REF_LIST_1 = 0x199,
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_0 = 0x200,
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_1 = 0x210,
	CEDRUS_SRAM_H264_SCALING_LIST_4x4 = 0x220,
};
27
28 struct cedrus_h264_sram_ref_pic {
29         __le32  top_field_order_cnt;
30         __le32  bottom_field_order_cnt;
31         __le32  frame_info;
32         __le32  luma_ptr;
33         __le32  chroma_ptr;
34         __le32  mv_col_top_ptr;
35         __le32  mv_col_bot_ptr;
36         __le32  reserved;
37 } __packed;
38
/* Number of entries in the frame buffer list uploaded to the VPU SRAM. */
#define CEDRUS_H264_FRAME_NUM		18

/* Size of the neighbor info buffer handed to the VPU (16 KiB). */
#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE	(16 * SZ_1K)
/* Lower bound applied to the picture info buffer size (130 KiB). */
#define CEDRUS_MIN_PIC_INFO_BUF_SIZE	(130 * SZ_1K)
43
44 static void cedrus_h264_write_sram(struct cedrus_dev *dev,
45                                    enum cedrus_h264_sram_off off,
46                                    const void *data, size_t len)
47 {
48         const u32 *buffer = data;
49         size_t count = DIV_ROUND_UP(len, 4);
50
51         cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2);
52
53         while (count--)
54                 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
55 }
56
57 static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_ctx *ctx,
58                                               unsigned int position,
59                                               unsigned int field)
60 {
61         dma_addr_t addr = ctx->codec.h264.mv_col_buf_dma;
62
63         /* Adjust for the position */
64         addr += position * ctx->codec.h264.mv_col_buf_field_size * 2;
65
66         /* Adjust for the field */
67         addr += field * ctx->codec.h264.mv_col_buf_field_size;
68
69         return addr;
70 }
71
72 static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
73                                 struct cedrus_buffer *buf,
74                                 unsigned int top_field_order_cnt,
75                                 unsigned int bottom_field_order_cnt,
76                                 struct cedrus_h264_sram_ref_pic *pic)
77 {
78         struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
79         unsigned int position = buf->codec.h264.position;
80
81         pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt);
82         pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt);
83         pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8);
84
85         pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0));
86         pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1));
87         pic->mv_col_top_ptr =
88                 cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 0));
89         pic->mv_col_bot_ptr =
90                 cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 1));
91 }
92
93 static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
94                                     struct cedrus_run *run)
95 {
96         struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM];
97         const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
98         const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
99         const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
100         struct vb2_queue *cap_q;
101         struct cedrus_buffer *output_buf;
102         struct cedrus_dev *dev = ctx->dev;
103         unsigned long used_dpbs = 0;
104         unsigned int position;
105         unsigned int output = 0;
106         unsigned int i;
107
108         cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
109
110         memset(pic_list, 0, sizeof(pic_list));
111
112         for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) {
113                 const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
114                 struct cedrus_buffer *cedrus_buf;
115                 int buf_idx;
116
117                 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
118                         continue;
119
120                 buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0);
121                 if (buf_idx < 0)
122                         continue;
123
124                 cedrus_buf = vb2_to_cedrus_buffer(cap_q->bufs[buf_idx]);
125                 position = cedrus_buf->codec.h264.position;
126                 used_dpbs |= BIT(position);
127
128                 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
129                         continue;
130
131                 cedrus_fill_ref_pic(ctx, cedrus_buf,
132                                     dpb->top_field_order_cnt,
133                                     dpb->bottom_field_order_cnt,
134                                     &pic_list[position]);
135
136                 output = max(position, output);
137         }
138
139         position = find_next_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM,
140                                       output);
141         if (position >= CEDRUS_H264_FRAME_NUM)
142                 position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM);
143
144         output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
145         output_buf->codec.h264.position = position;
146
147         if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
148                 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD;
149         else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
150                 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF;
151         else
152                 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME;
153
154         cedrus_fill_ref_pic(ctx, output_buf,
155                             decode->top_field_order_cnt,
156                             decode->bottom_field_order_cnt,
157                             &pic_list[position]);
158
159         cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST,
160                                pic_list, sizeof(pic_list));
161
162         cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position);
163 }
164
165 #define CEDRUS_MAX_REF_IDX      32
166
167 static void _cedrus_write_ref_list(struct cedrus_ctx *ctx,
168                                    struct cedrus_run *run,
169                                    const struct v4l2_h264_reference *ref_list,
170                                    u8 num_ref, enum cedrus_h264_sram_off sram)
171 {
172         const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
173         struct vb2_queue *cap_q;
174         struct cedrus_dev *dev = ctx->dev;
175         u8 sram_array[CEDRUS_MAX_REF_IDX];
176         unsigned int i;
177         size_t size;
178
179         cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
180
181         memset(sram_array, 0, sizeof(sram_array));
182
183         for (i = 0; i < num_ref; i++) {
184                 const struct v4l2_h264_dpb_entry *dpb;
185                 const struct cedrus_buffer *cedrus_buf;
186                 const struct vb2_v4l2_buffer *ref_buf;
187                 unsigned int position;
188                 int buf_idx;
189                 u8 dpb_idx;
190
191                 dpb_idx = ref_list[i].index;
192                 dpb = &decode->dpb[dpb_idx];
193
194                 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
195                         continue;
196
197                 buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0);
198                 if (buf_idx < 0)
199                         continue;
200
201                 ref_buf = to_vb2_v4l2_buffer(cap_q->bufs[buf_idx]);
202                 cedrus_buf = vb2_v4l2_to_cedrus_buffer(ref_buf);
203                 position = cedrus_buf->codec.h264.position;
204
205                 sram_array[i] |= position << 1;
206                 if (ref_buf->field == V4L2_FIELD_BOTTOM)
207                         sram_array[i] |= BIT(0);
208         }
209
210         size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array));
211         cedrus_h264_write_sram(dev, sram, &sram_array, size);
212 }
213
214 static void cedrus_write_ref_list0(struct cedrus_ctx *ctx,
215                                    struct cedrus_run *run)
216 {
217         const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
218
219         _cedrus_write_ref_list(ctx, run,
220                                slice->ref_pic_list0,
221                                slice->num_ref_idx_l0_active_minus1 + 1,
222                                CEDRUS_SRAM_H264_REF_LIST_0);
223 }
224
225 static void cedrus_write_ref_list1(struct cedrus_ctx *ctx,
226                                    struct cedrus_run *run)
227 {
228         const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
229
230         _cedrus_write_ref_list(ctx, run,
231                                slice->ref_pic_list1,
232                                slice->num_ref_idx_l1_active_minus1 + 1,
233                                CEDRUS_SRAM_H264_REF_LIST_1);
234 }
235
236 static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx,
237                                        struct cedrus_run *run)
238 {
239         const struct v4l2_ctrl_h264_scaling_matrix *scaling =
240                 run->h264.scaling_matrix;
241         struct cedrus_dev *dev = ctx->dev;
242
243         cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0,
244                                scaling->scaling_list_8x8[0],
245                                sizeof(scaling->scaling_list_8x8[0]));
246
247         cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1,
248                                scaling->scaling_list_8x8[1],
249                                sizeof(scaling->scaling_list_8x8[1]));
250
251         cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4,
252                                scaling->scaling_list_4x4,
253                                sizeof(scaling->scaling_list_4x4));
254 }
255
256 static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx,
257                                            struct cedrus_run *run)
258 {
259         const struct v4l2_ctrl_h264_pred_weights *pred_weight =
260                 run->h264.pred_weights;
261         struct cedrus_dev *dev = ctx->dev;
262         int i, j, k;
263
264         cedrus_write(dev, VE_H264_SHS_WP,
265                      ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) |
266                      ((pred_weight->luma_log2_weight_denom & 0x7) << 0));
267
268         cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET,
269                      CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2);
270
271         for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) {
272                 const struct v4l2_h264_weight_factors *factors =
273                         &pred_weight->weight_factors[i];
274
275                 for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) {
276                         u32 val;
277
278                         val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) |
279                                 (factors->luma_weight[j] & 0x1ff);
280                         cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
281                 }
282
283                 for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) {
284                         for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) {
285                                 u32 val;
286
287                                 val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) |
288                                         (factors->chroma_weight[j][k] & 0x1ff);
289                                 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
290                         }
291                 }
292         }
293 }
294
295 /*
296  * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In
297  * rare cases frame is not decoded correctly. However, setting offset to 0 and
298  * skipping appropriate amount of bits with flush bits trigger always works.
299  */
300 static void cedrus_skip_bits(struct cedrus_dev *dev, int num)
301 {
302         int count = 0;
303
304         while (count < num) {
305                 int tmp = min(num - count, 32);
306
307                 cedrus_write(dev, VE_H264_TRIGGER_TYPE,
308                              VE_H264_TRIGGER_TYPE_FLUSH_BITS |
309                              VE_H264_TRIGGER_TYPE_N_BITS(tmp));
310                 while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY)
311                         udelay(1);
312
313                 count += tmp;
314         }
315 }
316
317 static void cedrus_set_params(struct cedrus_ctx *ctx,
318                               struct cedrus_run *run)
319 {
320         const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
321         const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
322         const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
323         const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
324         struct vb2_buffer *src_buf = &run->src->vb2_buf;
325         struct cedrus_dev *dev = ctx->dev;
326         dma_addr_t src_buf_addr;
327         size_t slice_bytes = vb2_get_plane_payload(src_buf, 0);
328         unsigned int pic_width_in_mbs;
329         bool mbaff_pic;
330         u32 reg;
331
332         cedrus_write(dev, VE_H264_VLD_LEN, slice_bytes * 8);
333         cedrus_write(dev, VE_H264_VLD_OFFSET, 0);
334
335         src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
336         cedrus_write(dev, VE_H264_VLD_END, src_buf_addr + slice_bytes);
337         cedrus_write(dev, VE_H264_VLD_ADDR,
338                      VE_H264_VLD_ADDR_VAL(src_buf_addr) |
339                      VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID |
340                      VE_H264_VLD_ADDR_LAST);
341
342         if (ctx->src_fmt.width > 2048) {
343                 cedrus_write(dev, VE_BUF_CTRL,
344                              VE_BUF_CTRL_INTRAPRED_MIXED_RAM |
345                              VE_BUF_CTRL_DBLK_MIXED_RAM);
346                 cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR,
347                              ctx->codec.h264.deblk_buf_dma);
348                 cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR,
349                              ctx->codec.h264.intra_pred_buf_dma);
350         } else {
351                 cedrus_write(dev, VE_BUF_CTRL,
352                              VE_BUF_CTRL_INTRAPRED_INT_SRAM |
353                              VE_BUF_CTRL_DBLK_INT_SRAM);
354         }
355
356         /*
357          * FIXME: Since the bitstream parsing is done in software, and
358          * in userspace, this shouldn't be needed anymore. But it
359          * turns out that removing it breaks the decoding process,
360          * without any clear indication why.
361          */
362         cedrus_write(dev, VE_H264_TRIGGER_TYPE,
363                      VE_H264_TRIGGER_TYPE_INIT_SWDEC);
364
365         cedrus_skip_bits(dev, slice->header_bit_size);
366
367         if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice))
368                 cedrus_write_pred_weight_table(ctx, run);
369
370         if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) ||
371             (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) ||
372             (slice->slice_type == V4L2_H264_SLICE_TYPE_B))
373                 cedrus_write_ref_list0(ctx, run);
374
375         if (slice->slice_type == V4L2_H264_SLICE_TYPE_B)
376                 cedrus_write_ref_list1(ctx, run);
377
378         // picture parameters
379         reg = 0;
380         /*
381          * FIXME: the kernel headers are allowing the default value to
382          * be passed, but the libva doesn't give us that.
383          */
384         reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10;
385         reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5;
386         reg |= (pps->weighted_bipred_idc & 0x3) << 2;
387         if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
388                 reg |= VE_H264_PPS_ENTROPY_CODING_MODE;
389         if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
390                 reg |= VE_H264_PPS_WEIGHTED_PRED;
391         if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
392                 reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED;
393         if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
394                 reg |= VE_H264_PPS_TRANSFORM_8X8_MODE;
395         cedrus_write(dev, VE_H264_PPS, reg);
396
397         // sequence parameters
398         reg = 0;
399         reg |= (sps->chroma_format_idc & 0x7) << 19;
400         reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8;
401         reg |= sps->pic_height_in_map_units_minus1 & 0xff;
402         if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
403                 reg |= VE_H264_SPS_MBS_ONLY;
404         if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
405                 reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD;
406         if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
407                 reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE;
408         cedrus_write(dev, VE_H264_SPS, reg);
409
410         mbaff_pic = !(slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) &&
411                     (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
412         pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1;
413
414         // slice parameters
415         reg = 0;
416         reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24;
417         reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) *
418                  (mbaff_pic + 1)) & 0xff) << 16;
419         reg |= decode->nal_ref_idc ? BIT(12) : 0;
420         reg |= (slice->slice_type & 0xf) << 8;
421         reg |= slice->cabac_init_idc & 0x3;
422         if (ctx->fh.m2m_ctx->new_frame)
423                 reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
424         if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
425                 reg |= VE_H264_SHS_FIELD_PIC;
426         if (slice->flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
427                 reg |= VE_H264_SHS_BOTTOM_FIELD;
428         if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED)
429                 reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED;
430         cedrus_write(dev, VE_H264_SHS, reg);
431
432         reg = 0;
433         reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD;
434         reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24;
435         reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16;
436         reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8;
437         reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4;
438         reg |= slice->slice_beta_offset_div2 & 0xf;
439         cedrus_write(dev, VE_H264_SHS2, reg);
440
441         reg = 0;
442         reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16;
443         reg |= (pps->chroma_qp_index_offset & 0x3f) << 8;
444         reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f;
445         cedrus_write(dev, VE_H264_SHS_QP, reg);
446
447         // clear status flags
448         cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS));
449
450         // enable int
451         cedrus_write(dev, VE_H264_CTRL,
452                      VE_H264_CTRL_SLICE_DECODE_INT |
453                      VE_H264_CTRL_DECODE_ERR_INT |
454                      VE_H264_CTRL_VLD_DATA_REQ_INT);
455 }
456
457 static enum cedrus_irq_status
458 cedrus_h264_irq_status(struct cedrus_ctx *ctx)
459 {
460         struct cedrus_dev *dev = ctx->dev;
461         u32 reg = cedrus_read(dev, VE_H264_STATUS);
462
463         if (reg & (VE_H264_STATUS_DECODE_ERR_INT |
464                    VE_H264_STATUS_VLD_DATA_REQ_INT))
465                 return CEDRUS_IRQ_ERROR;
466
467         if (reg & VE_H264_CTRL_SLICE_DECODE_INT)
468                 return CEDRUS_IRQ_OK;
469
470         return CEDRUS_IRQ_NONE;
471 }
472
473 static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx)
474 {
475         struct cedrus_dev *dev = ctx->dev;
476
477         cedrus_write(dev, VE_H264_STATUS,
478                      VE_H264_STATUS_INT_MASK);
479 }
480
481 static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx)
482 {
483         struct cedrus_dev *dev = ctx->dev;
484         u32 reg = cedrus_read(dev, VE_H264_CTRL);
485
486         cedrus_write(dev, VE_H264_CTRL,
487                      reg & ~VE_H264_CTRL_INT_MASK);
488 }
489
490 static void cedrus_h264_setup(struct cedrus_ctx *ctx,
491                               struct cedrus_run *run)
492 {
493         struct cedrus_dev *dev = ctx->dev;
494
495         cedrus_engine_enable(ctx, CEDRUS_CODEC_H264);
496
497         cedrus_write(dev, VE_H264_SDROT_CTRL, 0);
498         cedrus_write(dev, VE_H264_EXTRA_BUFFER1,
499                      ctx->codec.h264.pic_info_buf_dma);
500         cedrus_write(dev, VE_H264_EXTRA_BUFFER2,
501                      ctx->codec.h264.neighbor_info_buf_dma);
502
503         cedrus_write_scaling_lists(ctx, run);
504         cedrus_write_frame_list(ctx, run);
505
506         cedrus_set_params(ctx, run);
507 }
508
509 static int cedrus_h264_start(struct cedrus_ctx *ctx)
510 {
511         struct cedrus_dev *dev = ctx->dev;
512         unsigned int pic_info_size;
513         unsigned int field_size;
514         unsigned int mv_col_size;
515         int ret;
516
517         /* Formula for picture buffer size is taken from CedarX source. */
518
519         if (ctx->src_fmt.width > 2048)
520                 pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000;
521         else
522                 pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000;
523
524         /*
525          * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set,
526          * there is no need to multiply by 2.
527          */
528         pic_info_size += ctx->src_fmt.height * 2 * 64;
529
530         if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE)
531                 pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE;
532
533         ctx->codec.h264.pic_info_buf_size = pic_info_size;
534         ctx->codec.h264.pic_info_buf =
535                 dma_alloc_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
536                                    &ctx->codec.h264.pic_info_buf_dma,
537                                    GFP_KERNEL);
538         if (!ctx->codec.h264.pic_info_buf)
539                 return -ENOMEM;
540
541         /*
542          * That buffer is supposed to be 16kiB in size, and be aligned
543          * on 16kiB as well. However, dma_alloc_coherent provides the
544          * guarantee that we'll have a CPU and DMA address aligned on
545          * the smallest page order that is greater to the requested
546          * size, so we don't have to overallocate.
547          */
548         ctx->codec.h264.neighbor_info_buf =
549                 dma_alloc_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
550                                    &ctx->codec.h264.neighbor_info_buf_dma,
551                                    GFP_KERNEL);
552         if (!ctx->codec.h264.neighbor_info_buf) {
553                 ret = -ENOMEM;
554                 goto err_pic_buf;
555         }
556
557         field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
558                 DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
559
560         /*
561          * FIXME: This is actually conditional to
562          * V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE not being set, we
563          * might have to rework this if memory efficiency ever is
564          * something we need to work on.
565          */
566         field_size = field_size * 2;
567
568         /*
569          * FIXME: This is actually conditional to
570          * V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY not being set, we might
571          * have to rework this if memory efficiency ever is something
572          * we need to work on.
573          */
574         field_size = field_size * 2;
575         ctx->codec.h264.mv_col_buf_field_size = field_size;
576
577         mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM;
578         ctx->codec.h264.mv_col_buf_size = mv_col_size;
579         ctx->codec.h264.mv_col_buf = dma_alloc_coherent(dev->dev,
580                                                         ctx->codec.h264.mv_col_buf_size,
581                                                         &ctx->codec.h264.mv_col_buf_dma,
582                                                         GFP_KERNEL);
583         if (!ctx->codec.h264.mv_col_buf) {
584                 ret = -ENOMEM;
585                 goto err_neighbor_buf;
586         }
587
588         if (ctx->src_fmt.width > 2048) {
589                 /*
590                  * Formulas for deblock and intra prediction buffer sizes
591                  * are taken from CedarX source.
592                  */
593
594                 ctx->codec.h264.deblk_buf_size =
595                         ALIGN(ctx->src_fmt.width, 32) * 12;
596                 ctx->codec.h264.deblk_buf =
597                         dma_alloc_coherent(dev->dev,
598                                            ctx->codec.h264.deblk_buf_size,
599                                            &ctx->codec.h264.deblk_buf_dma,
600                                            GFP_KERNEL);
601                 if (!ctx->codec.h264.deblk_buf) {
602                         ret = -ENOMEM;
603                         goto err_mv_col_buf;
604                 }
605
606                 /*
607                  * NOTE: Multiplying by two deviates from CedarX logic, but it
608                  * is for some unknown reason needed for H264 4K decoding on H6.
609                  */
610                 ctx->codec.h264.intra_pred_buf_size =
611                         ALIGN(ctx->src_fmt.width, 64) * 5 * 2;
612                 ctx->codec.h264.intra_pred_buf =
613                         dma_alloc_coherent(dev->dev,
614                                            ctx->codec.h264.intra_pred_buf_size,
615                                            &ctx->codec.h264.intra_pred_buf_dma,
616                                            GFP_KERNEL);
617                 if (!ctx->codec.h264.intra_pred_buf) {
618                         ret = -ENOMEM;
619                         goto err_deblk_buf;
620                 }
621         }
622
623         return 0;
624
625 err_deblk_buf:
626         dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size,
627                           ctx->codec.h264.deblk_buf,
628                           ctx->codec.h264.deblk_buf_dma);
629
630 err_mv_col_buf:
631         dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
632                           ctx->codec.h264.mv_col_buf,
633                           ctx->codec.h264.mv_col_buf_dma);
634
635 err_neighbor_buf:
636         dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
637                           ctx->codec.h264.neighbor_info_buf,
638                           ctx->codec.h264.neighbor_info_buf_dma);
639
640 err_pic_buf:
641         dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
642                           ctx->codec.h264.pic_info_buf,
643                           ctx->codec.h264.pic_info_buf_dma);
644         return ret;
645 }
646
647 static void cedrus_h264_stop(struct cedrus_ctx *ctx)
648 {
649         struct cedrus_dev *dev = ctx->dev;
650
651         dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
652                           ctx->codec.h264.mv_col_buf,
653                           ctx->codec.h264.mv_col_buf_dma);
654         dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
655                           ctx->codec.h264.neighbor_info_buf,
656                           ctx->codec.h264.neighbor_info_buf_dma);
657         dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
658                           ctx->codec.h264.pic_info_buf,
659                           ctx->codec.h264.pic_info_buf_dma);
660         if (ctx->codec.h264.deblk_buf_size)
661                 dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size,
662                                   ctx->codec.h264.deblk_buf,
663                                   ctx->codec.h264.deblk_buf_dma);
664         if (ctx->codec.h264.intra_pred_buf_size)
665                 dma_free_coherent(dev->dev, ctx->codec.h264.intra_pred_buf_size,
666                                   ctx->codec.h264.intra_pred_buf,
667                                   ctx->codec.h264.intra_pred_buf_dma);
668 }
669
670 static void cedrus_h264_trigger(struct cedrus_ctx *ctx)
671 {
672         struct cedrus_dev *dev = ctx->dev;
673
674         cedrus_write(dev, VE_H264_TRIGGER_TYPE,
675                      VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE);
676 }
677
678 struct cedrus_dec_ops cedrus_dec_ops_h264 = {
679         .irq_clear      = cedrus_h264_irq_clear,
680         .irq_disable    = cedrus_h264_irq_disable,
681         .irq_status     = cedrus_h264_irq_status,
682         .setup          = cedrus_h264_setup,
683         .start          = cedrus_h264_start,
684         .stop           = cedrus_h264_stop,
685         .trigger        = cedrus_h264_trigger,
686 };