1 // SPDX-License-Identifier: GPL-2.0-or-later
5 * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com>
6 * Copyright (c) 2018 Bootlin
9 #include <linux/delay.h>
10 #include <linux/types.h>
12 #include <media/videobuf2-dma-contig.h>
15 #include "cedrus_hw.h"
16 #include "cedrus_regs.h"
/*
 * Offsets passed as the off argument of cedrus_h264_write_sram(), one per
 * table this file loads through the VE_AVC_SRAM_PORT_* registers. The writer
 * shifts the value left by two before programming VE_AVC_SRAM_PORT_OFFSET.
 * NOTE(review): that shift suggests the values count 32-bit words - confirm.
 */
18 enum cedrus_h264_sram_off {
/* Target of the word-by-word writes in cedrus_write_pred_weight_table(). */
19 CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE = 0x000,
/* Target of the pic_list array written by cedrus_write_frame_list(). */
20 CEDRUS_SRAM_H264_FRAMEBUFFER_LIST = 0x100,
/* Targets used by cedrus_write_ref_list0() and cedrus_write_ref_list1(). */
21 CEDRUS_SRAM_H264_REF_LIST_0 = 0x190,
22 CEDRUS_SRAM_H264_REF_LIST_1 = 0x199,
/* Targets used by cedrus_write_scaling_lists(). */
23 CEDRUS_SRAM_H264_SCALING_LIST_8x8_0 = 0x200,
24 CEDRUS_SRAM_H264_SCALING_LIST_8x8_1 = 0x210,
25 CEDRUS_SRAM_H264_SCALING_LIST_4x4 = 0x220,
/*
 * Layout of one entry of the frame list that cedrus_write_frame_list() writes
 * to CEDRUS_SRAM_H264_FRAMEBUFFER_LIST. The members shown are little-endian
 * 32-bit values; cedrus_fill_ref_pic() populates an entry with cpu_to_le32().
 * NOTE(review): cedrus_fill_ref_pic() also stores frame_info, luma_ptr and
 * chroma_ptr, which are not among the members shown here - check the full
 * definition before relying on this layout.
 */
28 struct cedrus_h264_sram_ref_pic {
/* Field order counts, copied from a DPB entry or from the decode parameters. */
29 __le32 top_field_order_cnt;
30 __le32 bottom_field_order_cnt;
/*
 * NOTE(review): presumably hold cedrus_h264_mv_col_buf_addr() for field 0
 * and field 1 respectively - confirm against cedrus_fill_ref_pic().
 */
34 __le32 mv_col_top_ptr;
35 __le32 mv_col_bot_ptr;
/*
 * Number of entries in the frame list built by cedrus_write_frame_list() and
 * the bound used when searching for a free position there. It also scales the
 * picture info and motion vector buffer sizes in cedrus_h264_start().
 */
39 #define CEDRUS_H264_FRAME_NUM 18
/* Size of the neighbor info buffer allocated in cedrus_h264_start(). */
41 #define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (16 * SZ_1K)
/* Lower bound applied to the picture info buffer size in cedrus_h264_start(). */
42 #define CEDRUS_MIN_PIC_INFO_BUF_SIZE (130 * SZ_1K)
/*
 * Copy a buffer to the SRAM behind the VE_AVC_SRAM_PORT_* registers.
 *
 * @dev:  device whose registers are written
 * @off:  SRAM offset; written to VE_AVC_SRAM_PORT_OFFSET shifted left by two
 * @data: source buffer, read as consecutive 32-bit words
 * @len:  length of @data in bytes, rounded up to whole 32-bit words
 *
 * NOTE(review): the loop header consuming count is not shown here; presumably
 * one word goes to VE_AVC_SRAM_PORT_DATA per count. If so, a @len that is not
 * a multiple of 4 reads up to 3 bytes past @len - confirm callers pass
 * suitably sized buffers.
 */
44 static void cedrus_h264_write_sram(struct cedrus_dev *dev,
45 enum cedrus_h264_sram_off off,
46 const void *data, size_t len)
48 const u32 *buffer = data;
/* Number of 32-bit words needed to cover len bytes. */
49 size_t count = DIV_ROUND_UP(len, 4);
/* Select where the following data port writes start. */
51 cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2);
/* Push the next word through the data port and advance the source pointer. */
54 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
/*
 * Compute a DMA address inside the context motion vector buffer.
 *
 * @ctx:      context providing mv_col_buf_dma and mv_col_buf_field_size
 * @position: frame list position; each position spans two field-sized areas
 *
 * A field value (used below; its parameter declaration is not shown here)
 * selects one of the two areas of that position: callers pass 0 or 1.
 * NOTE(review): presumably returns the computed addr - confirm.
 */
57 static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_ctx *ctx,
58 unsigned int position,
/* Start from the base of the buffer allocated in cedrus_h264_start(). */
61 dma_addr_t addr = ctx->codec.h264.mv_col_buf_dma;
63 /* Adjust for the position */
64 addr += position * ctx->codec.h264.mv_col_buf_field_size * 2;
66 /* Adjust for the field */
67 addr += field * ctx->codec.h264.mv_col_buf_field_size;
/*
 * Fill one frame list entry from a buffer and a pair of field order counts.
 *
 * @ctx:                    context supplying dst_fmt and the mv buffer
 * @buf:                    buffer whose position, pic_type and plane
 *                          addresses describe the picture
 * @top_field_order_cnt:    value stored in pic->top_field_order_cnt
 * @bottom_field_order_cnt: value stored in pic->bottom_field_order_cnt
 * @pic:                    entry to fill; every store goes through
 *                          cpu_to_le32()
 */
72 static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
73 struct cedrus_buffer *buf,
74 unsigned int top_field_order_cnt,
75 unsigned int bottom_field_order_cnt,
76 struct cedrus_h264_sram_ref_pic *pic)
78 struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
79 unsigned int position = buf->codec.h264.position;
81 pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt);
82 pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt);
/* The picture type recorded on the buffer goes in starting at bit 8. */
83 pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8);
/* Plane 0 and plane 1 addresses of the buffer for the destination format. */
85 pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0));
86 pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1));
/*
 * Motion vector addresses for field 0 and field 1 of this position.
 * NOTE(review): the assignment targets are not shown here; presumably
 * pic->mv_col_top_ptr and pic->mv_col_bot_ptr - confirm.
 */
88 cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 0));
90 cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 1));
/*
 * Build the frame list for the current run and write it to SRAM.
 *
 * @ctx: decoding context
 * @run: current run; provides the decode, slice and SPS controls and the
 *       destination buffer
 *
 * Entries are indexed by the position stored on each capture buffer. DPB
 * entries contribute their field order counts; the destination buffer is
 * assigned a position that no valid DPB entry occupies, and that position is
 * also written to VE_H264_OUTPUT_FRAME_IDX.
 */
93 static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
94 struct cedrus_run *run)
96 struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM];
97 const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
98 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
99 const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
100 struct vb2_queue *cap_q;
101 struct cedrus_buffer *output_buf;
102 struct cedrus_dev *dev = ctx->dev;
/* Bitmap of positions held by buffers that valid DPB entries refer to. */
103 unsigned long used_dpbs = 0;
104 unsigned int position;
/* Highest position filled from an active DPB entry so far. */
105 unsigned int output = 0;
108 cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
/* Entries that are never filled stay all zero. */
110 memset(pic_list, 0, sizeof(pic_list));
112 for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) {
113 const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
114 struct cedrus_buffer *cedrus_buf;
/* NOTE(review): entries without the VALID flag are presumably skipped. */
117 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
/* Locate the capture buffer carrying the timestamp of this DPB entry. */
120 buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0);
124 cedrus_buf = vb2_to_cedrus_buffer(cap_q->bufs[buf_idx]);
125 position = cedrus_buf->codec.h264.position;
126 used_dpbs |= BIT(position);
/*
 * NOTE(review): entries without the ACTIVE flag presumably keep their
 * position reserved above but are not written to the list - confirm.
 */
128 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
131 cedrus_fill_ref_pic(ctx, cedrus_buf,
132 dpb->top_field_order_cnt,
133 dpb->bottom_field_order_cnt,
134 &pic_list[position]);
136 output = max(position, output);
/*
 * Choose the position of the destination buffer: a clear bit of used_dpbs
 * below CEDRUS_H264_FRAME_NUM (the start offset argument is not shown
 * here), wrapping to the first clear bit when that search runs off the end.
 * NOTE(review): no check is visible for the case where no bit is clear;
 * position would then index past pic_list - confirm this cannot happen.
 */
139 position = find_next_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM,
141 if (position >= CEDRUS_H264_FRAME_NUM)
142 position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM);
/* Remember the position on the buffer so later runs can look it up. */
144 output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
145 output_buf->codec.h264.position = position;
/*
 * Picture type: FIELD when the slice is a field picture, MBAFF when the
 * SPS enables macroblock adaptive frame/field coding; the FRAME assignment
 * is presumably the remaining else case.
 */
147 if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
148 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD;
149 else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
150 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF;
152 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME;
/* The picture being decoded gets an entry as well. */
154 cedrus_fill_ref_pic(ctx, output_buf,
155 decode->top_field_order_cnt,
156 decode->bottom_field_order_cnt,
157 &pic_list[position]);
/* Upload the whole list, then tell the hardware which entry is the output. */
159 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST,
160 pic_list, sizeof(pic_list));
162 cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position);
/* Size in bytes of the reference list staging array built below. */
165 #define CEDRUS_MAX_REF_IDX 32
/*
 * Translate a V4L2 reference list into one byte per reference and write the
 * result to SRAM.
 *
 * @ctx:      decoding context
 * @run:      current run; provides the decode parameters holding the DPB
 * @ref_list: references to translate; each index selects a DPB entry
 * @num_ref:  number of entries of @ref_list to process
 * @sram:     SRAM offset that receives the translated list
 *
 * Each byte holds the frame list position of the referenced buffer shifted
 * left by one, with bit 0 set when the buffer field is V4L2_FIELD_BOTTOM.
 *
 * NOTE(review): the loop indexes sram_array with i < num_ref and no bound
 * against CEDRUS_MAX_REF_IDX is visible here; presumably the controls limit
 * num_ref to at most 32 - confirm.
 */
167 static void _cedrus_write_ref_list(struct cedrus_ctx *ctx,
168 struct cedrus_run *run,
169 const struct v4l2_h264_reference *ref_list,
170 u8 num_ref, enum cedrus_h264_sram_off sram)
172 const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
173 struct vb2_queue *cap_q;
174 struct cedrus_dev *dev = ctx->dev;
175 u8 sram_array[CEDRUS_MAX_REF_IDX];
179 cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
/* Bytes of references that are not filled in stay zero. */
181 memset(sram_array, 0, sizeof(sram_array));
183 for (i = 0; i < num_ref; i++) {
184 const struct v4l2_h264_dpb_entry *dpb;
185 const struct cedrus_buffer *cedrus_buf;
186 const struct vb2_v4l2_buffer *ref_buf;
187 unsigned int position;
191 dpb_idx = ref_list[i].index;
192 dpb = &decode->dpb[dpb_idx];
/* NOTE(review): references to inactive DPB entries are presumably skipped. */
194 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
/* Locate the capture buffer carrying the timestamp of this DPB entry. */
197 buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0);
201 ref_buf = to_vb2_v4l2_buffer(cap_q->bufs[buf_idx]);
202 cedrus_buf = vb2_v4l2_to_cedrus_buffer(ref_buf);
203 position = cedrus_buf->codec.h264.position;
205 sram_array[i] |= position << 1;
206 if (ref_buf->field == V4L2_FIELD_BOTTOM)
207 sram_array[i] |= BIT(0);
/* Write num_ref rounded up to a multiple of 4 bytes, capped at the array size. */
210 size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array));
211 cedrus_h264_write_sram(dev, sram, &sram_array, size);
/*
 * Write reference picture list 0 of the current slice to
 * CEDRUS_SRAM_H264_REF_LIST_0.
 *
 * @ctx: decoding context
 * @run: current run; provides the slice parameters
 */
214 static void cedrus_write_ref_list0(struct cedrus_ctx *ctx,
215 struct cedrus_run *run)
217 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
/* The control stores the active count minus one, hence the + 1. */
219 _cedrus_write_ref_list(ctx, run,
220 slice->ref_pic_list0,
221 slice->num_ref_idx_l0_active_minus1 + 1,
222 CEDRUS_SRAM_H264_REF_LIST_0);
/*
 * Write reference picture list 1 of the current slice to
 * CEDRUS_SRAM_H264_REF_LIST_1.
 *
 * @ctx: decoding context
 * @run: current run; provides the slice parameters
 */
225 static void cedrus_write_ref_list1(struct cedrus_ctx *ctx,
226 struct cedrus_run *run)
228 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
/* The control stores the active count minus one, hence the + 1. */
230 _cedrus_write_ref_list(ctx, run,
231 slice->ref_pic_list1,
232 slice->num_ref_idx_l1_active_minus1 + 1,
233 CEDRUS_SRAM_H264_REF_LIST_1);
/*
 * Upload the scaling matrix control of the current run to SRAM.
 *
 * @ctx: decoding context
 * @run: current run; provides the scaling matrix control
 *
 * Only entries 0 and 1 of scaling_list_8x8 are written, each to its own SRAM
 * offset; scaling_list_4x4 is written as a whole.
 */
236 static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx,
237 struct cedrus_run *run)
239 const struct v4l2_ctrl_h264_scaling_matrix *scaling =
240 run->h264.scaling_matrix;
241 struct cedrus_dev *dev = ctx->dev;
243 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0,
244 scaling->scaling_list_8x8[0],
245 sizeof(scaling->scaling_list_8x8[0]));
247 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1,
248 scaling->scaling_list_8x8[1],
249 sizeof(scaling->scaling_list_8x8[1]));
251 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4,
252 scaling->scaling_list_4x4,
253 sizeof(scaling->scaling_list_4x4));
/*
 * Program the prediction weight denominators and upload the weight table.
 *
 * @ctx: decoding context
 * @run: current run; provides the prediction weights control
 *
 * Every table word packs an offset in bits 16 and up and a weight in the low
 * bits, each masked to 9 bits. For each weight_factors entry all luma words
 * are written first, followed by the chroma words.
 */
256 static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx,
257 struct cedrus_run *run)
259 const struct v4l2_ctrl_h264_pred_weights *pred_weight =
260 run->h264.pred_weights;
261 struct cedrus_dev *dev = ctx->dev;
/* Chroma denominator in bits 4-6, luma denominator in bits 0-2. */
264 cedrus_write(dev, VE_H264_SHS_WP,
265 ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) |
266 ((pred_weight->luma_log2_weight_denom & 0x7) << 0));
/* Point the SRAM port at the table; the data writes below follow from there. */
268 cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET,
269 CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2);
271 for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) {
272 const struct v4l2_h264_weight_factors *factors =
273 &pred_weight->weight_factors[i];
275 for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) {
278 val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) |
279 (factors->luma_weight[j] & 0x1ff);
280 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
/* Chroma words: the second index k walks the inner dimension of the arrays. */
283 for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) {
284 for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) {
287 val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) |
288 (factors->chroma_weight[j][k] & 0x1ff);
289 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
296 * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In
297 * rare cases frame is not decoded correctly. However, setting offset to 0 and
298 * skipping appropriate amount of bits with flush bits trigger always works.
/*
 * Skip bits of the bitstream using the flush bits trigger.
 *
 * @dev: device whose registers are accessed
 * @num: total number of bits to skip
 *
 * The skip is issued in chunks of at most 32 bits, and the VLD busy status
 * is polled after each trigger.
 * NOTE(review): count is presumably advanced by tmp after each chunk; that
 * statement and the body of the polling loop are not shown here.
 */
300 static void cedrus_skip_bits(struct cedrus_dev *dev, int num)
304 while (count < num) {
/* Size of this chunk: whatever is left, capped at 32 bits. */
305 int tmp = min(num - count, 32);
307 cedrus_write(dev, VE_H264_TRIGGER_TYPE,
308 VE_H264_TRIGGER_TYPE_FLUSH_BITS |
309 VE_H264_TRIGGER_TYPE_N_BITS(tmp));
/* Wait while the status register still reports the VLD as busy. */
310 while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY)
/*
 * Program the per-slice decoding registers for the current run.
 *
 * @ctx: decoding context
 * @run: current run; provides the source buffer and the decode, slice, PPS
 *       and SPS controls
 *
 * In order, this function:
 *  - describes the source bitstream (length in bits, zero offset, end
 *    address, start address flagged first, valid and last);
 *  - selects DRAM backed deblocking and intra prediction buffers when the
 *    source width exceeds 2048 (the internal SRAM setting is presumably the
 *    else case);
 *  - issues the INIT_SWDEC trigger and skips header_bit_size bits;
 *  - uploads the prediction weight table when
 *    V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED() says so, reference list 0 for P,
 *    SP and B slices, and reference list 1 for B slices;
 *  - writes VE_H264_PPS, VE_H264_SPS, VE_H264_SHS, VE_H264_SHS2 and
 *    VE_H264_SHS_QP from the controls;
 *  - clears the status flags and enables the slice decode, decode error and
 *    VLD data request interrupts.
 */
317 static void cedrus_set_params(struct cedrus_ctx *ctx,
318 struct cedrus_run *run)
320 const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
321 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
322 const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
323 const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
324 struct vb2_buffer *src_buf = &run->src->vb2_buf;
325 struct cedrus_dev *dev = ctx->dev;
326 dma_addr_t src_buf_addr;
/* Payload size of plane 0 of the source buffer, in bytes. */
327 size_t slice_bytes = vb2_get_plane_payload(src_buf, 0);
328 unsigned int pic_width_in_mbs;
/* The length register takes bits; the offset stays 0, see cedrus_skip_bits(). */
332 cedrus_write(dev, VE_H264_VLD_LEN, slice_bytes * 8);
333 cedrus_write(dev, VE_H264_VLD_OFFSET, 0);
335 src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
336 cedrus_write(dev, VE_H264_VLD_END, src_buf_addr + slice_bytes);
337 cedrus_write(dev, VE_H264_VLD_ADDR,
338 VE_H264_VLD_ADDR_VAL(src_buf_addr) |
339 VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID |
340 VE_H264_VLD_ADDR_LAST);
/* Same width threshold that makes cedrus_h264_start() allocate these buffers. */
342 if (ctx->src_fmt.width > 2048) {
343 cedrus_write(dev, VE_BUF_CTRL,
344 VE_BUF_CTRL_INTRAPRED_MIXED_RAM |
345 VE_BUF_CTRL_DBLK_MIXED_RAM);
346 cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR,
347 ctx->codec.h264.deblk_buf_dma);
348 cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR,
349 ctx->codec.h264.intra_pred_buf_dma);
351 cedrus_write(dev, VE_BUF_CTRL,
352 VE_BUF_CTRL_INTRAPRED_INT_SRAM |
353 VE_BUF_CTRL_DBLK_INT_SRAM);
357 * FIXME: Since the bitstream parsing is done in software, and
358 * in userspace, this shouldn't be needed anymore. But it
359 * turns out that removing it breaks the decoding process,
360 * without any clear indication why.
362 cedrus_write(dev, VE_H264_TRIGGER_TYPE,
363 VE_H264_TRIGGER_TYPE_INIT_SWDEC);
365 cedrus_skip_bits(dev, slice->header_bit_size);
367 if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice))
368 cedrus_write_pred_weight_table(ctx, run);
370 if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) ||
371 (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) ||
372 (slice->slice_type == V4L2_H264_SLICE_TYPE_B))
373 cedrus_write_ref_list0(ctx, run);
375 if (slice->slice_type == V4L2_H264_SLICE_TYPE_B)
376 cedrus_write_ref_list1(ctx, run);
378 // picture parameters
381 * FIXME: the kernel headers are allowing the default value to
382 * be passed, but the libva doesn't give us that.
/*
 * VE_H264_PPS: list 0 count minus one in bits 10-14, list 1 count minus one
 * in bits 5-9, weighted_bipred_idc in bits 2-3, plus one flag bit per PPS
 * flag tested below. NOTE(review): reg is presumably reset to 0 before each
 * register is composed; those statements are not shown here.
 */
384 reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10;
385 reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5;
386 reg |= (pps->weighted_bipred_idc & 0x3) << 2;
387 if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
388 reg |= VE_H264_PPS_ENTROPY_CODING_MODE;
389 if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
390 reg |= VE_H264_PPS_WEIGHTED_PRED;
391 if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
392 reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED;
393 if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
394 reg |= VE_H264_PPS_TRANSFORM_8X8_MODE;
395 cedrus_write(dev, VE_H264_PPS, reg);
397 // sequence parameters
/*
 * VE_H264_SPS: chroma_format_idc in bits 19-21, width in macroblocks minus
 * one in bits 8-15, height in map units minus one in bits 0-7 (both masked
 * to 8 bits), plus one flag bit per SPS flag tested below.
 */
399 reg |= (sps->chroma_format_idc & 0x7) << 19;
400 reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8;
401 reg |= sps->pic_height_in_map_units_minus1 & 0xff;
402 if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
403 reg |= VE_H264_SPS_MBS_ONLY;
404 if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
405 reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD;
406 if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
407 reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE;
408 cedrus_write(dev, VE_H264_SPS, reg);
/* MBAFF picture: the SPS allows MBAFF and this slice is not a field picture. */
410 mbaff_pic = !(slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) &&
411 (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
412 pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1;
/*
 * VE_H264_SHS: first macroblock column in bits 24-31 and row in bits 16-23
 * (the row is doubled for MBAFF pictures), bit 12 set when nal_ref_idc is
 * non-zero, slice type in bits 8-11, cabac_init_idc in bits 0-1.
 */
416 reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24;
417 reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) *
418 (mbaff_pic + 1)) & 0xff) << 16;
419 reg |= decode->nal_ref_idc ? BIT(12) : 0;
420 reg |= (slice->slice_type & 0xf) << 8;
421 reg |= slice->cabac_init_idc & 0x3;
/* The first-slice flag follows the new_frame state of the m2m context. */
422 if (ctx->fh.m2m_ctx->new_frame)
423 reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
424 if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
425 reg |= VE_H264_SHS_FIELD_PIC;
426 if (slice->flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
427 reg |= VE_H264_SHS_BOTTOM_FIELD;
428 if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED)
429 reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED;
430 cedrus_write(dev, VE_H264_SHS, reg);
/*
 * VE_H264_SHS2: the override flag is always set, so the reference counts in
 * bits 24-28 and 16-20 are supplied here as well; deblocking filter idc in
 * bits 8-9, alpha and beta offsets in bits 4-7 and 0-3.
 */
433 reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD;
434 reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24;
435 reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16;
436 reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8;
437 reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4;
438 reg |= slice->slice_beta_offset_div2 & 0xf;
439 cedrus_write(dev, VE_H264_SHS2, reg);
/*
 * VE_H264_SHS_QP: chroma QP index offsets in bits 16-21 and 8-13; the low six
 * bits carry pic_init_qp_minus26 + 26 + slice_qp_delta.
 */
442 reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16;
443 reg |= (pps->chroma_qp_index_offset & 0x3f) << 8;
444 reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f;
445 cedrus_write(dev, VE_H264_SHS_QP, reg);
447 // clear status flags
448 cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS));
/* Enable the three interrupt sources that cedrus_h264_irq_status() checks. */
451 cedrus_write(dev, VE_H264_CTRL,
452 VE_H264_CTRL_SLICE_DECODE_INT |
453 VE_H264_CTRL_DECODE_ERR_INT |
454 VE_H264_CTRL_VLD_DATA_REQ_INT);
/*
 * Classify the interrupt state reported by VE_H264_STATUS.
 *
 * @ctx: decoding context
 *
 * Returns CEDRUS_IRQ_ERROR when a decode error or a VLD data request is
 * flagged, CEDRUS_IRQ_OK when slice decoding is flagged, and CEDRUS_IRQ_NONE
 * otherwise. The error flags are tested first, so they take precedence.
 */
457 static enum cedrus_irq_status
458 cedrus_h264_irq_status(struct cedrus_ctx *ctx)
460 struct cedrus_dev *dev = ctx->dev;
461 u32 reg = cedrus_read(dev, VE_H264_STATUS);
463 if (reg & (VE_H264_STATUS_DECODE_ERR_INT |
464 VE_H264_STATUS_VLD_DATA_REQ_INT))
465 return CEDRUS_IRQ_ERROR;
/*
 * NOTE(review): this tests a VE_H264_CTRL_* mask against the value read from
 * VE_H264_STATUS, while the check above uses VE_H264_STATUS_* masks - confirm
 * both registers use the same bit for the slice decode interrupt.
 */
467 if (reg & VE_H264_CTRL_SLICE_DECODE_INT)
468 return CEDRUS_IRQ_OK;
470 return CEDRUS_IRQ_NONE;
/*
 * Acknowledge the H.264 interrupts by writing VE_H264_STATUS_INT_MASK to the
 * status register.
 *
 * @ctx: decoding context
 */
473 static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx)
475 struct cedrus_dev *dev = ctx->dev;
477 cedrus_write(dev, VE_H264_STATUS,
478 VE_H264_STATUS_INT_MASK);
/*
 * Disable the H.264 interrupts: read VE_H264_CTRL, clear the bits of
 * VE_H264_CTRL_INT_MASK and write the result back, leaving the other bits of
 * the register as they were read.
 *
 * @ctx: decoding context
 */
481 static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx)
483 struct cedrus_dev *dev = ctx->dev;
484 u32 reg = cedrus_read(dev, VE_H264_CTRL);
486 cedrus_write(dev, VE_H264_CTRL,
487 reg & ~VE_H264_CTRL_INT_MASK);
/*
 * Prepare the hardware for decoding the current run.
 *
 * @ctx: decoding context
 * @run: current run, passed on to the helpers below
 *
 * Enables the engine for H.264, programs the two extra buffer addresses, then
 * uploads the scaling lists and the frame list before the slice parameters
 * are written by cedrus_set_params().
 */
490 static void cedrus_h264_setup(struct cedrus_ctx *ctx,
491 struct cedrus_run *run)
493 struct cedrus_dev *dev = ctx->dev;
495 cedrus_engine_enable(ctx, CEDRUS_CODEC_H264);
497 cedrus_write(dev, VE_H264_SDROT_CTRL, 0);
/* Buffers allocated in cedrus_h264_start(). */
498 cedrus_write(dev, VE_H264_EXTRA_BUFFER1,
499 ctx->codec.h264.pic_info_buf_dma);
500 cedrus_write(dev, VE_H264_EXTRA_BUFFER2,
501 ctx->codec.h264.neighbor_info_buf_dma);
503 cedrus_write_scaling_lists(ctx, run);
504 cedrus_write_frame_list(ctx, run);
506 cedrus_set_params(ctx, run);
/*
 * Allocate the DMA coherent working buffers of the H.264 decoder.
 *
 * @ctx: decoding context; sizes are derived from ctx->src_fmt and the
 *       resulting addresses and sizes are stored in ctx->codec.h264
 *
 * Buffers: picture info, neighbor info and motion vectors for every context,
 * plus deblocking and intra prediction buffers when the source width exceeds
 * 2048. The dma_free_coherent() calls at the end release buffers in reverse
 * order of allocation.
 * NOTE(review): the return statements, error labels (other than the
 * err_neighbor_buf goto) and allocation flags are not shown here; presumably
 * returns 0 on success and a negative errno after unwinding on failure.
 */
509 static int cedrus_h264_start(struct cedrus_ctx *ctx)
511 struct cedrus_dev *dev = ctx->dev;
512 unsigned int pic_info_size;
513 unsigned int field_size;
514 unsigned int mv_col_size;
517 /* Formula for picture buffer size is taken from CedarX source. */
/* Per frame share: 0x4000 bytes for wide sources, 0x1000 presumably otherwise. */
519 if (ctx->src_fmt.width > 2048)
520 pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000;
522 pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000;
525 * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set,
526 * there is no need to multiply by 2.
528 pic_info_size += ctx->src_fmt.height * 2 * 64;
/* Never allocate less than the fixed minimum. */
530 if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE)
531 pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE;
/* The size is stored so the buffer can be freed with the same value later. */
533 ctx->codec.h264.pic_info_buf_size = pic_info_size;
534 ctx->codec.h264.pic_info_buf =
535 dma_alloc_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
536 &ctx->codec.h264.pic_info_buf_dma,
538 if (!ctx->codec.h264.pic_info_buf)
542 * That buffer is supposed to be 16kiB in size, and be aligned
543 * on 16kiB as well. However, dma_alloc_coherent provides the
544 * guarantee that we'll have a CPU and DMA address aligned on
545 * the smallest page order that is greater to the requested
546 * size, so we don't have to overallocate.
548 ctx->codec.h264.neighbor_info_buf =
549 dma_alloc_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
550 &ctx->codec.h264.neighbor_info_buf_dma,
552 if (!ctx->codec.h264.neighbor_info_buf) {
/* 16 bytes for every 16x16 block of the source, dimensions rounded up. */
557 field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
558 DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
561 * FIXME: This is actually conditional to
562 * V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE not being set, we
563 * might have to rework this if memory efficiency ever is
564 * something we need to work on.
566 field_size = field_size * 2;
569 * FIXME: This is actually conditional to
570 * V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY not being set, we might
571 * have to rework this if memory efficiency ever is something
572 * we need to work on.
574 field_size = field_size * 2;
/* Read back by cedrus_h264_mv_col_buf_addr() to locate a field area. */
575 ctx->codec.h264.mv_col_buf_field_size = field_size;
/* Two field areas for each of the CEDRUS_H264_FRAME_NUM positions. */
577 mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM;
578 ctx->codec.h264.mv_col_buf_size = mv_col_size;
579 ctx->codec.h264.mv_col_buf = dma_alloc_coherent(dev->dev,
580 ctx->codec.h264.mv_col_buf_size,
581 &ctx->codec.h264.mv_col_buf_dma,
583 if (!ctx->codec.h264.mv_col_buf) {
585 goto err_neighbor_buf;
/* Same width threshold as the buffer selection in cedrus_set_params(). */
588 if (ctx->src_fmt.width > 2048) {
590 * Formulas for deblock and intra prediction buffer sizes
591 * are taken from CedarX source.
594 ctx->codec.h264.deblk_buf_size =
595 ALIGN(ctx->src_fmt.width, 32) * 12;
596 ctx->codec.h264.deblk_buf =
597 dma_alloc_coherent(dev->dev,
598 ctx->codec.h264.deblk_buf_size,
599 &ctx->codec.h264.deblk_buf_dma,
601 if (!ctx->codec.h264.deblk_buf) {
607 * NOTE: Multiplying by two deviates from CedarX logic, but it
608 * is for some unknown reason needed for H264 4K decoding on H6.
610 ctx->codec.h264.intra_pred_buf_size =
611 ALIGN(ctx->src_fmt.width, 64) * 5 * 2;
612 ctx->codec.h264.intra_pred_buf =
613 dma_alloc_coherent(dev->dev,
614 ctx->codec.h264.intra_pred_buf_size,
615 &ctx->codec.h264.intra_pred_buf_dma,
617 if (!ctx->codec.h264.intra_pred_buf) {
/*
 * Unwinding: each call below frees one buffer with the size it was allocated
 * with. NOTE(review): the labels separating these steps are not shown here.
 */
626 dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size,
627 ctx->codec.h264.deblk_buf,
628 ctx->codec.h264.deblk_buf_dma);
631 dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
632 ctx->codec.h264.mv_col_buf,
633 ctx->codec.h264.mv_col_buf_dma);
636 dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
637 ctx->codec.h264.neighbor_info_buf,
638 ctx->codec.h264.neighbor_info_buf_dma);
641 dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
642 ctx->codec.h264.pic_info_buf,
643 ctx->codec.h264.pic_info_buf_dma);
/*
 * Release the buffers allocated by cedrus_h264_start().
 *
 * @ctx: decoding context owning the buffers
 *
 * The motion vector, neighbor info and picture info buffers are always freed.
 * The deblocking and intra prediction buffers are freed only when their
 * recorded size is non-zero.
 * NOTE(review): cedrus_h264_start() assigns those two sizes only for widths
 * above 2048 and nothing visible here resets them; confirm they are zero for
 * contexts that never allocated the buffers and cannot be stale from an
 * earlier start.
 */
647 static void cedrus_h264_stop(struct cedrus_ctx *ctx)
649 struct cedrus_dev *dev = ctx->dev;
651 dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
652 ctx->codec.h264.mv_col_buf,
653 ctx->codec.h264.mv_col_buf_dma);
654 dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
655 ctx->codec.h264.neighbor_info_buf,
656 ctx->codec.h264.neighbor_info_buf_dma);
657 dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
658 ctx->codec.h264.pic_info_buf,
659 ctx->codec.h264.pic_info_buf_dma);
/* Optional buffers: the size doubles as the marker that they exist. */
660 if (ctx->codec.h264.deblk_buf_size)
661 dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size,
662 ctx->codec.h264.deblk_buf,
663 ctx->codec.h264.deblk_buf_dma);
664 if (ctx->codec.h264.intra_pred_buf_size)
665 dma_free_coherent(dev->dev, ctx->codec.h264.intra_pred_buf_size,
666 ctx->codec.h264.intra_pred_buf,
667 ctx->codec.h264.intra_pred_buf_dma);
/*
 * Start decoding by writing the AVC slice decode trigger to
 * VE_H264_TRIGGER_TYPE.
 *
 * @ctx: decoding context
 */
670 static void cedrus_h264_trigger(struct cedrus_ctx *ctx)
672 struct cedrus_dev *dev = ctx->dev;
674 cedrus_write(dev, VE_H264_TRIGGER_TYPE,
675 VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE);
/*
 * H.264 decoder operations: binds each cedrus_dec_ops hook to the function of
 * the same role defined in this file. This is the only symbol of the file
 * that is not static.
 */
678 struct cedrus_dec_ops cedrus_dec_ops_h264 = {
679 .irq_clear = cedrus_h264_irq_clear,
680 .irq_disable = cedrus_h264_irq_disable,
681 .irq_status = cedrus_h264_irq_status,
682 .setup = cedrus_h264_setup,
683 .start = cedrus_h264_start,
684 .stop = cedrus_h264_stop,
685 .trigger = cedrus_h264_trigger,