Linux 6.9-rc1
[linux-2.6-microblaze.git] / drivers / gpu / drm / imx / dcss / dcss-scaler.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright 2019 NXP.
4  *
5  * Scaling algorithms were contributed by Dzung Hoang <dzung.hoang@nxp.com>
6  */
7
8 #include <linux/device.h>
9 #include <linux/slab.h>
10
11 #include "dcss-dev.h"
12
/*
 * Scaler register map. Offsets are relative to a channel's register base;
 * the indented macros are the fields of the register listed above them.
 */

/* DCSS_SCALER_CTRL fields */
#define DCSS_SCALER_CTRL			0x00
#define   SCALER_EN				BIT(0)
#define   REPEAT_EN				BIT(4)
#define   SCALE2MEM_EN				BIT(8)
#define   MEM2OFIFO_EN				BIT(12)

/* DCSS_SCALER_OFIFO_CTRL fields */
#define DCSS_SCALER_OFIFO_CTRL			0x04
#define   OFIFO_LOW_THRES_POS			0
#define   OFIFO_LOW_THRES_MASK			GENMASK(9, 0)
#define   OFIFO_HIGH_THRES_POS			16
#define   OFIFO_HIGH_THRES_MASK			GENMASK(25, 16)
#define   UNDERRUN_DETECT_CLR			BIT(26)
#define   LOW_THRES_DETECT_CLR			BIT(27)
#define   HIGH_THRES_DETECT_CLR			BIT(28)
#define   UNDERRUN_DETECT_EN			BIT(29)
#define   LOW_THRES_DETECT_EN			BIT(30)
#define   HIGH_THRES_DETECT_EN			BIT(31)

/* DCSS_SCALER_SDATA_CTRL fields */
#define DCSS_SCALER_SDATA_CTRL			0x08
#define   YUV_EN				BIT(0)
#define   RTRAM_8LINES				BIT(1)
#define   Y_UV_BYTE_SWAP			BIT(4)
#define   A2R10G10B10_FORMAT_POS		8
#define   A2R10G10B10_FORMAT_MASK		GENMASK(11, 8)

/* DCSS_SCALER_BIT_DEPTH fields */
#define DCSS_SCALER_BIT_DEPTH			0x0C
#define   LUM_BIT_DEPTH_POS			0
#define   LUM_BIT_DEPTH_MASK			GENMASK(1, 0)
#define   CHR_BIT_DEPTH_POS			4
#define   CHR_BIT_DEPTH_MASK			GENMASK(5, 4)

/* source/destination buffer format registers */
#define DCSS_SCALER_SRC_FORMAT			0x10
#define DCSS_SCALER_DST_FORMAT			0x14
#define   FORMAT_MASK				GENMASK(1, 0)

/* resolution registers; all four share the WIDTH/HEIGHT field layout */
#define DCSS_SCALER_SRC_LUM_RES			0x18
#define DCSS_SCALER_SRC_CHR_RES			0x1C
#define DCSS_SCALER_DST_LUM_RES			0x20
#define DCSS_SCALER_DST_CHR_RES			0x24
#define   WIDTH_POS				0
#define   WIDTH_MASK				GENMASK(11, 0)
#define   HEIGHT_POS				16
#define   HEIGHT_MASK				GENMASK(27, 16)

/* start phase and increment registers */
#define DCSS_SCALER_V_LUM_START			0x48
#define   V_START_MASK				GENMASK(15, 0)
#define DCSS_SCALER_V_LUM_INC			0x4C
#define   V_INC_MASK				GENMASK(15, 0)
#define DCSS_SCALER_H_LUM_START			0x50
#define   H_START_MASK				GENMASK(18, 0)
#define DCSS_SCALER_H_LUM_INC			0x54
#define   H_INC_MASK				GENMASK(15, 0)
#define DCSS_SCALER_V_CHR_START			0x58
#define DCSS_SCALER_V_CHR_INC			0x5C
#define DCSS_SCALER_H_CHR_START			0x60
#define DCSS_SCALER_H_CHR_INC			0x64

/* base offsets of the filter coefficient tables */
#define DCSS_SCALER_COEF_VLUM			0x80
#define DCSS_SCALER_COEF_HLUM			0x140
#define DCSS_SCALER_COEF_VCHR			0x200
#define DCSS_SCALER_COEF_HCHR			0x300
67
68 struct dcss_scaler_ch {
69         void __iomem *base_reg;
70         u32 base_ofs;
71         struct dcss_scaler *scl;
72
73         u32 sdata_ctrl;
74         u32 scaler_ctrl;
75
76         bool scaler_ctrl_chgd;
77
78         u32 c_vstart;
79         u32 c_hstart;
80
81         bool use_nn_interpolation;
82 };
83
84 struct dcss_scaler {
85         struct device *dev;
86
87         struct dcss_ctxld *ctxld;
88         u32 ctx_id;
89
90         struct dcss_scaler_ch ch[3];
91 };
92
/* scaler coefficients generator: fixed-point and filter geometry constants */
#define PSC_FRAC_BITS			30
#define PSC_FRAC_SCALE			BIT(PSC_FRAC_BITS)

/* number of filter phases and how many of them are actually stored */
#define PSC_BITS_FOR_PHASE		4
#define PSC_NUM_PHASES			(1 << PSC_BITS_FOR_PHASE)
#define PSC_STORED_PHASES		(PSC_NUM_PHASES / 2 + 1)
#define PSC_PHASE_MASK			(PSC_NUM_PHASES - 1)

/* taps per phase: full filter, and the reduced 5-tap variant */
#define PSC_NUM_TAPS			7
#define PSC_NUM_TAPS_RGBA		5

/* fractional bits of the final coefficients and of the phase increments */
#define PSC_COEFF_PRECISION		10
#define PSC_PHASE_FRACTION_BITS		13

/* Q format used by mult_q()/div_q(): PSC_Q_FRACTION fractional bits */
#define PSC_Q_FRACTION			19
#define PSC_Q_ROUND_OFFSET		(1 << (PSC_Q_FRACTION - 1))
106
107 /**
108  * mult_q() - Performs fixed-point multiplication.
109  * @A: multiplier
110  * @B: multiplicand
111  */
112 static int mult_q(int A, int B)
113 {
114         int result;
115         s64 temp;
116
117         temp = (int64_t)A * (int64_t)B;
118         temp += PSC_Q_ROUND_OFFSET;
119         result = (int)(temp >> PSC_Q_FRACTION);
120         return result;
121 }
122
123 /**
124  * div_q() - Performs fixed-point division.
125  * @A: dividend
126  * @B: divisor
127  */
128 static int div_q(int A, int B)
129 {
130         int result;
131         s64 temp;
132
133         temp = (int64_t)A << PSC_Q_FRACTION;
134         if ((temp >= 0 && B >= 0) || (temp < 0 && B < 0))
135                 temp += B / 2;
136         else
137                 temp -= B / 2;
138
139         result = (int)(temp / B);
140         return result;
141 }
142
143 /**
144  * exp_approx_q() - Compute approximation to exp(x) function using Taylor
145  *                  series.
146  * @x: fixed-point argument of exp function
147  */
148 static int exp_approx_q(int x)
149 {
150         int sum = 1 << PSC_Q_FRACTION;
151         int term = 1 << PSC_Q_FRACTION;
152
153         term = mult_q(term, div_q(x, 1 << PSC_Q_FRACTION));
154         sum += term;
155         term = mult_q(term, div_q(x, 2 << PSC_Q_FRACTION));
156         sum += term;
157         term = mult_q(term, div_q(x, 3 << PSC_Q_FRACTION));
158         sum += term;
159         term = mult_q(term, div_q(x, 4 << PSC_Q_FRACTION));
160         sum += term;
161
162         return sum;
163 }
164
165 /**
166  * dcss_scaler_gaussian_filter() - Generate gaussian prototype filter.
167  * @fc_q: fixed-point cutoff frequency normalized to range [0, 1]
168  * @use_5_taps: indicates whether to use 5 taps or 7 taps
169  * @coef: output filter coefficients
170  */
171 static void dcss_scaler_gaussian_filter(int fc_q, bool use_5_taps,
172                                         bool phase0_identity,
173                                         int coef[][PSC_NUM_TAPS])
174 {
175         int sigma_q, g0_q, g1_q, g2_q;
176         int tap_cnt1, tap_cnt2, tap_idx, phase_cnt;
177         int mid;
178         int phase;
179         int i;
180         int taps;
181
182         if (use_5_taps)
183                 for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
184                         coef[phase][0] = 0;
185                         coef[phase][PSC_NUM_TAPS - 1] = 0;
186                 }
187
188         /* seed coefficient scanner */
189         taps = use_5_taps ? PSC_NUM_TAPS_RGBA : PSC_NUM_TAPS;
190         mid = (PSC_NUM_PHASES * taps) / 2 - 1;
191         phase_cnt = (PSC_NUM_PHASES * (PSC_NUM_TAPS + 1)) / 2;
192         tap_cnt1 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;
193         tap_cnt2 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;
194
195         /* seed gaussian filter generator */
196         sigma_q = div_q(PSC_Q_ROUND_OFFSET, fc_q);
197         g0_q = 1 << PSC_Q_FRACTION;
198         g1_q = exp_approx_q(div_q(-PSC_Q_ROUND_OFFSET,
199                                   mult_q(sigma_q, sigma_q)));
200         g2_q = mult_q(g1_q, g1_q);
201         coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = g0_q;
202
203         for (i = 0; i < mid; i++) {
204                 phase_cnt++;
205                 tap_cnt1--;
206                 tap_cnt2++;
207
208                 g0_q = mult_q(g0_q, g1_q);
209                 g1_q = mult_q(g1_q, g2_q);
210
211                 if ((phase_cnt & PSC_PHASE_MASK) <= 8) {
212                         tap_idx = tap_cnt1 >> PSC_BITS_FOR_PHASE;
213                         coef[phase_cnt & PSC_PHASE_MASK][tap_idx] = g0_q;
214                 }
215                 if (((-phase_cnt) & PSC_PHASE_MASK) <= 8) {
216                         tap_idx = tap_cnt2 >> PSC_BITS_FOR_PHASE;
217                         coef[(-phase_cnt) & PSC_PHASE_MASK][tap_idx] = g0_q;
218                 }
219         }
220
221         phase_cnt++;
222         tap_cnt1--;
223         coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = 0;
224
225         /* override phase 0 with identity filter if specified */
226         if (phase0_identity)
227                 for (i = 0; i < PSC_NUM_TAPS; i++)
228                         coef[0][i] = i == (PSC_NUM_TAPS >> 1) ?
229                                                 (1 << PSC_COEFF_PRECISION) : 0;
230
231         /* normalize coef */
232         for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
233                 int sum = 0;
234                 s64 ll_temp;
235
236                 for (i = 0; i < PSC_NUM_TAPS; i++)
237                         sum += coef[phase][i];
238                 for (i = 0; i < PSC_NUM_TAPS; i++) {
239                         ll_temp = coef[phase][i];
240                         ll_temp <<= PSC_COEFF_PRECISION;
241                         ll_temp += sum >> 1;
242                         ll_temp /= sum;
243                         coef[phase][i] = (int)ll_temp;
244                 }
245         }
246 }
247
248 static void dcss_scaler_nearest_neighbor_filter(bool use_5_taps,
249                                                 int coef[][PSC_NUM_TAPS])
250 {
251         int i, j;
252
253         for (i = 0; i < PSC_STORED_PHASES; i++)
254                 for (j = 0; j < PSC_NUM_TAPS; j++)
255                         coef[i][j] = j == PSC_NUM_TAPS >> 1 ?
256                                                 (1 << PSC_COEFF_PRECISION) : 0;
257 }
258
259 /**
260  * dcss_scaler_filter_design() - Compute filter coefficients using
261  *                               Gaussian filter.
262  * @src_length: length of input
263  * @dst_length: length of output
264  * @use_5_taps: 0 for 7 taps per phase, 1 for 5 taps
265  * @coef: output coefficients
266  */
267 static void dcss_scaler_filter_design(int src_length, int dst_length,
268                                       bool use_5_taps, bool phase0_identity,
269                                       int coef[][PSC_NUM_TAPS],
270                                       bool nn_interpolation)
271 {
272         int fc_q;
273
274         /* compute cutoff frequency */
275         if (dst_length >= src_length)
276                 fc_q = div_q(1, PSC_NUM_PHASES);
277         else
278                 fc_q = div_q(dst_length, src_length * PSC_NUM_PHASES);
279
280         if (nn_interpolation)
281                 dcss_scaler_nearest_neighbor_filter(use_5_taps, coef);
282         else
283                 /* compute gaussian filter coefficients */
284                 dcss_scaler_gaussian_filter(fc_q, use_5_taps, phase0_identity, coef);
285 }
286
287 static void dcss_scaler_write(struct dcss_scaler_ch *ch, u32 val, u32 ofs)
288 {
289         struct dcss_scaler *scl = ch->scl;
290
291         dcss_ctxld_write(scl->ctxld, scl->ctx_id, val, ch->base_ofs + ofs);
292 }
293
294 static int dcss_scaler_ch_init_all(struct dcss_scaler *scl,
295                                    unsigned long scaler_base)
296 {
297         struct dcss_scaler_ch *ch;
298         int i;
299
300         for (i = 0; i < 3; i++) {
301                 ch = &scl->ch[i];
302
303                 ch->base_ofs = scaler_base + i * 0x400;
304
305                 ch->base_reg = devm_ioremap(scl->dev, ch->base_ofs, SZ_4K);
306                 if (!ch->base_reg) {
307                         dev_err(scl->dev, "scaler: unable to remap ch base\n");
308                         return -ENOMEM;
309                 }
310
311                 ch->scl = scl;
312         }
313
314         return 0;
315 }
316
317 int dcss_scaler_init(struct dcss_dev *dcss, unsigned long scaler_base)
318 {
319         struct dcss_scaler *scaler;
320
321         scaler = devm_kzalloc(dcss->dev, sizeof(*scaler), GFP_KERNEL);
322         if (!scaler)
323                 return -ENOMEM;
324
325         dcss->scaler = scaler;
326         scaler->dev = dcss->dev;
327         scaler->ctxld = dcss->ctxld;
328         scaler->ctx_id = CTX_SB_HP;
329
330         if (dcss_scaler_ch_init_all(scaler, scaler_base))
331                 return -ENOMEM;
332
333         return 0;
334 }
335
336 void dcss_scaler_exit(struct dcss_scaler *scl)
337 {
338         int ch_no;
339
340         for (ch_no = 0; ch_no < 3; ch_no++) {
341                 struct dcss_scaler_ch *ch = &scl->ch[ch_no];
342
343                 dcss_writel(0, ch->base_reg + DCSS_SCALER_CTRL);
344         }
345 }
346
347 void dcss_scaler_ch_enable(struct dcss_scaler *scl, int ch_num, bool en)
348 {
349         struct dcss_scaler_ch *ch = &scl->ch[ch_num];
350         u32 scaler_ctrl;
351
352         scaler_ctrl = en ? SCALER_EN | REPEAT_EN : 0;
353
354         if (en)
355                 dcss_scaler_write(ch, ch->sdata_ctrl, DCSS_SCALER_SDATA_CTRL);
356
357         if (ch->scaler_ctrl != scaler_ctrl)
358                 ch->scaler_ctrl_chgd = true;
359
360         ch->scaler_ctrl = scaler_ctrl;
361 }
362
363 static void dcss_scaler_yuv_enable(struct dcss_scaler_ch *ch, bool en)
364 {
365         ch->sdata_ctrl &= ~YUV_EN;
366         ch->sdata_ctrl |= en ? YUV_EN : 0;
367 }
368
369 static void dcss_scaler_rtr_8lines_enable(struct dcss_scaler_ch *ch, bool en)
370 {
371         ch->sdata_ctrl &= ~RTRAM_8LINES;
372         ch->sdata_ctrl |= en ? RTRAM_8LINES : 0;
373 }
374
375 static void dcss_scaler_bit_depth_set(struct dcss_scaler_ch *ch, int depth)
376 {
377         u32 val;
378
379         val = depth == 30 ? 2 : 0;
380
381         dcss_scaler_write(ch,
382                           ((val << CHR_BIT_DEPTH_POS) & CHR_BIT_DEPTH_MASK) |
383                           ((val << LUM_BIT_DEPTH_POS) & LUM_BIT_DEPTH_MASK),
384                           DCSS_SCALER_BIT_DEPTH);
385 }
386
/* Buffer formats; the values are written as-is to the *_FORMAT registers. */
enum buffer_format {
	BUF_FMT_YUV420 = 0,
	BUF_FMT_YUV422 = 1,
	BUF_FMT_ARGB8888_YUV444 = 2,
};
392
/*
 * Position of the source chroma samples; each name gives the horizontal and
 * vertical offset handled by dcss_scaler_fractions_set().
 */
enum chroma_location {
	PSC_LOC_HORZ_0_VERT_1_OVER_4		= 0,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4	= 1,
	PSC_LOC_HORZ_0_VERT_0			= 2,
	PSC_LOC_HORZ_1_OVER_4_VERT_0		= 3,
	PSC_LOC_HORZ_0_VERT_1_OVER_2		= 4,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2	= 5
};
401
402 static void dcss_scaler_format_set(struct dcss_scaler_ch *ch,
403                                    enum buffer_format src_fmt,
404                                    enum buffer_format dst_fmt)
405 {
406         dcss_scaler_write(ch, src_fmt, DCSS_SCALER_SRC_FORMAT);
407         dcss_scaler_write(ch, dst_fmt, DCSS_SCALER_DST_FORMAT);
408 }
409
410 static void dcss_scaler_res_set(struct dcss_scaler_ch *ch,
411                                 int src_xres, int src_yres,
412                                 int dst_xres, int dst_yres,
413                                 u32 pix_format, enum buffer_format dst_format)
414 {
415         u32 lsrc_xres, lsrc_yres, csrc_xres, csrc_yres;
416         u32 ldst_xres, ldst_yres, cdst_xres, cdst_yres;
417         bool src_is_444 = true;
418
419         lsrc_xres = src_xres;
420         csrc_xres = src_xres;
421         lsrc_yres = src_yres;
422         csrc_yres = src_yres;
423         ldst_xres = dst_xres;
424         cdst_xres = dst_xres;
425         ldst_yres = dst_yres;
426         cdst_yres = dst_yres;
427
428         if (pix_format == DRM_FORMAT_UYVY || pix_format == DRM_FORMAT_VYUY ||
429             pix_format == DRM_FORMAT_YUYV || pix_format == DRM_FORMAT_YVYU) {
430                 csrc_xres >>= 1;
431                 src_is_444 = false;
432         } else if (pix_format == DRM_FORMAT_NV12 ||
433                    pix_format == DRM_FORMAT_NV21) {
434                 csrc_xres >>= 1;
435                 csrc_yres >>= 1;
436                 src_is_444 = false;
437         }
438
439         if (dst_format == BUF_FMT_YUV422)
440                 cdst_xres >>= 1;
441
442         /* for 4:4:4 to 4:2:2 conversion, source height should be 1 less */
443         if (src_is_444 && dst_format == BUF_FMT_YUV422) {
444                 lsrc_yres--;
445                 csrc_yres--;
446         }
447
448         dcss_scaler_write(ch, (((lsrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
449                                (((lsrc_xres - 1) << WIDTH_POS) & WIDTH_MASK),
450                           DCSS_SCALER_SRC_LUM_RES);
451         dcss_scaler_write(ch, (((csrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
452                                (((csrc_xres - 1) << WIDTH_POS) & WIDTH_MASK),
453                           DCSS_SCALER_SRC_CHR_RES);
454         dcss_scaler_write(ch, (((ldst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
455                                (((ldst_xres - 1) << WIDTH_POS) & WIDTH_MASK),
456                           DCSS_SCALER_DST_LUM_RES);
457         dcss_scaler_write(ch, (((cdst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
458                                (((cdst_xres - 1) << WIDTH_POS) & WIDTH_MASK),
459                           DCSS_SCALER_DST_CHR_RES);
460 }
461
/*
 * Scaling factor limits as fixed-point ratios with fp_pos fractional bits:
 * downscale_fp() yields factor, upscale_fp() yields 1 / factor.
 */
#define downscale_fp(factor, fp_pos)	((factor) << (fp_pos))
#define upscale_fp(factor, fp_pos)	((1 << (fp_pos)) / (factor))
464
/* Maximum downscale and upscale factors of a channel. */
struct dcss_scaler_factors {
	int downscale;
	int upscale;
};

/* Scaling limits, indexed by channel number. */
static const struct dcss_scaler_factors dcss_scaler_factors[] = {
	{ .downscale = 3, .upscale = 8 },
	{ .downscale = 5, .upscale = 8 },
	{ .downscale = 5, .upscale = 8 },
};
473
474 static void dcss_scaler_fractions_set(struct dcss_scaler_ch *ch,
475                                       int src_xres, int src_yres,
476                                       int dst_xres, int dst_yres,
477                                       u32 src_format, u32 dst_format,
478                                       enum chroma_location src_chroma_loc)
479 {
480         int src_c_xres, src_c_yres, dst_c_xres, dst_c_yres;
481         u32 l_vinc, l_hinc, c_vinc, c_hinc;
482         u32 c_vstart, c_hstart;
483
484         src_c_xres = src_xres;
485         src_c_yres = src_yres;
486         dst_c_xres = dst_xres;
487         dst_c_yres = dst_yres;
488
489         c_vstart = 0;
490         c_hstart = 0;
491
492         /* adjustments for source chroma location */
493         if (src_format == BUF_FMT_YUV420) {
494                 /* vertical input chroma position adjustment */
495                 switch (src_chroma_loc) {
496                 case PSC_LOC_HORZ_0_VERT_1_OVER_4:
497                 case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
498                         /*
499                          * move chroma up to first luma line
500                          * (1/4 chroma input line spacing)
501                          */
502                         c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
503                         break;
504                 case PSC_LOC_HORZ_0_VERT_1_OVER_2:
505                 case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
506                         /*
507                          * move chroma up to first luma line
508                          * (1/2 chroma input line spacing)
509                          */
510                         c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 1));
511                         break;
512                 default:
513                         break;
514                 }
515                 /* horizontal input chroma position adjustment */
516                 switch (src_chroma_loc) {
517                 case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
518                 case PSC_LOC_HORZ_1_OVER_4_VERT_0:
519                 case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
520                         /* move chroma left 1/4 chroma input sample spacing */
521                         c_hstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
522                         break;
523                 default:
524                         break;
525                 }
526         }
527
528         /* adjustments to chroma resolution */
529         if (src_format == BUF_FMT_YUV420) {
530                 src_c_xres >>= 1;
531                 src_c_yres >>= 1;
532         } else if (src_format == BUF_FMT_YUV422) {
533                 src_c_xres >>= 1;
534         }
535
536         if (dst_format == BUF_FMT_YUV422)
537                 dst_c_xres >>= 1;
538
539         l_vinc = ((src_yres << 13) + (dst_yres >> 1)) / dst_yres;
540         c_vinc = ((src_c_yres << 13) + (dst_c_yres >> 1)) / dst_c_yres;
541         l_hinc = ((src_xres << 13) + (dst_xres >> 1)) / dst_xres;
542         c_hinc = ((src_c_xres << 13) + (dst_c_xres >> 1)) / dst_c_xres;
543
544         /* save chroma start phase */
545         ch->c_vstart = c_vstart;
546         ch->c_hstart = c_hstart;
547
548         dcss_scaler_write(ch, 0, DCSS_SCALER_V_LUM_START);
549         dcss_scaler_write(ch, l_vinc, DCSS_SCALER_V_LUM_INC);
550
551         dcss_scaler_write(ch, 0, DCSS_SCALER_H_LUM_START);
552         dcss_scaler_write(ch, l_hinc, DCSS_SCALER_H_LUM_INC);
553
554         dcss_scaler_write(ch, c_vstart, DCSS_SCALER_V_CHR_START);
555         dcss_scaler_write(ch, c_vinc, DCSS_SCALER_V_CHR_INC);
556
557         dcss_scaler_write(ch, c_hstart, DCSS_SCALER_H_CHR_START);
558         dcss_scaler_write(ch, c_hinc, DCSS_SCALER_H_CHR_INC);
559 }
560
561 int dcss_scaler_get_min_max_ratios(struct dcss_scaler *scl, int ch_num,
562                                    int *min, int *max)
563 {
564         *min = upscale_fp(dcss_scaler_factors[ch_num].upscale, 16);
565         *max = downscale_fp(dcss_scaler_factors[ch_num].downscale, 16);
566
567         return 0;
568 }
569
570 static void dcss_scaler_program_5_coef_set(struct dcss_scaler_ch *ch,
571                                            int base_addr,
572                                            int coef[][PSC_NUM_TAPS])
573 {
574         int i, phase;
575
576         for (i = 0; i < PSC_STORED_PHASES; i++) {
577                 dcss_scaler_write(ch, ((coef[i][1] & 0xfff) << 16 |
578                                        (coef[i][2] & 0xfff) << 4  |
579                                        (coef[i][3] & 0xf00) >> 8),
580                                   base_addr + i * sizeof(u32));
581                 dcss_scaler_write(ch, ((coef[i][3] & 0x0ff) << 20 |
582                                        (coef[i][4] & 0xfff) << 8  |
583                                        (coef[i][5] & 0xff0) >> 4),
584                                   base_addr + 0x40 + i * sizeof(u32));
585                 dcss_scaler_write(ch, ((coef[i][5] & 0x00f) << 24),
586                                   base_addr + 0x80 + i * sizeof(u32));
587         }
588
589         /* reverse both phase and tap orderings */
590         for (phase = (PSC_NUM_PHASES >> 1) - 1;
591                         i < PSC_NUM_PHASES; i++, phase--) {
592                 dcss_scaler_write(ch, ((coef[phase][5] & 0xfff) << 16 |
593                                        (coef[phase][4] & 0xfff) << 4  |
594                                        (coef[phase][3] & 0xf00) >> 8),
595                                   base_addr + i * sizeof(u32));
596                 dcss_scaler_write(ch, ((coef[phase][3] & 0x0ff) << 20 |
597                                        (coef[phase][2] & 0xfff) << 8  |
598                                        (coef[phase][1] & 0xff0) >> 4),
599                                   base_addr + 0x40 + i * sizeof(u32));
600                 dcss_scaler_write(ch, ((coef[phase][1] & 0x00f) << 24),
601                                   base_addr + 0x80 + i * sizeof(u32));
602         }
603 }
604
605 static void dcss_scaler_program_7_coef_set(struct dcss_scaler_ch *ch,
606                                            int base_addr,
607                                            int coef[][PSC_NUM_TAPS])
608 {
609         int i, phase;
610
611         for (i = 0; i < PSC_STORED_PHASES; i++) {
612                 dcss_scaler_write(ch, ((coef[i][0] & 0xfff) << 16 |
613                                        (coef[i][1] & 0xfff) << 4  |
614                                        (coef[i][2] & 0xf00) >> 8),
615                                   base_addr + i * sizeof(u32));
616                 dcss_scaler_write(ch, ((coef[i][2] & 0x0ff) << 20 |
617                                        (coef[i][3] & 0xfff) << 8  |
618                                        (coef[i][4] & 0xff0) >> 4),
619                                   base_addr + 0x40 + i * sizeof(u32));
620                 dcss_scaler_write(ch, ((coef[i][4] & 0x00f) << 24 |
621                                        (coef[i][5] & 0xfff) << 12 |
622                                        (coef[i][6] & 0xfff)),
623                                   base_addr + 0x80 + i * sizeof(u32));
624         }
625
626         /* reverse both phase and tap orderings */
627         for (phase = (PSC_NUM_PHASES >> 1) - 1;
628                         i < PSC_NUM_PHASES; i++, phase--) {
629                 dcss_scaler_write(ch, ((coef[phase][6] & 0xfff) << 16 |
630                                        (coef[phase][5] & 0xfff) << 4  |
631                                        (coef[phase][4] & 0xf00) >> 8),
632                                   base_addr + i * sizeof(u32));
633                 dcss_scaler_write(ch, ((coef[phase][4] & 0x0ff) << 20 |
634                                        (coef[phase][3] & 0xfff) << 8  |
635                                        (coef[phase][2] & 0xff0) >> 4),
636                                   base_addr + 0x40 + i * sizeof(u32));
637                 dcss_scaler_write(ch, ((coef[phase][2] & 0x00f) << 24 |
638                                        (coef[phase][1] & 0xfff) << 12 |
639                                        (coef[phase][0] & 0xfff)),
640                                   base_addr + 0x80 + i * sizeof(u32));
641         }
642 }
643
644 static void dcss_scaler_yuv_coef_set(struct dcss_scaler_ch *ch,
645                                      enum buffer_format src_format,
646                                      enum buffer_format dst_format,
647                                      bool use_5_taps,
648                                      int src_xres, int src_yres, int dst_xres,
649                                      int dst_yres)
650 {
651         int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
652         bool program_5_taps = use_5_taps ||
653                               (dst_format == BUF_FMT_YUV422 &&
654                                src_format == BUF_FMT_ARGB8888_YUV444);
655
656         /* horizontal luma */
657         dcss_scaler_filter_design(src_xres, dst_xres, false,
658                                   src_xres == dst_xres, coef,
659                                   ch->use_nn_interpolation);
660         dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
661
662         /* vertical luma */
663         dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
664                                   src_yres == dst_yres, coef,
665                                   ch->use_nn_interpolation);
666
667         if (program_5_taps)
668                 dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
669         else
670                 dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
671
672         /* adjust chroma resolution */
673         if (src_format != BUF_FMT_ARGB8888_YUV444)
674                 src_xres >>= 1;
675         if (src_format == BUF_FMT_YUV420)
676                 src_yres >>= 1;
677         if (dst_format != BUF_FMT_ARGB8888_YUV444)
678                 dst_xres >>= 1;
679         if (dst_format == BUF_FMT_YUV420) /* should not happen */
680                 dst_yres >>= 1;
681
682         /* horizontal chroma */
683         dcss_scaler_filter_design(src_xres, dst_xres, false,
684                                   (src_xres == dst_xres) && (ch->c_hstart == 0),
685                                   coef, ch->use_nn_interpolation);
686
687         dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HCHR, coef);
688
689         /* vertical chroma */
690         dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
691                                   (src_yres == dst_yres) && (ch->c_vstart == 0),
692                                   coef, ch->use_nn_interpolation);
693         if (program_5_taps)
694                 dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
695         else
696                 dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
697 }
698
699 static void dcss_scaler_rgb_coef_set(struct dcss_scaler_ch *ch,
700                                      int src_xres, int src_yres, int dst_xres,
701                                      int dst_yres)
702 {
703         int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
704
705         /* horizontal RGB */
706         dcss_scaler_filter_design(src_xres, dst_xres, false,
707                                   src_xres == dst_xres, coef,
708                                   ch->use_nn_interpolation);
709         dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
710
711         /* vertical RGB */
712         dcss_scaler_filter_design(src_yres, dst_yres, false,
713                                   src_yres == dst_yres, coef,
714                                   ch->use_nn_interpolation);
715         dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
716 }
717
718 static void dcss_scaler_set_rgb10_order(struct dcss_scaler_ch *ch,
719                                         const struct drm_format_info *format)
720 {
721         u32 a2r10g10b10_format;
722
723         if (format->is_yuv)
724                 return;
725
726         ch->sdata_ctrl &= ~A2R10G10B10_FORMAT_MASK;
727
728         if (format->depth != 30)
729                 return;
730
731         switch (format->format) {
732         case DRM_FORMAT_ARGB2101010:
733         case DRM_FORMAT_XRGB2101010:
734                 a2r10g10b10_format = 0;
735                 break;
736
737         case DRM_FORMAT_ABGR2101010:
738         case DRM_FORMAT_XBGR2101010:
739                 a2r10g10b10_format = 5;
740                 break;
741
742         case DRM_FORMAT_RGBA1010102:
743         case DRM_FORMAT_RGBX1010102:
744                 a2r10g10b10_format = 6;
745                 break;
746
747         case DRM_FORMAT_BGRA1010102:
748         case DRM_FORMAT_BGRX1010102:
749                 a2r10g10b10_format = 11;
750                 break;
751
752         default:
753                 a2r10g10b10_format = 0;
754                 break;
755         }
756
757         ch->sdata_ctrl |= a2r10g10b10_format << A2R10G10B10_FORMAT_POS;
758 }
759
760 void dcss_scaler_set_filter(struct dcss_scaler *scl, int ch_num,
761                             enum drm_scaling_filter scaling_filter)
762 {
763         struct dcss_scaler_ch *ch = &scl->ch[ch_num];
764
765         ch->use_nn_interpolation = scaling_filter == DRM_SCALING_FILTER_NEAREST_NEIGHBOR;
766 }
767
768 void dcss_scaler_setup(struct dcss_scaler *scl, int ch_num,
769                        const struct drm_format_info *format,
770                        int src_xres, int src_yres, int dst_xres, int dst_yres,
771                        u32 vrefresh_hz)
772 {
773         struct dcss_scaler_ch *ch = &scl->ch[ch_num];
774         unsigned int pixel_depth = 0;
775         bool rtr_8line_en = false;
776         bool use_5_taps = false;
777         enum buffer_format src_format = BUF_FMT_ARGB8888_YUV444;
778         enum buffer_format dst_format = BUF_FMT_ARGB8888_YUV444;
779         u32 pix_format = format->format;
780
781         if (format->is_yuv) {
782                 dcss_scaler_yuv_enable(ch, true);
783
784                 if (pix_format == DRM_FORMAT_NV12 ||
785                     pix_format == DRM_FORMAT_NV21) {
786                         rtr_8line_en = true;
787                         src_format = BUF_FMT_YUV420;
788                 } else if (pix_format == DRM_FORMAT_UYVY ||
789                            pix_format == DRM_FORMAT_VYUY ||
790                            pix_format == DRM_FORMAT_YUYV ||
791                            pix_format == DRM_FORMAT_YVYU) {
792                         src_format = BUF_FMT_YUV422;
793                 }
794
795                 use_5_taps = !rtr_8line_en;
796         } else {
797                 dcss_scaler_yuv_enable(ch, false);
798
799                 pixel_depth = format->depth;
800         }
801
802         dcss_scaler_fractions_set(ch, src_xres, src_yres, dst_xres,
803                                   dst_yres, src_format, dst_format,
804                                   PSC_LOC_HORZ_0_VERT_1_OVER_4);
805
806         if (format->is_yuv)
807                 dcss_scaler_yuv_coef_set(ch, src_format, dst_format,
808                                          use_5_taps, src_xres, src_yres,
809                                          dst_xres, dst_yres);
810         else
811                 dcss_scaler_rgb_coef_set(ch, src_xres, src_yres,
812                                          dst_xres, dst_yres);
813
814         dcss_scaler_rtr_8lines_enable(ch, rtr_8line_en);
815         dcss_scaler_bit_depth_set(ch, pixel_depth);
816         dcss_scaler_set_rgb10_order(ch, format);
817         dcss_scaler_format_set(ch, src_format, dst_format);
818         dcss_scaler_res_set(ch, src_xres, src_yres, dst_xres, dst_yres,
819                             pix_format, dst_format);
820 }
821
822 /* This function will be called from interrupt context. */
823 void dcss_scaler_write_sclctrl(struct dcss_scaler *scl)
824 {
825         int chnum;
826
827         dcss_ctxld_assert_locked(scl->ctxld);
828
829         for (chnum = 0; chnum < 3; chnum++) {
830                 struct dcss_scaler_ch *ch = &scl->ch[chnum];
831
832                 if (ch->scaler_ctrl_chgd) {
833                         dcss_ctxld_write_irqsafe(scl->ctxld, scl->ctx_id,
834                                                  ch->scaler_ctrl,
835                                                  ch->base_ofs +
836                                                  DCSS_SCALER_CTRL);
837                         ch->scaler_ctrl_chgd = false;
838                 }
839         }
840 }