1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/firmware.h>
25 #include "amdgpu.h"
26 #include "amdgpu_vcn.h"
27 #include "amdgpu_pm.h"
28 #include "amdgpu_cs.h"
29 #include "soc15.h"
30 #include "soc15d.h"
31 #include "vcn_v2_0.h"
32 #include "mmsch_v3_0.h"
33
34 #include "vcn/vcn_3_0_0_offset.h"
35 #include "vcn/vcn_3_0_0_sh_mask.h"
36 #include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
37
38 #include <drm/drm_drv.h>
39
40 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET                        0x27
41 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET                    0x0f
42 #define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET                  0x10
43 #define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET                  0x11
44 #define mmUVD_NO_OP_INTERNAL_OFFSET                             0x29
45 #define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET                       0x66
46 #define mmUVD_SCRATCH9_INTERNAL_OFFSET                          0xc01d
47
48 #define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET                   0x431
49 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET          0x3b4
50 #define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET         0x3b5
51 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET                       0x25c
52
53 #define VCN_INSTANCES_SIENNA_CICHLID                            2
54 #define DEC_SW_RING_ENABLED                                     FALSE
55
56 #define RDECODE_MSG_CREATE                                      0x00000000
57 #define RDECODE_MESSAGE_CREATE                                  0x00000001
58
59 static int amdgpu_ih_clientid_vcns[] = {
60         SOC15_IH_CLIENTID_VCN,
61         SOC15_IH_CLIENTID_VCN1
62 };
63
64 static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
65 static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
66 static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
67 static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
68 static int vcn_v3_0_set_powergating_state(void *handle,
69                         enum amd_powergating_state state);
70 static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
71                         int inst_idx, struct dpg_pause_state *new_state);
72
73 static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
74 static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);
75
76 /**
77  * vcn_v3_0_early_init - set function pointers
78  *
79  * @handle: amdgpu_device pointer
80  *
81  * Set ring and irq function pointers
82  */
83 static int vcn_v3_0_early_init(void *handle)
84 {
85         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
86
87         if (amdgpu_sriov_vf(adev)) {
88                 adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
89                 adev->vcn.harvest_config = 0;
90                 adev->vcn.num_enc_rings = 1;
91
92         } else {
93                 if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
94                                                  AMDGPU_VCN_HARVEST_VCN1))
95                         /* both instances are harvested, disable the block */
96                         return -ENOENT;
97
98                 if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 33))
99                         adev->vcn.num_enc_rings = 0;
100                 else
101                         adev->vcn.num_enc_rings = 2;
102         }
103
104         vcn_v3_0_set_dec_ring_funcs(adev);
105         vcn_v3_0_set_enc_ring_funcs(adev);
106         vcn_v3_0_set_irq_funcs(adev);
107
108         return 0;
109 }
110
111 /**
112  * vcn_v3_0_sw_init - sw init for VCN block
113  *
114  * @handle: amdgpu_device pointer
115  *
116  * Load firmware and sw initialization
117  */
118 static int vcn_v3_0_sw_init(void *handle)
119 {
120         struct amdgpu_ring *ring;
121         int i, j, r;
122         int vcn_doorbell_index = 0;
123         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
124
125         r = amdgpu_vcn_sw_init(adev);
126         if (r)
127                 return r;
128
129         amdgpu_vcn_setup_ucode(adev);
130
131         r = amdgpu_vcn_resume(adev);
132         if (r)
133                 return r;
134
135         /*
136          * Note: doorbell assignment is fixed for SRIOV multiple VCN engines
137          * Formula:
138          *   vcn_db_base  = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
139          *   dec_ring_i   = vcn_db_base + i * (adev->vcn.num_enc_rings + 1)
140          *   enc_ring_i,j = vcn_db_base + i * (adev->vcn.num_enc_rings + 1) + 1 + j
141          */
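        /*
         * For example, with num_enc_rings == 1 under SR-IOV the formula above
         * gives instance 0 its decode ring at vcn_db_base + 0 and its encode
         * ring at vcn_db_base + 1, and instance 1 its decode ring at
         * vcn_db_base + 2 and its encode ring at vcn_db_base + 3.
         */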
142         if (amdgpu_sriov_vf(adev)) {
143                 vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1;
144                 /* get DWORD offset */
145                 vcn_doorbell_index = vcn_doorbell_index << 1;
146         }
147
148         for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
149                 volatile struct amdgpu_fw_shared *fw_shared;
150
151                 if (adev->vcn.harvest_config & (1 << i))
152                         continue;
153
154                 adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
155                 adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
156                 adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
157                 adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
158                 adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
159                 adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
160
161                 adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
162                 adev->vcn.inst[i].external.scratch9 = SOC15_REG_OFFSET(VCN, i, mmUVD_SCRATCH9);
163                 adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
164                 adev->vcn.inst[i].external.data0 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA0);
165                 adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
166                 adev->vcn.inst[i].external.data1 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA1);
167                 adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
168                 adev->vcn.inst[i].external.cmd = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_CMD);
169                 adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
170                 adev->vcn.inst[i].external.nop = SOC15_REG_OFFSET(VCN, i, mmUVD_NO_OP);
171
172                 /* VCN DEC TRAP */
173                 r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
174                                 VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[i].irq);
175                 if (r)
176                         return r;
177
178                 atomic_set(&adev->vcn.inst[i].sched_score, 0);
179
180                 ring = &adev->vcn.inst[i].ring_dec;
181                 ring->use_doorbell = true;
182                 if (amdgpu_sriov_vf(adev)) {
183                         ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1);
184                 } else {
185                         ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
186                 }
187                 sprintf(ring->name, "vcn_dec_%d", i);
188                 r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
189                                      AMDGPU_RING_PRIO_DEFAULT,
190                                      &adev->vcn.inst[i].sched_score);
191                 if (r)
192                         return r;
193
194                 for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
195                         enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(j);
196
197                         /* VCN ENC TRAP */
198                         r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
199                                 j + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
200                         if (r)
201                                 return r;
202
203                         ring = &adev->vcn.inst[i].ring_enc[j];
204                         ring->use_doorbell = true;
205                         if (amdgpu_sriov_vf(adev)) {
206                                 ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j;
207                         } else {
208                                 ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
209                         }
210                         sprintf(ring->name, "vcn_enc_%d.%d", i, j);
211                         r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
212                                              hw_prio, &adev->vcn.inst[i].sched_score);
213                         if (r)
214                                 return r;
215                 }
216
217                 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
218                 fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) |
219                                              cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) |
220                                              cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB);
221                 fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
222
223                 if (amdgpu_vcnfw_log)
224                         amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
225         }
226
227         if (amdgpu_sriov_vf(adev)) {
228                 r = amdgpu_virt_alloc_mm_table(adev);
229                 if (r)
230                         return r;
231         }
232         if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
233                 adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
234
235         return 0;
236 }
237
238 /**
239  * vcn_v3_0_sw_fini - sw fini for VCN block
240  *
241  * @handle: amdgpu_device pointer
242  *
243  * VCN suspend and free up sw allocation
244  */
245 static int vcn_v3_0_sw_fini(void *handle)
246 {
247         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
248         int i, r, idx;
249
250         if (drm_dev_enter(adev_to_drm(adev), &idx)) {
251                 for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
252                         volatile struct amdgpu_fw_shared *fw_shared;
253
254                         if (adev->vcn.harvest_config & (1 << i))
255                                 continue;
256                         fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
257                         fw_shared->present_flag_0 = 0;
258                         fw_shared->sw_ring.is_enabled = false;
259                 }
260
261                 drm_dev_exit(idx);
262         }
263
264         if (amdgpu_sriov_vf(adev))
265                 amdgpu_virt_free_mm_table(adev);
266
267         r = amdgpu_vcn_suspend(adev);
268         if (r)
269                 return r;
270
271         r = amdgpu_vcn_sw_fini(adev);
272
273         return r;
274 }
275
276 /**
277  * vcn_v3_0_hw_init - start and test VCN block
278  *
279  * @handle: amdgpu_device pointer
280  *
281  * Initialize the hardware, boot up the VCPU and do some testing
282  */
283 static int vcn_v3_0_hw_init(void *handle)
284 {
285         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
286         struct amdgpu_ring *ring;
287         int i, j, r;
288
289         if (amdgpu_sriov_vf(adev)) {
290                 r = vcn_v3_0_start_sriov(adev);
291                 if (r)
292                         goto done;
293
294                 /* initialize VCN dec and enc ring buffers */
295                 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
296                         if (adev->vcn.harvest_config & (1 << i))
297                                 continue;
298
299                         ring = &adev->vcn.inst[i].ring_dec;
300                         if (amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, i)) {
301                                 ring->sched.ready = false;
302                                 ring->no_scheduler = true;
303                                 dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
304                         } else {
305                                 ring->wptr = 0;
306                                 ring->wptr_old = 0;
307                                 vcn_v3_0_dec_ring_set_wptr(ring);
308                                 ring->sched.ready = true;
309                         }
310
311                         for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
312                                 ring = &adev->vcn.inst[i].ring_enc[j];
313                                 if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
314                                         ring->sched.ready = false;
315                                         ring->no_scheduler = true;
316                                         dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
317                                 } else {
318                                         ring->wptr = 0;
319                                         ring->wptr_old = 0;
320                                         vcn_v3_0_enc_ring_set_wptr(ring);
321                                         ring->sched.ready = true;
322                                 }
323                         }
324                 }
325         } else {
326                 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
327                         if (adev->vcn.harvest_config & (1 << i))
328                                 continue;
329
330                         ring = &adev->vcn.inst[i].ring_dec;
331
332                         adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
333                                                      ring->doorbell_index, i);
334
335                         r = amdgpu_ring_test_helper(ring);
336                         if (r)
337                                 goto done;
338
339                         for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
340                                 ring = &adev->vcn.inst[i].ring_enc[j];
341                                 r = amdgpu_ring_test_helper(ring);
342                                 if (r)
343                                         goto done;
344                         }
345                 }
346         }
347
348 done:
349         if (!r)
350                 DRM_INFO("VCN decode and encode initialized successfully (under %s).\n",
351                         (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ? "DPG Mode" : "SPG Mode");
352
353         return r;
354 }
355
356 /**
357  * vcn_v3_0_hw_fini - stop the hardware block
358  *
359  * @handle: amdgpu_device pointer
360  *
361  * Stop the VCN block, mark ring as not ready any more
362  */
363 static int vcn_v3_0_hw_fini(void *handle)
364 {
365         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
366         int i;
367
368         cancel_delayed_work_sync(&adev->vcn.idle_work);
369
370         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
371                 if (adev->vcn.harvest_config & (1 << i))
372                         continue;
373
374                 if (!amdgpu_sriov_vf(adev)) {
375                         if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
376                                         (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
377                                          RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
378                                 vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
379                         }
380                 }
381         }
382
383         return 0;
384 }
385
386 /**
387  * vcn_v3_0_suspend - suspend VCN block
388  *
389  * @handle: amdgpu_device pointer
390  *
391  * HW fini and suspend VCN block
392  */
393 static int vcn_v3_0_suspend(void *handle)
394 {
395         int r;
396         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
397
398         r = vcn_v3_0_hw_fini(adev);
399         if (r)
400                 return r;
401
402         r = amdgpu_vcn_suspend(adev);
403
404         return r;
405 }
406
407 /**
408  * vcn_v3_0_resume - resume VCN block
409  *
410  * @handle: amdgpu_device pointer
411  *
412  * Resume firmware and hw init VCN block
413  */
414 static int vcn_v3_0_resume(void *handle)
415 {
416         int r;
417         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
418
419         r = amdgpu_vcn_resume(adev);
420         if (r)
421                 return r;
422
423         r = vcn_v3_0_hw_init(adev);
424
425         return r;
426 }
427
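/*
 * The VCN instance BO programmed below is split into three cached windows:
 * the firmware image (or the PSP TMR copy when the firmware is PSP-loaded),
 * the stack, and the context, plus a non-cached window for the shared
 * firmware structure. vcn_v3_0_mc_resume() and vcn_v3_0_mc_resume_dpg_mode()
 * write the same layout through direct and DPG-indirect register paths.
 */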
428 /**
429  * vcn_v3_0_mc_resume - memory controller programming
430  *
431  * @adev: amdgpu_device pointer
432  * @inst: instance number
433  *
434  * Let the VCN memory controller know its offsets
435  */
436 static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
437 {
438         uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
439         uint32_t offset;
440
441         /* cache window 0: fw */
442         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
443                 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
444                         (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
445                 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
446                         (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
447                 WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0, 0);
448                 offset = 0;
449         } else {
450                 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
451                         lower_32_bits(adev->vcn.inst[inst].gpu_addr));
452                 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
453                         upper_32_bits(adev->vcn.inst[inst].gpu_addr));
454                 offset = size;
455                 WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0,
456                         AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
457         }
458         WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE0, size);
459
460         /* cache window 1: stack */
461         WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
462                 lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
463         WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
464                 upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
465         WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET1, 0);
466         WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
467
468         /* cache window 2: context */
469         WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
470                 lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
471         WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
472                 upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
473         WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET2, 0);
474         WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
475
476         /* non-cache window */
477         WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
478                 lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
479         WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
480                 upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
481         WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
482         WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0,
483                 AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
484 }
485
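/*
 * vcn_v3_0_mc_resume_dpg_mode - memory controller programming for DPG mode
 *
 * Same cache window setup as vcn_v3_0_mc_resume(), but issued through
 * WREG32_SOC15_DPG_MODE so the writes can be packed into the DPG SRAM
 * when indirect is set.
 */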
486 static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
487 {
488         uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
489         uint32_t offset;
490
491         /* cache window 0: fw */
492         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
493                 if (!indirect) {
494                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
495                                 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
496                                 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
497                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
498                                 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
499                                 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
500                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
501                                 VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
502                 } else {
503                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
504                                 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
505                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
506                                 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
507                         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
508                                 VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
509                 }
510                 offset = 0;
511         } else {
512                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
513                         VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
514                         lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
515                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
516                         VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
517                         upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
518                 offset = size;
519                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
520                         VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0),
521                         AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
522         }
523
524         if (!indirect)
525                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
526                         VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
527         else
528                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
529                         VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
530
531         /* cache window 1: stack */
532         if (!indirect) {
533                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
534                         VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
535                         lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
536                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
537                         VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
538                         upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
539                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
540                         VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
541         } else {
542                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
543                         VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
544                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
545                         VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
546                 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
547                         VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
548         }
549         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
550                         VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
551
552         /* cache window 2: context */
553         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
554                         VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
555                         lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
556         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
557                         VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
558                         upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
559         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
560                         VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
561         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
562                         VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
563
564         /* non-cache window */
565         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
566                         VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
567                         lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
568         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
569                         VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
570                         upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
571         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
572                         VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
573         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
574                         VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0),
575                         AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
576
577         /* VCN global tiling registers */
578         WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
579                 UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
580 }
581
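/*
 * vcn_v3_0_disable_static_power_gating - power up the VCN sub-blocks
 *
 * Request the powered-on state through the UVD_PGFSM_CONFIG power gating
 * state machine, wait for UVD_PGFSM_STATUS to acknowledge it, then update
 * UVD_POWER_STATUS to match.
 */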
582 static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
583 {
584         uint32_t data = 0;
585
586         if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
587                 data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
588                         | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
589                         | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
590                         | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
591                         | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
592                         | 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
593                         | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
594                         | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
595                         | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
596                         | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
597                         | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
598                         | 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
599                         | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
600                         | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
601
602                 WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
603                 SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS,
604                         UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF);
605         } else {
606                 data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
607                         | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
608                         | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
609                         | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
610                         | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
611                         | 1 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
612                         | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
613                         | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
614                         | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
615                         | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
616                         | 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
617                         | 1 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
618                         | 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
619                         | 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
620                 WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
621                 SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, 0,  0x3F3FFFFF);
622         }
623
624         data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
625         data &= ~0x103;
626         if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
627                 data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
628                         UVD_POWER_STATUS__UVD_PG_EN_MASK;
629
630         WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
631 }
632
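/*
 * vcn_v3_0_enable_static_power_gating - power down the VCN sub-blocks
 *
 * Mark the tiles-off power status, then request the powered-down state
 * through UVD_PGFSM_CONFIG and wait for UVD_PGFSM_STATUS to report it.
 * Only takes effect when VCN power gating is supported.
 */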
633 static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
634 {
635         uint32_t data;
636
637         if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
638                 /* Before power off, this indicator has to be turned on */
639                 data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
640                 data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
641                 data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
642                 WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
643
644                 data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
645                         | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
646                         | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
647                         | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
648                         | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
649                         | 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
650                         | 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
651                         | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
652                         | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
653                         | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
654                         | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
655                         | 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
656                         | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
657                         | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
658                 WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
659
660                 data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
661                         | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
662                         | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
663                         | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
664                         | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
665                         | 2 << UVD_PGFSM_STATUS__UVDIRL_PWR_STATUS__SHIFT
666                         | 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT
667                         | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
668                         | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
669                         | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
670                         | 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT
671                         | 2 << UVD_PGFSM_STATUS__UVDATD_PWR_STATUS__SHIFT
672                         | 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT
673                         | 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT);
674                 SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, data, 0x3F3FFFFF);
675         }
676 }
677
678 /**
679  * vcn_v3_0_disable_clock_gating - disable VCN clock gating
680  *
681  * @adev: amdgpu_device pointer
682  * @inst: instance number
683  *
684  * Disable clock gating for VCN block
685  */
686 static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
687 {
688         uint32_t data;
689
690         /* VCN disable CGC */
691         data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
692         if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
693                 data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
694         else
695                 data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
696         data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
697         data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
698         WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
699
700         data = RREG32_SOC15(VCN, inst, mmUVD_CGC_GATE);
701         data &= ~(UVD_CGC_GATE__SYS_MASK
702                 | UVD_CGC_GATE__UDEC_MASK
703                 | UVD_CGC_GATE__MPEG2_MASK
704                 | UVD_CGC_GATE__REGS_MASK
705                 | UVD_CGC_GATE__RBC_MASK
706                 | UVD_CGC_GATE__LMI_MC_MASK
707                 | UVD_CGC_GATE__LMI_UMC_MASK
708                 | UVD_CGC_GATE__IDCT_MASK
709                 | UVD_CGC_GATE__MPRD_MASK
710                 | UVD_CGC_GATE__MPC_MASK
711                 | UVD_CGC_GATE__LBSI_MASK
712                 | UVD_CGC_GATE__LRBBM_MASK
713                 | UVD_CGC_GATE__UDEC_RE_MASK
714                 | UVD_CGC_GATE__UDEC_CM_MASK
715                 | UVD_CGC_GATE__UDEC_IT_MASK
716                 | UVD_CGC_GATE__UDEC_DB_MASK
717                 | UVD_CGC_GATE__UDEC_MP_MASK
718                 | UVD_CGC_GATE__WCB_MASK
719                 | UVD_CGC_GATE__VCPU_MASK
720                 | UVD_CGC_GATE__MMSCH_MASK);
721
722         WREG32_SOC15(VCN, inst, mmUVD_CGC_GATE, data);
723
724         SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_CGC_GATE, 0,  0xFFFFFFFF);
725
726         data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
727         data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
728                 | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
729                 | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
730                 | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
731                 | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
732                 | UVD_CGC_CTRL__SYS_MODE_MASK
733                 | UVD_CGC_CTRL__UDEC_MODE_MASK
734                 | UVD_CGC_CTRL__MPEG2_MODE_MASK
735                 | UVD_CGC_CTRL__REGS_MODE_MASK
736                 | UVD_CGC_CTRL__RBC_MODE_MASK
737                 | UVD_CGC_CTRL__LMI_MC_MODE_MASK
738                 | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
739                 | UVD_CGC_CTRL__IDCT_MODE_MASK
740                 | UVD_CGC_CTRL__MPRD_MODE_MASK
741                 | UVD_CGC_CTRL__MPC_MODE_MASK
742                 | UVD_CGC_CTRL__LBSI_MODE_MASK
743                 | UVD_CGC_CTRL__LRBBM_MODE_MASK
744                 | UVD_CGC_CTRL__WCB_MODE_MASK
745                 | UVD_CGC_CTRL__VCPU_MODE_MASK
746                 | UVD_CGC_CTRL__MMSCH_MODE_MASK);
747         WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
748
749         data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE);
750         data |= (UVD_SUVD_CGC_GATE__SRE_MASK
751                 | UVD_SUVD_CGC_GATE__SIT_MASK
752                 | UVD_SUVD_CGC_GATE__SMP_MASK
753                 | UVD_SUVD_CGC_GATE__SCM_MASK
754                 | UVD_SUVD_CGC_GATE__SDB_MASK
755                 | UVD_SUVD_CGC_GATE__SRE_H264_MASK
756                 | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
757                 | UVD_SUVD_CGC_GATE__SIT_H264_MASK
758                 | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
759                 | UVD_SUVD_CGC_GATE__SCM_H264_MASK
760                 | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
761                 | UVD_SUVD_CGC_GATE__SDB_H264_MASK
762                 | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
763                 | UVD_SUVD_CGC_GATE__SCLR_MASK
764                 | UVD_SUVD_CGC_GATE__ENT_MASK
765                 | UVD_SUVD_CGC_GATE__IME_MASK
766                 | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
767                 | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
768                 | UVD_SUVD_CGC_GATE__SITE_MASK
769                 | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
770                 | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
771                 | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
772                 | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
773                 | UVD_SUVD_CGC_GATE__IME_HEVC_MASK
774                 | UVD_SUVD_CGC_GATE__EFC_MASK
775                 | UVD_SUVD_CGC_GATE__SAOE_MASK
776                 | UVD_SUVD_CGC_GATE__SRE_AV1_MASK
777                 | UVD_SUVD_CGC_GATE__FBC_PCLK_MASK
778                 | UVD_SUVD_CGC_GATE__FBC_CCLK_MASK
779                 | UVD_SUVD_CGC_GATE__SCM_AV1_MASK
780                 | UVD_SUVD_CGC_GATE__SMPA_MASK);
781         WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE, data);
782
783         data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2);
784         data |= (UVD_SUVD_CGC_GATE2__MPBE0_MASK
785                 | UVD_SUVD_CGC_GATE2__MPBE1_MASK
786                 | UVD_SUVD_CGC_GATE2__SIT_AV1_MASK
787                 | UVD_SUVD_CGC_GATE2__SDB_AV1_MASK
788                 | UVD_SUVD_CGC_GATE2__MPC1_MASK);
789         WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2, data);
790
791         data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
792         data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
793                 | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
794                 | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
795                 | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
796                 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
797                 | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
798                 | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
799                 | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
800                 | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
801                 | UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
802                 | UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
803                 | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
804                 | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
805                 | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
806                 | UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
807                 | UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
808                 | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
809                 | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
810                 | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
811         WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
812 }
813
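/*
 * vcn_v3_0_clock_gating_dpg_mode - clock gating setup through the DPG path
 *
 * Counterpart of vcn_v3_0_disable_clock_gating() for instances running in
 * dynamic power gating mode: UVD_CGC_CTRL, UVD_CGC_GATE and the SUVD gating
 * registers are programmed through WREG32_SOC15_DPG_MODE, optionally into
 * the indirect DPG SRAM.
 */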
814 static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
815                 uint8_t sram_sel, int inst_idx, uint8_t indirect)
816 {
817         uint32_t reg_data = 0;
818
819         /* enable sw clock gating control */
820         if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
821                 reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
822         else
823                 reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
824         reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
825         reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
826         reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
827                  UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
828                  UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
829                  UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
830                  UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
831                  UVD_CGC_CTRL__SYS_MODE_MASK |
832                  UVD_CGC_CTRL__UDEC_MODE_MASK |
833                  UVD_CGC_CTRL__MPEG2_MODE_MASK |
834                  UVD_CGC_CTRL__REGS_MODE_MASK |
835                  UVD_CGC_CTRL__RBC_MODE_MASK |
836                  UVD_CGC_CTRL__LMI_MC_MODE_MASK |
837                  UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
838                  UVD_CGC_CTRL__IDCT_MODE_MASK |
839                  UVD_CGC_CTRL__MPRD_MODE_MASK |
840                  UVD_CGC_CTRL__MPC_MODE_MASK |
841                  UVD_CGC_CTRL__LBSI_MODE_MASK |
842                  UVD_CGC_CTRL__LRBBM_MODE_MASK |
843                  UVD_CGC_CTRL__WCB_MODE_MASK |
844                  UVD_CGC_CTRL__VCPU_MODE_MASK |
845                  UVD_CGC_CTRL__MMSCH_MODE_MASK);
846         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
847                 VCN, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
848
849         /* turn off clock gating */
850         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
851                 VCN, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect);
852
853         /* turn on SUVD clock gating */
854         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
855                 VCN, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
856
857         /* turn on sw mode in UVD_SUVD_CGC_CTRL */
858         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
859                 VCN, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
860 }
861
862 /**
863  * vcn_v3_0_enable_clock_gating - enable VCN clock gating
864  *
865  * @adev: amdgpu_device pointer
866  * @inst: instance number
867  *
868  * Enable clock gating for VCN block
869  */
870 static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
871 {
872         uint32_t data;
873
874         /* enable VCN CGC */
875         data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
876         if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
877                 data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
878         else
879                 data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
880         data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
881         data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
882         WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
883
884         data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
885         data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
886                 | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
887                 | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
888                 | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
889                 | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
890                 | UVD_CGC_CTRL__SYS_MODE_MASK
891                 | UVD_CGC_CTRL__UDEC_MODE_MASK
892                 | UVD_CGC_CTRL__MPEG2_MODE_MASK
893                 | UVD_CGC_CTRL__REGS_MODE_MASK
894                 | UVD_CGC_CTRL__RBC_MODE_MASK
895                 | UVD_CGC_CTRL__LMI_MC_MODE_MASK
896                 | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
897                 | UVD_CGC_CTRL__IDCT_MODE_MASK
898                 | UVD_CGC_CTRL__MPRD_MODE_MASK
899                 | UVD_CGC_CTRL__MPC_MODE_MASK
900                 | UVD_CGC_CTRL__LBSI_MODE_MASK
901                 | UVD_CGC_CTRL__LRBBM_MODE_MASK
902                 | UVD_CGC_CTRL__WCB_MODE_MASK
903                 | UVD_CGC_CTRL__VCPU_MODE_MASK
904                 | UVD_CGC_CTRL__MMSCH_MODE_MASK);
905         WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
906
907         data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
908         data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
909                 | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
910                 | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
911                 | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
912                 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
913                 | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
914                 | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
915                 | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
916                 | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
917                 | UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
918                 | UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
919                 | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
920                 | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
921                 | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
922                 | UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
923                 | UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
924                 | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
925                 | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
926                 | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
927         WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
928 }
929
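/*
 * vcn_v3_0_start_dpg_mode - start a VCN instance in dynamic power gating mode
 *
 * Enable dynamic power gating, program the VCPU, LMI, MPC and memory
 * controller registers (directly or packed into the DPG SRAM when indirect
 * is set), then initialize and release the decode ring buffer.
 */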
930 static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
931 {
932         volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
933         struct amdgpu_ring *ring;
934         uint32_t rb_bufsz, tmp;
935
936         /* disable register anti-hang mechanism */
937         WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
938                 ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
939         /* enable dynamic power gating mode */
940         tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
941         tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
942         tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
943         WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);
944
945         if (indirect)
946                 adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
947
948         /* enable clock gating */
949         vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
950
951         /* enable VCPU clock */
952         tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
953         tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
954         tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
955         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
956                 VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
957
958         /* disable master interrupt */
959         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
960                 VCN, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect);
961
962         /* setup mmUVD_LMI_CTRL */
963         tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
964                 UVD_LMI_CTRL__REQ_MODE_MASK |
965                 UVD_LMI_CTRL__CRC_RESET_MASK |
966                 UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
967                 UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
968                 UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
969                 (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
970                 0x00100000L);
971         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
972                 VCN, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect);
973
974         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
975                 VCN, inst_idx, mmUVD_MPC_CNTL),
976                 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
977
978         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
979                 VCN, inst_idx, mmUVD_MPC_SET_MUXA0),
980                 ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
981                  (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
982                  (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
983                  (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
984
985         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
986                 VCN, inst_idx, mmUVD_MPC_SET_MUXB0),
987                  ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
988                  (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
989                  (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
990                  (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
991
992         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
993                 VCN, inst_idx, mmUVD_MPC_SET_MUX),
994                 ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
995                  (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
996                  (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
997
998         vcn_v3_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
999
1000         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1001                 VCN, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
1002         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1003                 VCN, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
1004
1005         /* enable LMI MC and UMC channels */
1006         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1007                 VCN, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect);
1008
1009         /* unblock VCPU register access */
1010         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1011                 VCN, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
1012
1013         tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
1014         tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
1015         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1016                 VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
1017
1018         /* enable master interrupt */
1019         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1020                 VCN, inst_idx, mmUVD_MASTINT_EN),
1021                 UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
1022
1023         /* add nop to work around PSP size check */
1024         WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1025                 VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
1026
1027         if (indirect)
1028                 psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
1029                         (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
1030                                 (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
1031
1032         ring = &adev->vcn.inst[inst_idx].ring_dec;
1033         /* force RBC into idle state */
1034         rb_bufsz = order_base_2(ring->ring_size);
1035         tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
1036         tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1037         tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1038         tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1039         tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1040         WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
1041
1042         /* Stall DPG before WPTR/RPTR reset */
1043         WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1044                 UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
1045                 ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1046         fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1047
1048         /* set the write pointer delay */
1049         WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
1050
1051         /* set the wb address */
1052         WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
1053                 (upper_32_bits(ring->gpu_addr) >> 2));
1054
1055         /* program the RB_BASE for ring buffer */
1056         WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
1057                 lower_32_bits(ring->gpu_addr));
1058         WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
1059                 upper_32_bits(ring->gpu_addr));
1060
1061         /* Initialize the ring buffer's read and write pointers */
1062         WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);
1063
1064         WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);
1065
1066         ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
1067         WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
1068                 lower_32_bits(ring->wptr));
1069
1070         /* Reset FW shared memory RBC WPTR/RPTR */
1071         fw_shared->rb.rptr = 0;
1072         fw_shared->rb.wptr = lower_32_bits(ring->wptr);
1073
1074         /* resetting done, fw can check RB ring */
1075         fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1076
1077         /* Unstall DPG */
1078         WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1079                 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1080
1081         return 0;
1082 }
1083
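/*
 * vcn_v3_0_start - start all VCN instances
 *
 * For each non-harvested instance, either defer to vcn_v3_0_start_dpg_mode()
 * when DPG is supported, or power the instance up, disable clock gating,
 * program the memory controller and boot the VCPU, retrying the VCPU reset
 * a few times if the firmware does not report ready.
 */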
1084 static int vcn_v3_0_start(struct amdgpu_device *adev)
1085 {
1086         volatile struct amdgpu_fw_shared *fw_shared;
1087         struct amdgpu_ring *ring;
1088         uint32_t rb_bufsz, tmp;
1089         int i, j, k, r;
1090
1091         if (adev->pm.dpm_enabled)
1092                 amdgpu_dpm_enable_uvd(adev, true);
1093
1094         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1095                 if (adev->vcn.harvest_config & (1 << i))
1096                         continue;
1097
1098                 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1099                         r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
1100                         continue;
1101                 }
1102
1103                 /* disable VCN power gating */
1104                 vcn_v3_0_disable_static_power_gating(adev, i);
1105
1106                 /* set VCN status busy */
1107                 tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
1108                 WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
1109
1110                 /* disable clock gating */
1111                 vcn_v3_0_disable_clock_gating(adev, i);
1112
1113                 /* enable VCPU clock */
1114                 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
1115                         UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
1116
1117                 /* disable master interrupt */
1118                 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
1119                         ~UVD_MASTINT_EN__VCPU_EN_MASK);
1120
1121                 /* enable LMI MC and UMC channels */
1122                 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
1123                         ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1124
1125                 tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
1126                 tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1127                 tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1128                 WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
1129
1130                 /* setup mmUVD_LMI_CTRL */
1131                 tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
1132                 WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
1133                         UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
1134                         UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
1135                         UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
1136                         UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
1137
1138                 /* setup mmUVD_MPC_CNTL */
1139                 tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
1140                 tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
1141                 tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
1142                 WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
1143
1144                 /* setup UVD_MPC_SET_MUXA0 */
1145                 WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
1146                         ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
1147                         (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
1148                         (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
1149                         (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
1150
1151                 /* setup UVD_MPC_SET_MUXB0 */
1152                 WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
1153                         ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
1154                         (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
1155                         (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
1156                         (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
1157
1158                 /* setup mmUVD_MPC_SET_MUX */
1159                 WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
1160                         ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
1161                         (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
1162                         (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
1163
1164                 vcn_v3_0_mc_resume(adev, i);
1165
1166                 /* VCN global tiling registers */
1167                 WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
1168                         adev->gfx.config.gb_addr_config);
1169
1170                 /* unblock VCPU register access */
1171                 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
1172                         ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1173
1174                 /* release VCPU reset to boot */
1175                 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1176                         ~UVD_VCPU_CNTL__BLK_RST_MASK);
1177
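                     /* Poll for VCPU boot: up to 10 attempts, each waiting up to
                      * ~1s (100 x 10ms) for UVD_STATUS to signal that the VCPU has
                      * come up, toggling BLK_RST between attempts to retry the boot.
                      */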
1178                 for (j = 0; j < 10; ++j) {
1179                         uint32_t status;
1180
1181                         for (k = 0; k < 100; ++k) {
1182                                 status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
1183                                 if (status & 2)
1184                                         break;
1185                                 mdelay(10);
1186                         }
1187                         r = 0;
1188                         if (status & 2)
1189                                 break;
1190
1191                         DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
1192                         WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
1193                                 UVD_VCPU_CNTL__BLK_RST_MASK,
1194                                 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1195                         mdelay(10);
1196                         WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1197                                 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1198
1199                         mdelay(10);
1200                         r = -1;
1201                 }
1202
1203                 if (r) {
1204                         DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
1205                         return r;
1206                 }
1207
1208                 /* enable master interrupt */
1209                 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
1210                         UVD_MASTINT_EN__VCPU_EN_MASK,
1211                         ~UVD_MASTINT_EN__VCPU_EN_MASK);
1212
1213                 /* clear the busy bit of VCN_STATUS */
1214                 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
1215                         ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1216
1217                 WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
1218
1219                 ring = &adev->vcn.inst[i].ring_dec;
1220                 /* force RBC into idle state */
1221                 rb_bufsz = order_base_2(ring->ring_size);
1222                 tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
1223                 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1224                 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1225                 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1226                 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1227                 WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
1228
1229                 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1230                 fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1231
1232                 /* program the RB_BASE for ring buffer */
1233                 WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
1234                         lower_32_bits(ring->gpu_addr));
1235                 WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
1236                         upper_32_bits(ring->gpu_addr));
1237
1238                 /* Initialize the ring buffer's read and write pointers */
1239                 WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
1240
1241                 WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0);
1242                 ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
1243                 WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
1244                         lower_32_bits(ring->wptr));
1245                 fw_shared->rb.wptr = lower_32_bits(ring->wptr);
1246                 fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1247
1248                 if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) {
1249                         fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1250                         ring = &adev->vcn.inst[i].ring_enc[0];
1251                         WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
1252                         WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1253                         WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
1254                         WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1255                         WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
1256                         fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1257
1258                         fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1259                         ring = &adev->vcn.inst[i].ring_enc[1];
1260                         WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
1261                         WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1262                         WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1263                         WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1264                         WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
1265                         fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1266                 }
1267         }
1268
1269         return 0;
1270 }
1271
1272 static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
1273 {
1274         int i, j;
1275         struct amdgpu_ring *ring;
1276         uint64_t cache_addr;
1277         uint64_t rb_addr;
1278         uint64_t ctx_addr;
1279         uint32_t param, resp, expected;
1280         uint32_t offset, cache_size;
1281         uint32_t tmp, timeout;
1282
1283         struct amdgpu_mm_table *table = &adev->virt.mm_table;
1284         uint32_t *table_loc;
1285         uint32_t table_size;
1286         uint32_t size, size_dw;
1287
1288         struct mmsch_v3_0_cmd_direct_write
1289                 direct_wt = { {0} };
1290         struct mmsch_v3_0_cmd_direct_read_modify_write
1291                 direct_rd_mod_wt = { {0} };
1292         struct mmsch_v3_0_cmd_end end = { {0} };
1293         struct mmsch_v3_0_init_header header;
1294
1295         direct_wt.cmd_header.command_type =
1296                 MMSCH_COMMAND__DIRECT_REG_WRITE;
1297         direct_rd_mod_wt.cmd_header.command_type =
1298                 MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
1299         end.cmd_header.command_type =
1300                 MMSCH_COMMAND__END;
1301
1302         header.version = MMSCH_VERSION;
1303         header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2;
1304         for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
1305                 header.inst[i].init_status = 0;
1306                 header.inst[i].table_offset = 0;
1307                 header.inst[i].table_size = 0;
1308         }
1309
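             /* Build one register-programming sub-table per active VCN instance
              * directly behind the init header, for the MMSCH firmware to apply
              * on behalf of the SR-IOV guest.
              */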
1310         table_loc = (uint32_t *)table->cpu_addr;
1311         table_loc += header.total_size;
1312         for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
1313                 if (adev->vcn.harvest_config & (1 << i))
1314                         continue;
1315
1316                 table_size = 0;
1317
1318                 MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
1319                         mmUVD_STATUS),
1320                         ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
1321
1322                 cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
1323
1324                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1325                         MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1326                                 mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1327                                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
1328                         MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1329                                 mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1330                                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
1331                         offset = 0;
1332                         MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1333                                 mmUVD_VCPU_CACHE_OFFSET0),
1334                                 0);
1335                 } else {
1336                         MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1337                                 mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1338                                 lower_32_bits(adev->vcn.inst[i].gpu_addr));
1339                         MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1340                                 mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1341                                 upper_32_bits(adev->vcn.inst[i].gpu_addr));
1342                         offset = cache_size;
1343                         MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1344                                 mmUVD_VCPU_CACHE_OFFSET0),
1345                                 AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
1346                 }
1347
1348                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1349                         mmUVD_VCPU_CACHE_SIZE0),
1350                         cache_size);
1351
1352                 cache_addr = adev->vcn.inst[i].gpu_addr + offset;
1353                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1354                         mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
1355                         lower_32_bits(cache_addr));
1356                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1357                         mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
1358                         upper_32_bits(cache_addr));
1359                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1360                         mmUVD_VCPU_CACHE_OFFSET1),
1361                         0);
1362                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1363                         mmUVD_VCPU_CACHE_SIZE1),
1364                         AMDGPU_VCN_STACK_SIZE);
1365
1366                 cache_addr = adev->vcn.inst[i].gpu_addr + offset +
1367                         AMDGPU_VCN_STACK_SIZE;
1368                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1369                         mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
1370                         lower_32_bits(cache_addr));
1371                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1372                         mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
1373                         upper_32_bits(cache_addr));
1374                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1375                         mmUVD_VCPU_CACHE_OFFSET2),
1376                         0);
1377                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1378                         mmUVD_VCPU_CACHE_SIZE2),
1379                         AMDGPU_VCN_CONTEXT_SIZE);
1380
1381                 for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
1382                         ring = &adev->vcn.inst[i].ring_enc[j];
1383                         ring->wptr = 0;
1384                         rb_addr = ring->gpu_addr;
1385                         MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1386                                 mmUVD_RB_BASE_LO),
1387                                 lower_32_bits(rb_addr));
1388                         MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1389                                 mmUVD_RB_BASE_HI),
1390                                 upper_32_bits(rb_addr));
1391                         MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1392                                 mmUVD_RB_SIZE),
1393                                 ring->ring_size / 4);
1394                 }
1395
1396                 ring = &adev->vcn.inst[i].ring_dec;
1397                 ring->wptr = 0;
1398                 rb_addr = ring->gpu_addr;
1399                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1400                         mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
1401                         lower_32_bits(rb_addr));
1402                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1403                         mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
1404                         upper_32_bits(rb_addr));
1405                 /* force RBC into idle state */
1406                 tmp = order_base_2(ring->ring_size);
1407                 tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp);
1408                 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1409                 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1410                 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1411                 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1412                 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1413                         mmUVD_RBC_RB_CNTL),
1414                         tmp);
1415
1416                 /* add end packet */
1417                 MMSCH_V3_0_INSERT_END();
1418
1419                 /* refine header */
1420                 header.inst[i].init_status = 0;
1421                 header.inst[i].table_offset = header.total_size;
1422                 header.inst[i].table_size = table_size;
1423                 header.total_size += table_size;
1424         }
1425
1426         /* Update init table header in memory */
1427         size = sizeof(struct mmsch_v3_0_init_header);
1428         table_loc = (uint32_t *)table->cpu_addr;
1429         memcpy((void *)table_loc, &header, size);
1430
1431         /* message MMSCH (in VCN[0]) to initialize this client
1432          * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
1433          * of memory descriptor location
1434          */
1435         ctx_addr = table->gpu_addr;
1436         WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
1437         WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
1438
1439         /* 2, update vmid of descriptor */
1440         tmp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
1441         tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
1442         /* use domain0 for MM scheduler */
1443         tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
1444         WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, tmp);
1445
1446         /* 3, notify mmsch about the size of this descriptor */
1447         size = header.total_size;
1448         WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);
1449
1450         /* 4, set resp to zero */
1451         WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
1452
1453         /* 5, kick off the initialization and wait until
1454          * MMSCH_VF_MAILBOX_RESP becomes non-zero
1455          */
1456         param = 0x10000001;
1457         WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, param);
1458         tmp = 0;
1459         timeout = 1000;
1460         resp = 0;
1461         expected = param + 1;
1462         while (resp != expected) {
1463                 resp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
1464                 if (resp == expected)
1465                         break;
1466
1467                 udelay(10);
1468                 tmp = tmp + 10;
1469                 if (tmp >= timeout) {
1470                         DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"
1471                                 " waiting for mmMMSCH_VF_MAILBOX_RESP "
1472                                 "(expected=0x%08x, readback=0x%08x)\n",
1473                                 tmp, expected, resp);
1474                         return -EBUSY;
1475                 }
1476         }
1477
1478         return 0;
1479 }
1480
1481 static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
1482 {
1483         uint32_t tmp;
1484
1485         /* Wait for power status to be 1 */
1486         SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
1487                 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1488
1489         /* wait for read ptr to be equal to write ptr */
1490         tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
1491         SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
1492
1493         tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
1494         SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
1495
1496         tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
1497         SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
1498
1499         SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
1500                 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1501
1502         /* disable dynamic power gating mode */
1503         WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
1504                 ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
1505
1506         return 0;
1507 }
1508
1509 static int vcn_v3_0_stop(struct amdgpu_device *adev)
1510 {
1511         uint32_t tmp;
1512         int i, r = 0;
1513
1514         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1515                 if (adev->vcn.harvest_config & (1 << i))
1516                         continue;
1517
1518                 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1519                         r = vcn_v3_0_stop_dpg_mode(adev, i);
1520                         continue;
1521                 }
1522
1523                 /* wait for vcn idle */
1524                 r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
1525                 if (r)
1526                         return r;
1527
1528                 tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
1529                         UVD_LMI_STATUS__READ_CLEAN_MASK |
1530                         UVD_LMI_STATUS__WRITE_CLEAN_MASK |
1531                         UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
1532                 r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
1533                 if (r)
1534                         return r;
1535
1536                 /* disable LMI UMC channel */
1537                 tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
1538                 tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
1539                 WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
1540                 tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
1541                         UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
1542                 r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
1543                 if (r)
1544                         return r;
1545
1546                 /* block VCPU register access */
1547                 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
1548                         UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
1549                         ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1550
1551                 /* reset VCPU */
1552                 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
1553                         UVD_VCPU_CNTL__BLK_RST_MASK,
1554                         ~UVD_VCPU_CNTL__BLK_RST_MASK);
1555
1556                 /* disable VCPU clock */
1557                 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1558                         ~(UVD_VCPU_CNTL__CLK_EN_MASK));
1559
1560                 /* apply soft reset */
1561                 tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
1562                 tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1563                 WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
1564                 tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
1565                 tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1566                 WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
1567
1568                 /* clear status */
1569                 WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
1570
1571                 /* apply HW clock gating */
1572                 vcn_v3_0_enable_clock_gating(adev, i);
1573
1574                 /* enable VCN power gating */
1575                 vcn_v3_0_enable_static_power_gating(adev, i);
1576         }
1577
1578         if (adev->pm.dpm_enabled)
1579                 amdgpu_dpm_enable_uvd(adev, false);
1580
1581         return 0;
1582 }
1583
1584 static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
1585                    int inst_idx, struct dpg_pause_state *new_state)
1586 {
1587         volatile struct amdgpu_fw_shared *fw_shared;
1588         struct amdgpu_ring *ring;
1589         uint32_t reg_data = 0;
1590         int ret_code;
1591
1592         /* pause/unpause if state is changed */
1593         if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
1594                 DRM_DEBUG("dpg pause state changed %d -> %d\n",
1595                         adev->vcn.inst[inst_idx].pause_state.fw_based,  new_state->fw_based);
1596                 reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
1597                         (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1598
1599                 if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
1600                         ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
1601                                 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1602
1603                         if (!ret_code) {
1604                                 /* pause DPG */
1605                                 reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1606                                 WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
1607
1608                                 /* wait for ACK */
1609                                 SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
1610                                         UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
1611                                         UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1612
1613                                 /* Stall DPG before WPTR/RPTR reset */
1614                                 WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1615                                         UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
1616                                         ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1617
1618                                 if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) {
1619                                         /* Restore the encode ring buffers after the pause */
1620                                         fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
1621                                         fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1622                                         ring = &adev->vcn.inst[inst_idx].ring_enc[0];
1623                                         ring->wptr = 0;
1624                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
1625                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1626                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
1627                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
1628                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1629                                         fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1630
1631                                         fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1632                                         ring = &adev->vcn.inst[inst_idx].ring_enc[1];
1633                                         ring->wptr = 0;
1634                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1635                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1636                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
1637                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
1638                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1639                                         fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1640
1641                                         /* restore wptr/rptr with pointers saved in FW shared memory */
1642                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, fw_shared->rb.rptr);
1643                                         WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, fw_shared->rb.wptr);
1644                                 }
1645
1646                                 /* Unstall DPG */
1647                                 WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1648                                         0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1649
1650                                 SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
1651                                         UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1652                         }
1653                 } else {
1654                         /* unpause dpg, no need to wait */
1655                         reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1656                         WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
1657                 }
1658                 adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
1659         }
1660
1661         return 0;
1662 }
1663
1664 /**
1665  * vcn_v3_0_dec_ring_get_rptr - get read pointer
1666  *
1667  * @ring: amdgpu_ring pointer
1668  *
1669  * Returns the current hardware read pointer
1670  */
1671 static uint64_t vcn_v3_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
1672 {
1673         struct amdgpu_device *adev = ring->adev;
1674
1675         return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
1676 }
1677
1678 /**
1679  * vcn_v3_0_dec_ring_get_wptr - get write pointer
1680  *
1681  * @ring: amdgpu_ring pointer
1682  *
1683  * Returns the current hardware write pointer
1684  */
1685 static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
1686 {
1687         struct amdgpu_device *adev = ring->adev;
1688
1689         if (ring->use_doorbell)
1690                 return adev->wb.wb[ring->wptr_offs];
1691         else
1692                 return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
1693 }
1694
1695 /**
1696  * vcn_v3_0_dec_ring_set_wptr - set write pointer
1697  *
1698  * @ring: amdgpu_ring pointer
1699  *
1700  * Commits the write pointer to the hardware
1701  */
1702 static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
1703 {
1704         struct amdgpu_device *adev = ring->adev;
1705         volatile struct amdgpu_fw_shared *fw_shared;
1706
1707         if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1708                 /* whenever we update RBC_RB_WPTR, save the wptr in fw_shared rb.wptr and SCRATCH2 */
1709                 fw_shared = adev->vcn.inst[ring->me].fw_shared.cpu_addr;
1710                 fw_shared->rb.wptr = lower_32_bits(ring->wptr);
1711                 WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2,
1712                         lower_32_bits(ring->wptr));
1713         }
1714
1715         if (ring->use_doorbell) {
1716                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
1717                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1718         } else {
1719                 WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
1720         }
1721 }
1722
1723 static void vcn_v3_0_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
1724                                 u64 seq, uint32_t flags)
1725 {
1726         WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
1727
1728         amdgpu_ring_write(ring, VCN_DEC_SW_CMD_FENCE);
1729         amdgpu_ring_write(ring, addr);
1730         amdgpu_ring_write(ring, upper_32_bits(addr));
1731         amdgpu_ring_write(ring, seq);
1732         amdgpu_ring_write(ring, VCN_DEC_SW_CMD_TRAP);
1733 }
1734
1735 static void vcn_v3_0_dec_sw_ring_insert_end(struct amdgpu_ring *ring)
1736 {
1737         amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
1738 }
1739
1740 static void vcn_v3_0_dec_sw_ring_emit_ib(struct amdgpu_ring *ring,
1741                                struct amdgpu_job *job,
1742                                struct amdgpu_ib *ib,
1743                                uint32_t flags)
1744 {
1745         uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
1746
1747         amdgpu_ring_write(ring, VCN_DEC_SW_CMD_IB);
1748         amdgpu_ring_write(ring, vmid);
1749         amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
1750         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
1751         amdgpu_ring_write(ring, ib->length_dw);
1752 }
1753
1754 static void vcn_v3_0_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1755                                 uint32_t val, uint32_t mask)
1756 {
1757         amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WAIT);
1758         amdgpu_ring_write(ring, reg << 2);
1759         amdgpu_ring_write(ring, mask);
1760         amdgpu_ring_write(ring, val);
1761 }
1762
1763 static void vcn_v3_0_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring,
1764                                 uint32_t vmid, uint64_t pd_addr)
1765 {
1766         struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1767         uint32_t data0, data1, mask;
1768
1769         pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1770
1771         /* wait for register write */
1772         data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
1773         data1 = lower_32_bits(pd_addr);
1774         mask = 0xffffffff;
1775         vcn_v3_0_dec_sw_ring_emit_reg_wait(ring, data0, data1, mask);
1776 }
1777
1778 static void vcn_v3_0_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
1779 {
1780         amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WRITE);
1781         amdgpu_ring_write(ring, reg << 2);
1782         amdgpu_ring_write(ring, val);
1783 }
1784
1785 static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
1786         .type = AMDGPU_RING_TYPE_VCN_DEC,
1787         .align_mask = 0x3f,
1788         .nop = VCN_DEC_SW_CMD_NO_OP,
1789         .secure_submission_supported = true,
1790         .vmhub = AMDGPU_MMHUB_0,
1791         .get_rptr = vcn_v3_0_dec_ring_get_rptr,
1792         .get_wptr = vcn_v3_0_dec_ring_get_wptr,
1793         .set_wptr = vcn_v3_0_dec_ring_set_wptr,
1794         .emit_frame_size =
1795                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1796                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1797                 4 + /* vcn_v3_0_dec_sw_ring_emit_vm_flush */
1798                 5 + 5 + /* vcn_v3_0_dec_sw_ring_emit_fence x2 vm fence */
1799                 1, /* vcn_v3_0_dec_sw_ring_insert_end */
1800         .emit_ib_size = 5, /* vcn_v3_0_dec_sw_ring_emit_ib */
1801         .emit_ib = vcn_v3_0_dec_sw_ring_emit_ib,
1802         .emit_fence = vcn_v3_0_dec_sw_ring_emit_fence,
1803         .emit_vm_flush = vcn_v3_0_dec_sw_ring_emit_vm_flush,
1804         .test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
1805         .test_ib = NULL, /* amdgpu_vcn_dec_sw_ring_test_ib */
1806         .insert_nop = amdgpu_ring_insert_nop,
1807         .insert_end = vcn_v3_0_dec_sw_ring_insert_end,
1808         .pad_ib = amdgpu_ring_generic_pad_ib,
1809         .begin_use = amdgpu_vcn_ring_begin_use,
1810         .end_use = amdgpu_vcn_ring_end_use,
1811         .emit_wreg = vcn_v3_0_dec_sw_ring_emit_wreg,
1812         .emit_reg_wait = vcn_v3_0_dec_sw_ring_emit_reg_wait,
1813         .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1814 };
1815
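     /* Restrict the job's entity to the first decode scheduler so that the
      * submission runs on a single VCN instance.
      */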
1816 static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
1817                                 struct amdgpu_job *job)
1818 {
1819         struct drm_gpu_scheduler **scheds;
1820
1821         /* The create msg must be in the first IB submitted */
1822         if (atomic_read(&job->base.entity->fence_seq))
1823                 return -EINVAL;
1824
1825         scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
1826                 [AMDGPU_RING_PRIO_DEFAULT].sched;
1827         drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
1828         return 0;
1829 }
1830
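     /* Map and validate the decode message at the given address; for codec
      * types other than H264/HEVC/VP9, limit scheduling to a single instance
      * via vcn_v3_0_limit_sched().
      */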
1831 static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
1832                             uint64_t addr)
1833 {
1834         struct ttm_operation_ctx ctx = { false, false };
1835         struct amdgpu_bo_va_mapping *map;
1836         uint32_t *msg, num_buffers;
1837         struct amdgpu_bo *bo;
1838         uint64_t start, end;
1839         unsigned int i;
1840         void *ptr;
1841         int r;
1842
1843         addr &= AMDGPU_GMC_HOLE_MASK;
1844         r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
1845         if (r) {
1846                 DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
1847                 return r;
1848         }
1849
1850         start = map->start * AMDGPU_GPU_PAGE_SIZE;
1851         end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
1852         if (addr & 0x7) {
1853                 DRM_ERROR("VCN messages must be 8 byte aligned!\n");
1854                 return -EINVAL;
1855         }
1856
1857         bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
1858         amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
1859         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1860         if (r) {
1861                 DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
1862                 return r;
1863         }
1864
1865         r = amdgpu_bo_kmap(bo, &ptr);
1866         if (r) {
1867                 DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
1868                 return r;
1869         }
1870
1871         msg = ptr + addr - start;
1872
1873         /* Check length */
1874         if (msg[1] > end - addr) {
1875                 r = -EINVAL;
1876                 goto out;
1877         }
1878
1879         if (msg[3] != RDECODE_MSG_CREATE)
1880                 goto out;
1881
1882         num_buffers = msg[2];
1883         for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
1884                 uint32_t offset, size, *create;
1885
1886                 if (msg[0] != RDECODE_MESSAGE_CREATE)
1887                         continue;
1888
1889                 offset = msg[1];
1890                 size = msg[2];
1891
1892                 if (offset + size > end) {
1893                         r = -EINVAL;
1894                         goto out;
1895                 }
1896
1897                 create = ptr + addr + offset - start;
1898
1899                 /* H264, HEVC and VP9 can run on any instance */
1900                 if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
1901                         continue;
1902
1903                 r = vcn_v3_0_limit_sched(p, job);
1904                 if (r)
1905                         goto out;
1906         }
1907
1908 out:
1909         amdgpu_bo_kunmap(bo);
1910         return r;
1911 }
1912
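     /* On instances other than 0, scan the IB for the data0/data1/cmd register
      * writes that carry the decode message address, then check the message
      * with vcn_v3_0_dec_msg() before submission.
      */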
1913 static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
1914                                            struct amdgpu_job *job,
1915                                            struct amdgpu_ib *ib)
1916 {
1917         struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
1918         uint32_t msg_lo = 0, msg_hi = 0;
1919         unsigned int i;
1920         int r;
1921
1922         /* The first instance can decode anything */
1923         if (!ring->me)
1924                 return 0;
1925
1926         for (i = 0; i < ib->length_dw; i += 2) {
1927                 uint32_t reg = amdgpu_ib_get_value(ib, i);
1928                 uint32_t val = amdgpu_ib_get_value(ib, i + 1);
1929
1930                 if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
1931                         msg_lo = val;
1932                 } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
1933                         msg_hi = val;
1934                 } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
1935                            val == 0) {
1936                         r = vcn_v3_0_dec_msg(p, job,
1937                                              ((u64)msg_hi) << 32 | msg_lo);
1938                         if (r)
1939                                 return r;
1940                 }
1941         }
1942         return 0;
1943 }
1944
1945 static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
1946         .type = AMDGPU_RING_TYPE_VCN_DEC,
1947         .align_mask = 0xf,
1948         .secure_submission_supported = true,
1949         .vmhub = AMDGPU_MMHUB_0,
1950         .get_rptr = vcn_v3_0_dec_ring_get_rptr,
1951         .get_wptr = vcn_v3_0_dec_ring_get_wptr,
1952         .set_wptr = vcn_v3_0_dec_ring_set_wptr,
1953         .patch_cs_in_place = vcn_v3_0_ring_patch_cs_in_place,
1954         .emit_frame_size =
1955                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
1956                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
1957                 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
1958                 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
1959                 6,
1960         .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
1961         .emit_ib = vcn_v2_0_dec_ring_emit_ib,
1962         .emit_fence = vcn_v2_0_dec_ring_emit_fence,
1963         .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
1964         .test_ring = vcn_v2_0_dec_ring_test_ring,
1965         .test_ib = amdgpu_vcn_dec_ring_test_ib,
1966         .insert_nop = vcn_v2_0_dec_ring_insert_nop,
1967         .insert_start = vcn_v2_0_dec_ring_insert_start,
1968         .insert_end = vcn_v2_0_dec_ring_insert_end,
1969         .pad_ib = amdgpu_ring_generic_pad_ib,
1970         .begin_use = amdgpu_vcn_ring_begin_use,
1971         .end_use = amdgpu_vcn_ring_end_use,
1972         .emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
1973         .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
1974         .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1975 };
1976
1977 /**
1978  * vcn_v3_0_enc_ring_get_rptr - get enc read pointer
1979  *
1980  * @ring: amdgpu_ring pointer
1981  *
1982  * Returns the current hardware enc read pointer
1983  */
1984 static uint64_t vcn_v3_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
1985 {
1986         struct amdgpu_device *adev = ring->adev;
1987
1988         if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
1989                 return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
1990         else
1991                 return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
1992 }
1993
1994 /**
1995  * vcn_v3_0_enc_ring_get_wptr - get enc write pointer
1996  *
1997  * @ring: amdgpu_ring pointer
1998  *
1999  * Returns the current hardware enc write pointer
2000  */
2001 static uint64_t vcn_v3_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
2002 {
2003         struct amdgpu_device *adev = ring->adev;
2004
2005         if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
2006                 if (ring->use_doorbell)
2007                         return adev->wb.wb[ring->wptr_offs];
2008                 else
2009                         return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
2010         } else {
2011                 if (ring->use_doorbell)
2012                         return adev->wb.wb[ring->wptr_offs];
2013                 else
2014                         return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
2015         }
2016 }
2017
2018 /**
2019  * vcn_v3_0_enc_ring_set_wptr - set enc write pointer
2020  *
2021  * @ring: amdgpu_ring pointer
2022  *
2023  * Commits the enc write pointer to the hardware
2024  */
2025 static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
2026 {
2027         struct amdgpu_device *adev = ring->adev;
2028
2029         if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
2030                 if (ring->use_doorbell) {
2031                         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
2032                         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
2033                 } else {
2034                         WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
2035                 }
2036         } else {
2037                 if (ring->use_doorbell) {
2038                         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
2039                         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
2040                 } else {
2041                         WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
2042                 }
2043         }
2044 }
2045
2046 static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = {
2047         .type = AMDGPU_RING_TYPE_VCN_ENC,
2048         .align_mask = 0x3f,
2049         .nop = VCN_ENC_CMD_NO_OP,
2050         .vmhub = AMDGPU_MMHUB_0,
2051         .get_rptr = vcn_v3_0_enc_ring_get_rptr,
2052         .get_wptr = vcn_v3_0_enc_ring_get_wptr,
2053         .set_wptr = vcn_v3_0_enc_ring_set_wptr,
2054         .emit_frame_size =
2055                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2056                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
2057                 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
2058                 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
2059                 1, /* vcn_v2_0_enc_ring_insert_end */
2060         .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
2061         .emit_ib = vcn_v2_0_enc_ring_emit_ib,
2062         .emit_fence = vcn_v2_0_enc_ring_emit_fence,
2063         .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
2064         .test_ring = amdgpu_vcn_enc_ring_test_ring,
2065         .test_ib = amdgpu_vcn_enc_ring_test_ib,
2066         .insert_nop = amdgpu_ring_insert_nop,
2067         .insert_end = vcn_v2_0_enc_ring_insert_end,
2068         .pad_ib = amdgpu_ring_generic_pad_ib,
2069         .begin_use = amdgpu_vcn_ring_begin_use,
2070         .end_use = amdgpu_vcn_ring_end_use,
2071         .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
2072         .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
2073         .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2074 };
2075
2076 static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
2077 {
2078         int i;
2079
2080         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2081                 if (adev->vcn.harvest_config & (1 << i))
2082                         continue;
2083
2084                 if (!DEC_SW_RING_ENABLED)
2085                         adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs;
2086                 else
2087                         adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs;
2088                 adev->vcn.inst[i].ring_dec.me = i;
2089                 DRM_INFO("VCN(%d) decode%s is enabled in VM mode\n", i,
2090                           DEC_SW_RING_ENABLED ? "(Software Ring)" : "");
2091         }
2092 }
2093
2094 static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev)
2095 {
2096         int i, j;
2097
2098         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2099                 if (adev->vcn.harvest_config & (1 << i))
2100                         continue;
2101
2102                 for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
2103                         adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs;
2104                         adev->vcn.inst[i].ring_enc[j].me = i;
2105                 }
2106                 if (adev->vcn.num_enc_rings > 0)
2107                         DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
2108         }
2109 }
2110
2111 static bool vcn_v3_0_is_idle(void *handle)
2112 {
2113         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2114         int i, ret = 1;
2115
2116         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2117                 if (adev->vcn.harvest_config & (1 << i))
2118                         continue;
2119
2120                 ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
2121         }
2122
2123         return ret;
2124 }
2125
2126 static int vcn_v3_0_wait_for_idle(void *handle)
2127 {
2128         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2129         int i, ret = 0;
2130
2131         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2132                 if (adev->vcn.harvest_config & (1 << i))
2133                         continue;
2134
2135                 ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
2136                         UVD_STATUS__IDLE);
2137                 if (ret)
2138                         return ret;
2139         }
2140
2141         return ret;
2142 }
2143
2144 static int vcn_v3_0_set_clockgating_state(void *handle,
2145                                           enum amd_clockgating_state state)
2146 {
2147         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2148         bool enable = (state == AMD_CG_STATE_GATE);
2149         int i;
2150
2151         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2152                 if (adev->vcn.harvest_config & (1 << i))
2153                         continue;
2154
2155                 if (enable) {
2156                         if (RREG32_SOC15(VCN, i, mmUVD_STATUS) != UVD_STATUS__IDLE)
2157                                 return -EBUSY;
2158                         vcn_v3_0_enable_clock_gating(adev, i);
2159                 } else {
2160                         vcn_v3_0_disable_clock_gating(adev, i);
2161                 }
2162         }
2163
2164         return 0;
2165 }
2166
2167 static int vcn_v3_0_set_powergating_state(void *handle,
2168                                           enum amd_powergating_state state)
2169 {
2170         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2171         int ret;
2172
2173         /* for SRIOV, guest should not control VCN Power-gating
2174          * MMSCH FW should control Power-gating and clock-gating
2175          * guest should avoid touching CGC and PG
2176          */
2177         if (amdgpu_sriov_vf(adev)) {
2178                 adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
2179                 return 0;
2180         }
2181
2182         if (state == adev->vcn.cur_state)
2183                 return 0;
2184
2185         if (state == AMD_PG_STATE_GATE)
2186                 ret = vcn_v3_0_stop(adev);
2187         else
2188                 ret = vcn_v3_0_start(adev);
2189
2190         if (!ret)
2191                 adev->vcn.cur_state = state;
2192
2193         return ret;
2194 }
2195
2196 static int vcn_v3_0_set_interrupt_state(struct amdgpu_device *adev,
2197                                         struct amdgpu_irq_src *source,
2198                                         unsigned type,
2199                                         enum amdgpu_interrupt_state state)
2200 {
2201         return 0;
2202 }
2203
2204 static int vcn_v3_0_process_interrupt(struct amdgpu_device *adev,
2205                                       struct amdgpu_irq_src *source,
2206                                       struct amdgpu_iv_entry *entry)
2207 {
2208         uint32_t ip_instance;
2209
2210         switch (entry->client_id) {
2211         case SOC15_IH_CLIENTID_VCN:
2212                 ip_instance = 0;
2213                 break;
2214         case SOC15_IH_CLIENTID_VCN1:
2215                 ip_instance = 1;
2216                 break;
2217         default:
2218                 DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
2219                 return 0;
2220         }
2221
2222         DRM_DEBUG("IH: VCN TRAP\n");
2223
2224         switch (entry->src_id) {
2225         case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
2226                 amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
2227                 break;
2228         case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
2229                 amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
2230                 break;
2231         case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
2232                 amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
2233                 break;
2234         default:
2235                 DRM_ERROR("Unhandled interrupt: %d %d\n",
2236                           entry->src_id, entry->src_data[0]);
2237                 break;
2238         }
2239
2240         return 0;
2241 }
2242
2243 static const struct amdgpu_irq_src_funcs vcn_v3_0_irq_funcs = {
2244         .set = vcn_v3_0_set_interrupt_state,
2245         .process = vcn_v3_0_process_interrupt,
2246 };
2247
2248 static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev)
2249 {
2250         int i;
2251
2252         for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2253                 if (adev->vcn.harvest_config & (1 << i))
2254                         continue;
2255
2256                 adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
2257                 adev->vcn.inst[i].irq.funcs = &vcn_v3_0_irq_funcs;
2258         }
2259 }
2260
2261 static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
2262         .name = "vcn_v3_0",
2263         .early_init = vcn_v3_0_early_init,
2264         .late_init = NULL,
2265         .sw_init = vcn_v3_0_sw_init,
2266         .sw_fini = vcn_v3_0_sw_fini,
2267         .hw_init = vcn_v3_0_hw_init,
2268         .hw_fini = vcn_v3_0_hw_fini,
2269         .suspend = vcn_v3_0_suspend,
2270         .resume = vcn_v3_0_resume,
2271         .is_idle = vcn_v3_0_is_idle,
2272         .wait_for_idle = vcn_v3_0_wait_for_idle,
2273         .check_soft_reset = NULL,
2274         .pre_soft_reset = NULL,
2275         .soft_reset = NULL,
2276         .post_soft_reset = NULL,
2277         .set_clockgating_state = vcn_v3_0_set_clockgating_state,
2278         .set_powergating_state = vcn_v3_0_set_powergating_state,
2279 };
2280
2281 const struct amdgpu_ip_block_version vcn_v3_0_ip_block =
2282 {
2283         .type = AMD_IP_BLOCK_TYPE_VCN,
2284         .major = 3,
2285         .minor = 0,
2286         .rev = 0,
2287         .funcs = &vcn_v3_0_ip_funcs,
2288 };