/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"

/* Firmware Names */
#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2		"amdgpu/raven2_vcn.bin"
#define FIRMWARE_ARCTURUS	"amdgpu/arcturus_vcn.bin"
#define FIRMWARE_RENOIR		"amdgpu/renoir_vcn.bin"
#define FIRMWARE_NAVI10		"amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14		"amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12		"amdgpu/navi12_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
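
/**
 * amdgpu_vcn_sw_init - initialize the VCN block
 * @adev: amdgpu_device pointer
 *
 * Select and load the VCN firmware for the current ASIC, log its version,
 * and allocate the per-instance VCPU buffer objects (plus the indirect
 * SRAM buffer when DPG mode is in use).
 */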
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	int i, r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			fw_name = FIRMWARE_RAVEN2;
		else if (adev->pdev->device == 0x15d8)
			fw_name = FIRMWARE_PICASSO;
		else
			fw_name = FIRMWARE_RAVEN;
		break;
	case CHIP_ARCTURUS:
		fw_name = FIRMWARE_ARCTURUS;
		break;
	case CHIP_RENOIR:
		fw_name = FIRMWARE_RENOIR;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI10:
		fw_name = FIRMWARE_NAVI10;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI14:
		fw_name = FIRMWARE_NAVI14;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI12:
		fw_name = FIRMWARE_NAVI12;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bits 20-23 hold the encode major version and are non-zero in the new
	 * naming convention. In the old naming convention this field overlaps
	 * version minor and DRM_DISABLED_FLAG; since the latest version minor
	 * is 0x5B and DRM_DISABLED_FLAG is zero there, this field has always
	 * been zero so far. These four bits therefore tell which naming
	 * convention is present.
	 */
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		DRM_INFO("Found VCN firmware Version ENC: %hu.%hu DEC: %hu VEP: %hu Revision: %hu\n",
			 enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
			 version_major, version_minor, family_id);
	}
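
	/*
	 * Worked example (hypothetical value, not a shipped firmware):
	 * ucode_version 0x0110002A gives fw_check = (0x0110002A >> 20) & 0xf = 1,
	 * so the new convention applies and it decodes as ENC 1.0, DEC 1,
	 * VEP 0, Revision 0x2A.
	 */
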
	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
					    &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
			return r;
		}
	}

	if (adev->vcn.indirect_sram) {
		r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo,
					    &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r);
			return r;
		}
	}

	return 0;
}
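
/**
 * amdgpu_vcn_sw_fini - tear down the VCN block
 * @adev: amdgpu_device pointer
 *
 * Cancel the idle work, free the DPG SRAM bo if one was allocated, then free
 * each instance's saved firmware copy, VCPU bo and rings, and release the
 * firmware image.
 */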
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i, j;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	if (adev->vcn.indirect_sram) {
		amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo,
				      &adev->vcn.dpg_sram_gpu_addr,
				      (void **)&adev->vcn.dpg_sram_cpu_addr);
	}

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;
		kvfree(adev->vcn.inst[j].saved_bo);

		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
				      &adev->vcn.inst[j].gpu_addr,
				      (void **)&adev->vcn.inst[j].cpu_addr);

		amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);

		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
	}

	release_firmware(adev->vcn.fw);

	return 0;
}
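
/**
 * amdgpu_vcn_suspend - save VCN state before suspend
 * @adev: amdgpu_device pointer
 *
 * Cancel the idle work and copy each instance's VCPU bo contents into a
 * CPU-side buffer so they can be restored on resume.
 */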
int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned int size;
	void *ptr;
	int i;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return 0;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vcn.inst[i].saved_bo)
			return -ENOMEM;

		memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
	}
	return 0;
}
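
/**
 * amdgpu_vcn_resume - restore VCN state after resume
 * @adev: amdgpu_device pointer
 *
 * Copy the saved VCPU bo contents back, or, when nothing was saved, reload
 * the firmware image into the bo and clear the remainder.
 */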
int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned int size;
	void *ptr;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return -EINVAL;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		if (adev->vcn.inst[i].saved_bo != NULL) {
			memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
			kvfree(adev->vcn.inst[i].saved_bo);
			adev->vcn.inst[i].saved_bo = NULL;
		} else {
			const struct common_firmware_header *hdr;
			unsigned int offset;

			hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
				memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
					    le32_to_cpu(hdr->ucode_size_bytes));
				size -= le32_to_cpu(hdr->ucode_size_bytes);
				ptr += le32_to_cpu(hdr->ucode_size_bytes);
			}
			memset_io(ptr, 0, size);
		}
	}
	return 0;
}
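
/*
 * Idle work handler: count the fences still outstanding on all VCN rings;
 * if none remain, gate the VCN block (and re-enable GFXOFF), otherwise
 * re-arm the delayed work and check again after VCN_IDLE_TIMEOUT.
 */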
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
	unsigned int i, j;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
		}

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			struct dpg_pause_state new_state;

			if (fence[j])
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

			adev->vcn.pause_dpg_mode(adev, j, &new_state);
		}

		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
		fences += fence[j];
	}

	if (fences == 0) {
		amdgpu_gfx_off_ctrl(adev, true);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
						       AMD_PG_STATE_GATE);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}
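
/*
 * Ring begin_use/end_use hooks: ungate VCN (and disable GFXOFF) when a ring
 * first becomes busy, update the DPG pause state, and re-arm the idle work
 * once the submission is done.
 */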
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);

	if (set_clocks) {
		amdgpu_gfx_off_ctrl(adev, false);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
						       AMD_PG_STATE_UNGATE);
	}

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		struct dpg_pause_state new_state;
		unsigned int fences = 0;
		unsigned int i;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
		}
		if (fences)
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		else
			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
			new_state.fw_based = VCN_DPG_STATE__PAUSE;

		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
	}
}

void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
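
/*
 * Decode ring test: write 0xCAFEDEAD to the scratch9 register, ask the ring
 * to overwrite it with 0xDEADBEEF, and poll until the new value shows up.
 */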
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned int i;
	int r;

	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;
	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}
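
/*
 * Wrap a decoder message bo in a small IB (data0/data1 carry the bo address,
 * padded with NOPs), submit it directly, and fence the bo against the
 * resulting submission.
 */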
static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
		ib->ptr[i + 1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);
err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}
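
/* Build a decoder "create session" message for @handle and send it. */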
static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
}
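
/* Build a decoder "destroy session" message for @handle and send it. */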
static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
}
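
/*
 * Decode IB test: create and immediately destroy a session, then wait for
 * the destroy fence to signal within the given timeout.
 */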
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *fence;
	long r;

	/* temporarily disable ib test for sriov */
	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}
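
/*
 * Encode ring test: submit an END command and poll until the ring's read
 * pointer moves past it.
 */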
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned int i;
	int r;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}
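
/* Build and directly submit an encoder "open session" IB for @handle. */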
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_bo *bo,
					 struct dma_fence **fence)
{
	const unsigned int ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}
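
/* Build and directly submit an encoder "close session" IB for @handle. */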
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_bo *bo,
					  struct dma_fence **fence)
{
	const unsigned int ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}
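
/*
 * Encode IB test: open and close an encode session against a scratch bo and
 * wait for the final fence within the given timeout.
 */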
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo = NULL;
	long r;

	/* temporarily disable ib test for sriov */
	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return r;
}