 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
#include <linux/firmware.h>
#include <linux/pci.h>

#include <drm/drm_cache.h>

#include "amdgpu_atomfirmware.h"
#include "amdgpu_gem.h"

#include "gc/gc_9_0_sh_mask.h"
#include "dce/dce_12_0_offset.h"
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"

#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"

#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
#include "athub_v1_0.h"
#include "gfxhub_v1_1.h"
#include "mmhub_v9_4.h"
#include "mmhub_v1_7.h"

#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"

#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"

/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION				0x055d
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX			2
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT	0x0
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT	0x10
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK	0x00003FFFL
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK	0x3FFF0000L
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0					0x049d
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX				2
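
/*
 * Illustrative only, not part of the original file: a minimal sketch of
 * how the packed viewport register decodes with the shift/mask pairs
 * defined above. The driver itself uses REG_GET_FIELD() for this in
 * gmc_v9_0_get_vbios_fb_size() below.
 */
static inline void gmc_v9_0_example_decode_viewport(u32 viewport,
						    u32 *width, u32 *height)
{
	*width = (viewport & HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK) >>
		 HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT;
	*height = (viewport & HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK) >>
		  HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT;
}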
static const char *gfxhub_client_ids[] = {

static const char *mmhub_client_ids_raven[][2] = {

static const char *mmhub_client_ids_renoir[][2] = {

static const char *mmhub_client_ids_vega10[][2] = {
	[32+14][0] = "SDMA0",
	[32+4][1] = "DCEDWB",
	[32+14][1] = "SDMA1",
};

static const char *mmhub_client_ids_vega12[][2] = {
	[32+15][0] = "SDMA0",
	[32+1][1] = "DCEDWB",
	[32+15][1] = "SDMA1",
};

static const char *mmhub_client_ids_vega20[][2] = {
	[32+12][0] = "UTCL2",
	[32+14][0] = "SDMA1",
	[32+14][1] = "SDMA1",
};

static const char *mmhub_client_ids_arcturus[][2] = {

static const char *mmhub_client_ids_aldebaran[][2] = {
	[32+1][0] = "DBGU_IO0",
	[32+2][0] = "DBGU_IO2",
	[96+11][0] = "JPEG0",
	[96+13][0] = "VCNU0",
	[128+11][0] = "JPEG1",
	[128+12][0] = "VCN1",
	[128+13][0] = "VCNU1",
	[256+0][0] = "SDMA0",
	[256+1][0] = "SDMA1",
	[256+2][0] = "SDMA2",
	[256+3][0] = "SDMA3",
	[256+4][0] = "SDMA4",
	[32+1][1] = "DBGU_IO0",
	[32+2][1] = "DBGU_IO2",
	[96+11][1] = "JPEG0",
	[96+13][1] = "VCNU0",
	[128+11][1] = "JPEG1",
	[128+12][1] = "VCN1",
	[128+13][1] = "VCNU1",
	[256+0][1] = "SDMA0",
	[256+1][1] = "SDMA1",
	[256+2][1] = "SDMA2",
	[256+3][1] = "SDMA3",
	[256+4][1] = "SDMA4",
};
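
/*
 * Note (not in the original file): the tables above are indexed as
 * mmhub_client_ids_<asic>[cid][rw], where cid comes from the CID field
 * of VM_L2_PROTECTION_FAULT_STATUS and rw selects the read (0) or
 * write (1) client name; see gmc_v9_0_process_interrupt() below.
 */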
static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
};

static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
};

static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
	(0x000143c0 + 0x00000000),
	(0x000143c0 + 0x00000800),
	(0x000143c0 + 0x00001000),
	(0x000143c0 + 0x00001800),
	(0x000543c0 + 0x00000000),
	(0x000543c0 + 0x00000800),
	(0x000543c0 + 0x00001000),
	(0x000543c0 + 0x00001800),
	(0x000943c0 + 0x00000000),
	(0x000943c0 + 0x00000800),
	(0x000943c0 + 0x00001000),
	(0x000943c0 + 0x00001800),
	(0x000d43c0 + 0x00000000),
	(0x000d43c0 + 0x00000800),
	(0x000d43c0 + 0x00001000),
	(0x000d43c0 + 0x00001800),
	(0x001143c0 + 0x00000000),
	(0x001143c0 + 0x00000800),
	(0x001143c0 + 0x00001000),
	(0x001143c0 + 0x00001800),
	(0x001543c0 + 0x00000000),
	(0x001543c0 + 0x00000800),
	(0x001543c0 + 0x00001000),
	(0x001543c0 + 0x00001800),
	(0x001943c0 + 0x00000000),
	(0x001943c0 + 0x00000800),
	(0x001943c0 + 0x00001000),
	(0x001943c0 + 0x00001800),
	(0x001d43c0 + 0x00000000),
	(0x001d43c0 + 0x00000800),
	(0x001d43c0 + 0x00001000),
	(0x001d43c0 + 0x00001800),
};

static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
	(0x000143e0 + 0x00000000),
	(0x000143e0 + 0x00000800),
	(0x000143e0 + 0x00001000),
	(0x000143e0 + 0x00001800),
	(0x000543e0 + 0x00000000),
	(0x000543e0 + 0x00000800),
	(0x000543e0 + 0x00001000),
	(0x000543e0 + 0x00001800),
	(0x000943e0 + 0x00000000),
	(0x000943e0 + 0x00000800),
	(0x000943e0 + 0x00001000),
	(0x000943e0 + 0x00001800),
	(0x000d43e0 + 0x00000000),
	(0x000d43e0 + 0x00000800),
	(0x000d43e0 + 0x00001000),
	(0x000d43e0 + 0x00001800),
	(0x001143e0 + 0x00000000),
	(0x001143e0 + 0x00000800),
	(0x001143e0 + 0x00001000),
	(0x001143e0 + 0x00001800),
	(0x001543e0 + 0x00000000),
	(0x001543e0 + 0x00000800),
	(0x001543e0 + 0x00001000),
	(0x001543e0 + 0x00001800),
	(0x001943e0 + 0x00000000),
	(0x001943e0 + 0x00000800),
	(0x001943e0 + 0x00001000),
	(0x001943e0 + 0x00001800),
	(0x001d43e0 + 0x00000000),
	(0x001d43e0 + 0x00000800),
	(0x001d43e0 + 0x00001000),
	(0x001d43e0 + 0x00001800),
};
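
/*
 * Illustrative only, not part of the original file: the two tables above
 * follow a regular layout -- eight UMC instances spaced 0x40000 apart,
 * each with four channel instances spaced 0x800 apart. A hypothetical
 * generator for the control-register addresses:
 */
static inline uint32_t ecc_umc_mcumc_ctrl_addr_of(uint32_t umc_inst,
						  uint32_t ch_inst)
{
	/* umc_inst in 0..7, ch_inst in 0..3 */
	return 0x000143c0 + umc_inst * 0x40000 + ch_inst * 0x800;
}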
static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *src,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 bits, i, tmp, reg;

	/* Devices newer than VEGA10/12 shall have these programming
	 * sequences performed by PSP BL */
	if (adev->asic_type >= CHIP_VEGA20)
		return 0;

	bits = 0x3f;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
			tmp = RREG32(reg);
			tmp &= ~bits;
			WREG32(reg, tmp);
		}
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
			tmp = RREG32(reg);
			tmp &= ~bits;
			WREG32(reg, tmp);
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
			tmp = RREG32(reg);
			tmp |= bits;
			WREG32(reg, tmp);
		}
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
			tmp = RREG32(reg);
			tmp |= bits;
			WREG32(reg, tmp);
		}
		break;
	default:
		break;
	}

	return 0;
}

static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *src,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, bits, i, j;

	bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
				tmp = RREG32(reg);
				tmp &= ~bits;
				WREG32(reg, tmp);
			}
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
				tmp = RREG32(reg);
				tmp |= bits;
				WREG32(reg, tmp);
			}
		}
		break;
	default:
		break;
	}

	return 0;
}

static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	bool retry_fault = !!(entry->src_data[1] & 0x80);
	uint32_t status = 0, cid = 0, rw = 0;
	struct amdgpu_task_info task_info;
	struct amdgpu_vmhub *hub;
	const char *mmhub_cid;
	const char *hub_name;
	u64 addr;

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
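	/*
	 * Note (not in the original file): addr now holds the full 48-bit,
	 * 4K-aligned fault address; src_data[0] supplied bits 43:12 and
	 * the low nibble of src_data[1] supplied bits 47:44.
	 */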

	if (retry_fault) {
		/* Returning 1 here also prevents sending the IV to the KFD */

		/* Process it only if it's the first fault for this address */
		if (entry->ih != &adev->irq.ih_soft &&
		    amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
					     entry->timestamp))
			return 1;

		/* Delegate it to a different ring if the hardware hasn't
		 * already processed it.
		 */
		if (entry->ih == &adev->irq.ih) {
			amdgpu_irq_delegate(adev, entry, 8);
			return 1;
		}

		/* Try to handle the recoverable page faults by filling page
		 * tables
		 */
		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
			return 1;
	}

	if (!printk_ratelimit())
		return 0;

	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
		hub_name = "mmhub0";
		hub = &adev->vmhub[AMDGPU_MMHUB_0];
	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
		hub_name = "mmhub1";
		hub = &adev->vmhub[AMDGPU_MMHUB_1];
	} else {
		hub_name = "gfxhub0";
		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
	}

	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);

	dev_err(adev->dev,
		"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
		"pasid:%u, for process %s pid %d thread %s pid %d)\n",
		hub_name, retry_fault ? "retry" : "no-retry",
		entry->src_id, entry->ring_id, entry->vmid,
		entry->pasid, task_info.process_name, task_info.tgid,
		task_info.task_name, task_info.pid);
	dev_err(adev->dev, " in page starting at address 0x%016llx from IH client 0x%x (%s)\n",
		addr, entry->client_id,
		soc15_ih_clientid_name[entry->client_id]);

	if (amdgpu_sriov_vf(adev))
		return 0;

	/*
	 * Issue a dummy read to wait for the status register to
	 * be updated to avoid reading an incorrect value due to
	 * the new fast GRBM interface.
	 */
	if ((entry->vmid_src == AMDGPU_GFXHUB_0) &&
	    (adev->asic_type < CHIP_ALDEBARAN))
		RREG32(hub->vm_l2_pro_fault_status);

	status = RREG32(hub->vm_l2_pro_fault_status);
	cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
	rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
	WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);

	dev_err(adev->dev,
		"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
		status);
	if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
		dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
			cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
			gfxhub_client_ids[cid],
			cid);
	} else {
		switch (adev->asic_type) {
		case CHIP_VEGA10:
			mmhub_cid = mmhub_client_ids_vega10[cid][rw];
			break;
		case CHIP_VEGA12:
			mmhub_cid = mmhub_client_ids_vega12[cid][rw];
			break;
		case CHIP_VEGA20:
			mmhub_cid = mmhub_client_ids_vega20[cid][rw];
			break;
		case CHIP_ARCTURUS:
			mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
			break;
		case CHIP_RAVEN:
			mmhub_cid = mmhub_client_ids_raven[cid][rw];
			break;
		case CHIP_RENOIR:
			mmhub_cid = mmhub_client_ids_renoir[cid][rw];
			break;
		case CHIP_ALDEBARAN:
			mmhub_cid = mmhub_client_ids_aldebaran[cid][rw];
			break;
		default:
			mmhub_cid = NULL;
			break;
		}
		dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
			mmhub_cid ? mmhub_cid : "unknown", cid);
	}
	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
	dev_err(adev->dev, "\t RW: 0x%x\n", rw);

	return 0;
}

static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
	.set = gmc_v9_0_vm_fault_interrupt_state,
	.process = gmc_v9_0_process_interrupt,
};

static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
	.set = gmc_v9_0_ecc_interrupt_state,
	.process = amdgpu_umc_process_ecc_irq,
};

static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gmc.vm_fault.num_types = 1;
	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;

	if (!amdgpu_sriov_vf(adev) &&
	    !adev->gmc.xgmi.connected_to_cpu) {
		adev->gmc.ecc_irq.num_types = 1;
		adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
	}
}

static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
					    uint32_t flush_type)
{
	u32 req = 0;

	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);

	return req;
}

/**
 * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore
 *
 * @adev: amdgpu_device pointer
 * @vmhub: vmhub type
 *
 */
static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
					      uint32_t vmhub)
{
	if (adev->asic_type == CHIP_ALDEBARAN)
		return false;

	return ((vmhub == AMDGPU_MMHUB_0 ||
		 vmhub == AMDGPU_MMHUB_1) &&
		(!amdgpu_sriov_vf(adev)) &&
		(!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
		   (adev->apu_flags & AMD_APU_IS_PICASSO))));
}

static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
						     uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */
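
/*
 * Illustrative only, not part of the original file: the resulting VMID
 * partitioning, matching the first_kfd_vmid assignment in
 * gmc_v9_0_sw_init() below.
 *
 *	VMID 0       : kernel / physical GPU addresses
 *	VMID 1..n-1  : amdgpu graphics/compute
 *	VMID n..15   : amdkfd (n == 8 with graphics, n == 3 compute-only)
 */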
/**
 * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table using certain type.
 */
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
				   uint32_t vmhub, uint32_t flush_type)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
	const unsigned eng = 17;
	u32 j, inv_req, inv_req2, tmp;
	struct amdgpu_vmhub *hub;

	BUG_ON(vmhub >= adev->num_vmhubs);

	hub = &adev->vmhub[vmhub];
	if (adev->gmc.xgmi.num_physical_nodes &&
	    adev->asic_type == CHIP_VEGA20) {
		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
		 * heavy-weight TLB flush (type 2), which flushes
		 * both. Due to a race condition with concurrent
		 * memory accesses using the same TLB cache line, we
		 * still need a second TLB flush after this.
		 */
		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
	} else {
		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
		inv_req2 = 0;
	}

	/* This is necessary for a HW workaround under SRIOV as well
	 * as GFXOFF under bare metal
	 */
	if (adev->gfx.kiq.ring.sched.ready &&
	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
	    down_read_trylock(&adev->reset_sem)) {
		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
						   1 << vmid);
		up_read(&adev->reset_sem);
		return;
	}

	spin_lock(&adev->gmc.invalidate_lock);
	/*
	 * It may lose gpuvm invalidate acknowledge state across power-gating
	 * off cycle, add semaphore acquire before invalidation and semaphore
	 * release after invalidation to avoid entering power gated state
	 * to WA the Issue
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore) {
		for (j = 0; j < adev->usec_timeout; j++) {
			/* a read return value of 1 means semaphore acquire */
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
					    hub->eng_distance * eng);
			if (tmp & 0x1)
				break;
			udelay(1);
		}

		if (j >= adev->usec_timeout)
			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
	}

	do {
		WREG32_NO_KIQ(hub->vm_inv_eng0_req +
			      hub->eng_distance * eng, inv_req);

		/*
		 * Issue a dummy read to wait for the ACK register to
		 * be cleared to avoid a false ACK due to the new fast
		 * GRBM interface.
		 */
		if ((vmhub == AMDGPU_GFXHUB_0) &&
		    (adev->asic_type < CHIP_ALDEBARAN))
			RREG32_NO_KIQ(hub->vm_inv_eng0_req +
				      hub->eng_distance * eng);

		for (j = 0; j < adev->usec_timeout; j++) {
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng);
			if (tmp & (1 << vmid))
				break;
			udelay(1);
		}

		inv_req = inv_req2;
		inv_req2 = 0;
	} while (inv_req);

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * write with 0 means semaphore release
		 */
		WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
			      hub->eng_distance * eng, 0);

	spin_unlock(&adev->gmc.invalidate_lock);

	if (j < adev->usec_timeout)
		return;

	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
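
/*
 * Illustrative only, not part of the original file: callers reach this
 * through the gmc_funcs table; for example, gmc_v9_0_hw_init() below
 * flushes VMID 0 on every hub with a light-weight (type 0) invalidation:
 *
 *	for (i = 0; i < adev->num_vmhubs; ++i)
 *		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
 */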
/**
 * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flushed
 * @flush_type: the flush type
 * @all_hub: flush all hubs
 *
 * Flush the TLB for the requested pasid.
 */
static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
					uint16_t pasid, uint32_t flush_type,
					bool all_hub)
{
	int vmid, i;
	signed long r;
	uint32_t seq;
	uint16_t queried_pasid;
	bool ret;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	if (amdgpu_in_reset(adev))
		return -EIO;

	if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
		 * heavy-weight TLB flush (type 2), which flushes
		 * both. Due to a race condition with concurrent
		 * memory accesses using the same TLB cache line, we
		 * still need a second TLB flush after this.
		 */
		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
				       adev->asic_type == CHIP_VEGA20);
		/* 2 dwords flush + 8 dwords fence */
		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;

		if (vega20_xgmi_wa)
			ndw += kiq->pmf->invalidate_tlbs_size;

		spin_lock(&adev->gfx.kiq.ring_lock);
		/* 2 dwords flush + 8 dwords fence */
		amdgpu_ring_alloc(ring, ndw);
		if (vega20_xgmi_wa)
			kiq->pmf->kiq_invalidate_tlbs(ring,
						      pasid, 2, all_hub);
		kiq->pmf->kiq_invalidate_tlbs(ring,
					      pasid, flush_type, all_hub);
		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
		if (r) {
			amdgpu_ring_undo(ring);
			spin_unlock(&adev->gfx.kiq.ring_lock);
			up_read(&adev->reset_sem);
			return -ETIME;
		}

		amdgpu_ring_commit(ring);
		spin_unlock(&adev->gfx.kiq.ring_lock);
		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
		if (r < 1) {
			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
			up_read(&adev->reset_sem);
			return -ETIME;
		}

		up_read(&adev->reset_sem);
		return 0;
	}

	for (vmid = 1; vmid < 16; vmid++) {

		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
							       &queried_pasid);
		if (ret && queried_pasid == pasid) {
			if (all_hub) {
				for (i = 0; i < adev->num_vmhubs; i++)
					gmc_v9_0_flush_gpu_tlb(adev, vmid,
							       i, flush_type);
			} else {
				gmc_v9_0_flush_gpu_tlb(adev, vmid,
						       AMDGPU_GFXHUB_0, flush_type);
			}
			break;
		}
	}

	return 0;
}
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					    unsigned vmid, uint64_t pd_addr)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	/*
	 * It may lose gpuvm invalidate acknowledge state across power-gating
	 * off cycle, add semaphore acquire before invalidation and semaphore
	 * release after invalidation to avoid entering power gated state
	 * to WA the Issue
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/* a read return value of 1 means semaphore acquire */
		amdgpu_ring_emit_reg_wait(ring,
					  hub->vm_inv_eng0_sem +
					  hub->eng_distance * eng, 0x1, 0x1);

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
			      (hub->ctx_addr_distance * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
			      (hub->ctx_addr_distance * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
					    hub->eng_distance * eng,
					    hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng,
					    req, 1 << vmid);

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * write with 0 means semaphore release
		 */
		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
				      hub->eng_distance * eng, 0);

	return pd_addr;
}
static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
					unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	/* Do nothing because there's no lut register for mmhub1. */
	if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
		return;

	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}

/*
 * PTE format on VEGA 10:
 * 63:59 reserved
 * 58:57 mtype
 * 56 F
 * 55 L
 * 54 P
 * 53 SW
 * 52 T
 * 50:48 reserved
 * 47:12 4k physical page base address
 * 11:7 fragment
 * 6 write
 * 5 read
 * 4 exe
 * 3 Z
 * 2 snooped
 * 1 system
 * 0 valid
 *
 * PDE format on VEGA 10:
 * 63:59 block fragment size
 * 58:55 reserved
 * 54 P
 * 53:48 reserved
 * 47:6 physical base address of PD or PTE
 * 5:3 reserved
 * 2 C
 * 1 system
 * 0 valid
 */
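
/*
 * Illustrative only, not part of the original file: a hypothetical,
 * minimal sketch composing the PTE layout documented above for a single
 * 4K page, using the AMDGPU_PTE_* helpers from amdgpu_vm.h and the
 * MTYPE enum from vega10_enum.h.
 */
static inline uint64_t gmc_v9_0_example_pte(uint64_t paddr_4k_aligned)
{
	/* bits 47:12: 4k physical page base address */
	uint64_t pte = paddr_4k_aligned & 0x0000FFFFFFFFF000ULL;

	/* bits 6:4 and 0: write, read, exe and valid */
	pte |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE |
	       AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE;
	/* bits 58:57: mtype, here non-coherent */
	pte |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	return pte;
}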
static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
	switch (flags) {
	case AMDGPU_VM_MTYPE_DEFAULT:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_NC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_WC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
	case AMDGPU_VM_MTYPE_RW:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
	case AMDGPU_VM_MTYPE_CC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
	case AMDGPU_VM_MTYPE_UC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
	default:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	}
}

static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
				uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = amdgpu_gmc_vram_mc2pa(adev, *addr);
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE))
			*flags |= AMDGPU_PDE_BFS(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE)
			*flags &= ~AMDGPU_PDE_PTE;
		else
			*flags |= AMDGPU_PTE_TF;
	}
}

static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
				struct amdgpu_bo_va_mapping *mapping,
				uint64_t *flags)
{
	*flags &= ~AMDGPU_PTE_EXECUTABLE;
	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;

	if (mapping->flags & AMDGPU_PTE_PRT) {
		*flags |= AMDGPU_PTE_PRT;
		*flags &= ~AMDGPU_PTE_VALID;
	}

	if ((adev->asic_type == CHIP_ARCTURUS ||
	     adev->asic_type == CHIP_ALDEBARAN) &&
	    !(*flags & AMDGPU_PTE_SYSTEM) &&
	    mapping->bo_va->is_xgmi)
		*flags |= AMDGPU_PTE_SNOOPED;

	if (adev->asic_type == CHIP_ALDEBARAN)
		*flags |= mapping->flags & AMDGPU_PTE_SNOOPED;
}

static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
	unsigned size;

	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
		size = AMDGPU_VBIOS_VGA_ALLOCATION;
	} else {
		u32 viewport;

		switch (adev->asic_type) {
		case CHIP_RAVEN:
		case CHIP_RENOIR:
			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
			size = (REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
				4);
			break;
		default:
			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
				4);
			break;
		}
	}

	return size;
}

static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
	.map_mtype = gmc_v9_0_map_mtype,
	.get_vm_pde = gmc_v9_0_get_vm_pde,
	.get_vm_pte = gmc_v9_0_get_vm_pte,
	.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
};

static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
{
	adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}

static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->umc.funcs = &umc_v6_0_funcs;
		break;
	case CHIP_VEGA20:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
		break;
	case CHIP_ARCTURUS:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
		break;
	case CHIP_ALDEBARAN:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_7_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
		if (!adev->gmc.xgmi.connected_to_cpu)
			adev->umc.ras_funcs = &umc_v6_7_ras_funcs;
		if (1 & adev->smuio.funcs->get_die_id(adev))
			adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0];
		else
			adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0];
		break;
	default:
		break;
	}
}

static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_ARCTURUS:
		adev->mmhub.funcs = &mmhub_v9_4_funcs;
		break;
	case CHIP_ALDEBARAN:
		adev->mmhub.funcs = &mmhub_v1_7_funcs;
		break;
	default:
		adev->mmhub.funcs = &mmhub_v1_0_funcs;
		break;
	}
}

static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA20:
		adev->mmhub.ras_funcs = &mmhub_v1_0_ras_funcs;
		break;
	case CHIP_ARCTURUS:
		adev->mmhub.ras_funcs = &mmhub_v9_4_ras_funcs;
		break;
	case CHIP_ALDEBARAN:
		adev->mmhub.ras_funcs = &mmhub_v1_7_ras_funcs;
		break;
	default:
		/* mmhub ras is not available */
		break;
	}
}

static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
	adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
}

static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
{
	adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs;
}

static int gmc_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->asic_type == CHIP_VEGA20 ||
	    adev->asic_type == CHIP_ARCTURUS)
		adev->gmc.xgmi.supported = true;

	if (adev->asic_type == CHIP_ALDEBARAN) {
		adev->gmc.xgmi.supported = true;
		adev->gmc.xgmi.connected_to_cpu =
			adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
	}

	gmc_v9_0_set_gmc_funcs(adev);
	gmc_v9_0_set_irq_funcs(adev);
	gmc_v9_0_set_umc_funcs(adev);
	gmc_v9_0_set_mmhub_funcs(adev);
	gmc_v9_0_set_mmhub_ras_funcs(adev);
	gmc_v9_0_set_gfxhub_funcs(adev);
	gmc_v9_0_set_hdp_ras_funcs(adev);

	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
	adev->gmc.shared_aperture_end =
		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
	adev->gmc.private_aperture_end =
		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
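
	/*
	 * Note (not in the original file): both apertures above are 4 GB
	 * windows; (4ULL << 30) - 1 is the inclusive end offset relative
	 * to each aperture start.
	 */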

	return 0;
}

static int gmc_v9_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
	if (r)
		return r;

	/*
	 * Workaround for a performance drop issue seen when the VBIOS
	 * enables partial writes while disabling HBM ECC for Vega10.
	 */
	if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
		if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
			if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
				adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
		}
	}

	if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
		if (adev->mmhub.ras_funcs &&
		    adev->mmhub.ras_funcs->reset_ras_error_count)
			adev->mmhub.ras_funcs->reset_ras_error_count(adev);

		if (adev->hdp.ras_funcs &&
		    adev->hdp.ras_funcs->reset_ras_error_count)
			adev->hdp.ras_funcs->reset_ras_error_count(adev);
	}

	r = amdgpu_gmc_ras_late_init(adev);
	if (r)
		return r;

	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}
static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
				       struct amdgpu_gmc *mc)
{
	u64 base = adev->mmhub.funcs->get_fb_location(adev);

	/* add the xgmi offset of the physical node */
	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
	if (adev->gmc.xgmi.connected_to_cpu) {
		amdgpu_gmc_sysvm_location(adev, mc);
	} else {
		amdgpu_gmc_vram_location(adev, mc, base);
		amdgpu_gmc_gart_location(adev, mc);
		amdgpu_gmc_agp_location(adev, mc);
	}
	/* base offset of vram pages */
	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);

	/* XXX: add the xgmi offset of the physical node? */
	adev->vm_manager.vram_base_offset +=
		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}

/**
 * gmc_v9_0_mc_init - initialize the memory controller driver params
 *
 * @adev: amdgpu_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space.
 * Returns 0 for success.
 */
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
	int r;

	/* size in MB on si */
	adev->gmc.mc_vram_size =
		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;

	if (!(adev->flags & AMD_IS_APU) &&
	    !adev->gmc.xgmi.connected_to_cpu) {
		r = amdgpu_device_resize_fb_bar(adev);
		if (r)
			return r;
	}
	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);

#ifdef CONFIG_X86_64
	/*
	 * AMD Accelerated Processing Platform (APP) supporting the GPU-HOST
	 * xgmi interface can use VRAM through here, as it appears as system
	 * reserved memory in the host address space.
	 *
	 * For APUs, VRAM is just the stolen system memory and can be accessed
	 * directly.
	 *
	 * Otherwise, use the legacy Host Data Path (HDP) through PCIe BAR.
	 */
	/* check whether both host-gpu and gpu-gpu xgmi links exist */
	if ((adev->flags & AMD_IS_APU) ||
	    (adev->gmc.xgmi.supported &&
	     adev->gmc.xgmi.connected_to_cpu)) {
		adev->gmc.aper_base =
			adev->gfxhub.funcs->get_mc_fb_offset(adev) +
			adev->gmc.xgmi.physical_node_id *
			adev->gmc.xgmi.node_segment_size;
		adev->gmc.aper_size = adev->gmc.real_vram_size;
	}
#endif

	/* In case the PCI BAR is larger than the actual amount of vram */
	adev->gmc.visible_vram_size = adev->gmc.aper_size;
	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

	/* set the gart size */
	if (amdgpu_gart_size == -1) {
		switch (adev->asic_type) {
		case CHIP_VEGA10:  /* all engines support GPUVM */
		case CHIP_VEGA12:  /* all engines support GPUVM */
		case CHIP_ALDEBARAN:
		default:
			adev->gmc.gart_size = 512ULL << 20;
			break;
		case CHIP_RAVEN:   /* DCE SG support */
			adev->gmc.gart_size = 1024ULL << 20;
			break;
		}
	} else {
		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
	}

	adev->gmc.gart_size += adev->pm.smu_prv_buffer_size;

	gmc_v9_0_vram_gtt_location(adev, &adev->gmc);

	return 0;
}

static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo) {
		WARN(1, "VEGA10 PCIE GART already initialized\n");
		return 0;
	}

	if (adev->gmc.xgmi.connected_to_cpu) {
		adev->gmc.vmid0_page_table_depth = 1;
		adev->gmc.vmid0_page_table_block_size = 12;
	} else {
		adev->gmc.vmid0_page_table_depth = 0;
		adev->gmc.vmid0_page_table_block_size = 0;
	}

	/* Initialize common gart structure */
	r = amdgpu_gart_init(adev);
	if (r)
		return r;
	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
				    AMDGPU_PTE_EXECUTABLE;

	r = amdgpu_gart_table_vram_alloc(adev);
	if (r)
		return r;

	if (adev->gmc.xgmi.connected_to_cpu) {
		r = amdgpu_gmc_pdb0_alloc(adev);
	}

	return r;
}

/**
 * gmc_v9_0_save_registers - saves regs
 *
 * @adev: amdgpu_device pointer
 *
 * This saves potential register values that should be
 * restored upon resume
 */
static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_RAVEN)
		adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
}
static int gmc_v9_0_sw_init(void *handle)
{
	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfxhub.funcs->init(adev);

	adev->mmhub.funcs->init(adev);

	spin_lock_init(&adev->gmc.invalidate_lock);

	r = amdgpu_atomfirmware_get_vram_info(adev,
					      &vram_width, &vram_type, &vram_vendor);
	if (amdgpu_sriov_vf(adev))
		/* For Vega10 SR-IOV, vram_width can't be read from ATOM as on
		 * Raven, and the DF related registers are not readable;
		 * hardcoding seems to be the only way to set the correct
		 * vram_width.
		 */
		adev->gmc.vram_width = 2048;
	else if (amdgpu_emu_mode != 1)
		adev->gmc.vram_width = vram_width;
	if (!adev->gmc.vram_width) {
		int chansize, numchan;

		/* hbm memory channel size */
		if (adev->flags & AMD_IS_APU)
			chansize = 64;
		else
			chansize = 128;

		numchan = adev->df.funcs->get_hbm_channel_number(adev);
		adev->gmc.vram_width = numchan * chansize;
	}

	adev->gmc.vram_type = vram_type;
	adev->gmc.vram_vendor = vram_vendor;
	switch (adev->asic_type) {
	case CHIP_RAVEN:
		adev->num_vmhubs = 2;

		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		} else {
			/* vm_size is 128TB + 512GB for legacy 3-level page support */
			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
			adev->gmc.translate_further =
				adev->vm_manager.num_level > 1;
		}
		break;
	case CHIP_ALDEBARAN:
		adev->num_vmhubs = 2;

		/*
		 * To fulfill 4-level page support,
		 * vm size is 256TB (48bit), maximum size of Vega10,
		 * block size 512 (9bit)
		 */
		/* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
		if (amdgpu_sriov_vf(adev))
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
		else
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	case CHIP_ARCTURUS:
		adev->num_vmhubs = 3;

		/* Keep the vm size same with Vega20 */
		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	default:
		break;
	}

	/* This interrupt is VMC page fault.*/
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
			      &adev->gmc.vm_fault);
	if (r)
		return r;

	if (adev->asic_type == CHIP_ARCTURUS) {
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
				      &adev->gmc.vm_fault);
		if (r)
			return r;
	}

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
			      &adev->gmc.vm_fault);
	if (r)
		return r;

	if (!amdgpu_sriov_vf(adev) &&
	    !adev->gmc.xgmi.connected_to_cpu) {
		/* interrupt sent to DF. */
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
				      &adev->gmc.ecc_irq);
		if (r)
			return r;
	}

	/* Set the internal MC address mask
	 * This is the max address of the GPU's
	 * internal address space.
	 */
	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
	if (r) {
		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
		return r;
	}
	adev->need_swiotlb = drm_need_swiotlb(44);

	if (adev->gmc.xgmi.supported) {
		r = adev->gfxhub.funcs->get_xgmi_info(adev);
		if (r)
			return r;
	}

	r = gmc_v9_0_mc_init(adev);
	if (r)
		return r;

	amdgpu_gmc_get_vbios_allocations(adev);

	/* Memory manager */
	r = amdgpu_bo_init(adev);
	if (r)
		return r;

	r = gmc_v9_0_gart_init(adev);
	if (r)
		return r;

	/*
	 * number of VMs
	 * VMID 0 is reserved for System
	 * amdgpu graphics/compute will use VMIDs 1..n-1
	 * amdkfd will use VMIDs n..15
	 *
	 * The first KFD VMID is 8 for GPUs with graphics, 3 for
	 * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs
	 * for video processing.
	 */
	adev->vm_manager.first_kfd_vmid =
		(adev->asic_type == CHIP_ARCTURUS ||
		 adev->asic_type == CHIP_ALDEBARAN) ? 3 : 8;

	amdgpu_vm_manager_init(adev);

	gmc_v9_0_save_registers(adev);

	return 0;
}
static int gmc_v9_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_gmc_ras_fini(adev);
	amdgpu_gem_force_release(adev);
	amdgpu_vm_manager_fini(adev);
	amdgpu_gart_table_vram_free(adev);
	amdgpu_bo_unref(&adev->gmc.pdb0_bo);
	amdgpu_bo_fini(adev);

	return 0;
}

static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (amdgpu_sriov_vf(adev))
			break;
		fallthrough;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_mmhub_1_0_0,
						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	case CHIP_RAVEN:
		/* TODO for renoir */
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	default:
		break;
	}
}

/**
 * gmc_v9_0_restore_registers - restores regs
 *
 * @adev: amdgpu_device pointer
 *
 * This restores register values, saved at suspend.
 */
void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_RAVEN) {
		WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
		WARN_ON(adev->gmc.sdpif_register !=
			RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
	}
}

/**
 * gmc_v9_0_gart_enable - gart enable
 *
 * @adev: amdgpu_device pointer
 */
static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
	int r;

	if (adev->gmc.xgmi.connected_to_cpu)
		amdgpu_gmc_init_pdb0(adev);

	if (adev->gart.bo == NULL) {
		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}

	r = amdgpu_gart_table_vram_pin(adev);
	if (r)
		return r;

	r = adev->gfxhub.funcs->gart_enable(adev);
	if (r)
		return r;

	r = adev->mmhub.funcs->gart_enable(adev);
	if (r)
		return r;

	DRM_INFO("PCIE GART of %uM enabled.\n",
		 (unsigned)(adev->gmc.gart_size >> 20));
	if (adev->gmc.pdb0_bo)
		DRM_INFO("PDB0 located at 0x%016llX\n",
			 (unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo));
	DRM_INFO("PTB located at 0x%016llX\n",
		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));

	adev->gart.ready = true;
	return 0;
}

static int gmc_v9_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool value;
	int i, r;

	/* The sequence of these two function calls matters.*/
	gmc_v9_0_init_golden_registers(adev);

	if (adev->mode_info.num_crtc) {
		/* Lockout access through VGA aperture*/
		WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
		/* disable VGA render */
		WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
	}

	if (adev->mmhub.funcs->update_power_gating)
		adev->mmhub.funcs->update_power_gating(adev, true);

	adev->hdp.funcs->init_registers(adev);

	/* After HDP is initialized, flush HDP.*/
	adev->hdp.funcs->flush_hdp(adev, NULL);

	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
		value = false;
	else
		value = true;

	if (!amdgpu_sriov_vf(adev)) {
		adev->gfxhub.funcs->set_fault_enable_default(adev, value);
		adev->mmhub.funcs->set_fault_enable_default(adev, value);
	}
	for (i = 0; i < adev->num_vmhubs; ++i)
		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);

	if (adev->umc.funcs && adev->umc.funcs->init_registers)
		adev->umc.funcs->init_registers(adev);

	r = gmc_v9_0_gart_enable(adev);

	return r;
}
/**
 * gmc_v9_0_gart_disable - gart disable
 *
 * @adev: amdgpu_device pointer
 *
 * This disables all VM page tables.
 */
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
	adev->gfxhub.funcs->gart_disable(adev);
	adev->mmhub.funcs->gart_disable(adev);
	amdgpu_gart_table_vram_unpin(adev);
}
static int gmc_v9_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any GMC register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
	gmc_v9_0_gart_disable(adev);

	return 0;
}

static int gmc_v9_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gmc_v9_0_hw_fini(adev);
}

static int gmc_v9_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gmc_v9_0_hw_init(adev);
	if (r)
		return r;

	amdgpu_vmid_reset_all(adev);

	return 0;
}

static bool gmc_v9_0_is_idle(void *handle)
{
	/* MC is always ready in GMC v9.*/
	return true;
}

static int gmc_v9_0_wait_for_idle(void *handle)
{
	/* There is no need to wait for MC idle in GMC v9.*/
	return 0;
}

static int gmc_v9_0_soft_reset(void *handle)
{
	/* XXX for emulation.*/
	return 0;
}

static int gmc_v9_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mmhub.funcs->set_clockgating(adev, state);

	athub_v1_0_set_clockgating(adev, state);

	return 0;
}

static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mmhub.funcs->get_clockgating(adev, flags);

	athub_v1_0_get_clockgating(adev, flags);
}

static int gmc_v9_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}

const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
	.name = "gmc_v9_0",
	.early_init = gmc_v9_0_early_init,
	.late_init = gmc_v9_0_late_init,
	.sw_init = gmc_v9_0_sw_init,
	.sw_fini = gmc_v9_0_sw_fini,
	.hw_init = gmc_v9_0_hw_init,
	.hw_fini = gmc_v9_0_hw_fini,
	.suspend = gmc_v9_0_suspend,
	.resume = gmc_v9_0_resume,
	.is_idle = gmc_v9_0_is_idle,
	.wait_for_idle = gmc_v9_0_wait_for_idle,
	.soft_reset = gmc_v9_0_soft_reset,
	.set_clockgating_state = gmc_v9_0_set_clockgating_state,
	.set_powergating_state = gmc_v9_0_set_powergating_state,
	.get_clockgating_state = gmc_v9_0_get_clockgating_state,
};

const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GMC,
	.funcs = &gmc_v9_0_ip_funcs,
};