/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/firmware.h>
#include <linux/pci.h>

#include <drm/drm_cache.h>

#include "amdgpu_atomfirmware.h"
#include "amdgpu_gem.h"

#include "hdp/hdp_4_0_offset.h"
#include "hdp/hdp_4_0_sh_mask.h"
#include "gc/gc_9_0_sh_mask.h"
#include "dce/dce_12_0_offset.h"
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"

#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"

#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
#include "athub_v1_0.h"
#include "gfxhub_v1_1.h"
#include "mmhub_v9_4.h"

#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"

#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"
/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
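/*
 * For illustration: the shift/mask pairs above are consumed through the
 * standard REG_GET_FIELD() helper, e.g. in gmc_v9_0_get_vbios_fb_size()
 * below:
 *
 *	u32 viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
 *	u32 width = REG_GET_FIELD(viewport,
 *				  HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION,
 *				  PRI_VIEWPORT_WIDTH);
 */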
static const char *gfxhub_client_ids[] = {

/* The MMHUB client-ID tables below are indexed as [client_id][rw], where rw
 * is the RW bit decoded from VM_L2_PROTECTION_FAULT_STATUS (0 = read fault,
 * 1 = write fault); see gmc_v9_0_process_interrupt().
 */
static const char *mmhub_client_ids_raven[][2] = {

static const char *mmhub_client_ids_renoir[][2] = {

static const char *mmhub_client_ids_vega10[][2] = {
	[32+14][0] = "SDMA0",
	[32+4][1] = "DCEDWB",
	[32+14][1] = "SDMA1",
};

static const char *mmhub_client_ids_vega12[][2] = {
	[32+15][0] = "SDMA0",
	[32+1][1] = "DCEDWB",
	[32+15][1] = "SDMA1",
};

static const char *mmhub_client_ids_vega20[][2] = {
	[32+12][0] = "UTCL2",
	[32+14][0] = "SDMA1",
	[32+14][1] = "SDMA1",
};

static const char *mmhub_client_ids_arcturus[][2] = {
	[32+15][0] = "SDMA1",
	[64+15][0] = "SDMA2",
	[96+15][0] = "SDMA3",
	[128+15][0] = "SDMA4",
	[160+11][0] = "JPEG",
	[160+13][0] = "VCNU",
	[160+15][0] = "SDMA5",
	[192+10][0] = "UTCL2",
	[192+11][0] = "JPEG1",
	[192+12][0] = "VCN1",
	[192+13][0] = "VCN1U",
	[192+15][0] = "SDMA6",
	[224+15][0] = "SDMA7",
	[32+15][1] = "SDMA1",
	[64+15][1] = "SDMA2",
	[96+15][1] = "SDMA3",
	[128+15][1] = "SDMA4",
	[160+11][1] = "JPEG",
	[160+13][1] = "VCNU",
	[160+15][1] = "SDMA5",
	[192+11][1] = "JPEG1",
	[192+12][1] = "VCN1",
	[192+13][1] = "VCN1U",
	[192+15][1] = "SDMA6",
	[224+15][1] = "SDMA7",
};
static const u32 golden_settings_vega10_hdp[] =
{
	0xf64, 0x0fffffff, 0x00000000,
	0xf65, 0x0fffffff, 0x00000000,
	0xf66, 0x0fffffff, 0x00000000,
	0xf67, 0x0fffffff, 0x00000000,
	0xf68, 0x0fffffff, 0x00000000,
	0xf6a, 0x0fffffff, 0x00000000,
	0xf6b, 0x0fffffff, 0x00000000,
	0xf6c, 0x0fffffff, 0x00000000,
	0xf6d, 0x0fffffff, 0x00000000,
	0xf6e, 0x0fffffff, 0x00000000,
};
static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
};

static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
};
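/*
 * Note: the golden-settings tables above are applied in
 * gmc_v9_0_init_golden_registers() and gmc_v9_0_hw_init() below via
 * soc15_program_register_sequence()/amdgpu_device_program_register_sequence();
 * each entry is an (offset, and-mask, value) triple applied as a
 * read-modify-write of the register.
 */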
static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
	(0x000143c0 + 0x00000000),
	(0x000143c0 + 0x00000800),
	(0x000143c0 + 0x00001000),
	(0x000143c0 + 0x00001800),
	(0x000543c0 + 0x00000000),
	(0x000543c0 + 0x00000800),
	(0x000543c0 + 0x00001000),
	(0x000543c0 + 0x00001800),
	(0x000943c0 + 0x00000000),
	(0x000943c0 + 0x00000800),
	(0x000943c0 + 0x00001000),
	(0x000943c0 + 0x00001800),
	(0x000d43c0 + 0x00000000),
	(0x000d43c0 + 0x00000800),
	(0x000d43c0 + 0x00001000),
	(0x000d43c0 + 0x00001800),
	(0x001143c0 + 0x00000000),
	(0x001143c0 + 0x00000800),
	(0x001143c0 + 0x00001000),
	(0x001143c0 + 0x00001800),
	(0x001543c0 + 0x00000000),
	(0x001543c0 + 0x00000800),
	(0x001543c0 + 0x00001000),
	(0x001543c0 + 0x00001800),
	(0x001943c0 + 0x00000000),
	(0x001943c0 + 0x00000800),
	(0x001943c0 + 0x00001000),
	(0x001943c0 + 0x00001800),
	(0x001d43c0 + 0x00000000),
	(0x001d43c0 + 0x00000800),
	(0x001d43c0 + 0x00001000),
	(0x001d43c0 + 0x00001800),
};
static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
	(0x000143e0 + 0x00000000),
	(0x000143e0 + 0x00000800),
	(0x000143e0 + 0x00001000),
	(0x000143e0 + 0x00001800),
	(0x000543e0 + 0x00000000),
	(0x000543e0 + 0x00000800),
	(0x000543e0 + 0x00001000),
	(0x000543e0 + 0x00001800),
	(0x000943e0 + 0x00000000),
	(0x000943e0 + 0x00000800),
	(0x000943e0 + 0x00001000),
	(0x000943e0 + 0x00001800),
	(0x000d43e0 + 0x00000000),
	(0x000d43e0 + 0x00000800),
	(0x000d43e0 + 0x00001000),
	(0x000d43e0 + 0x00001800),
	(0x001143e0 + 0x00000000),
	(0x001143e0 + 0x00000800),
	(0x001143e0 + 0x00001000),
	(0x001143e0 + 0x00001800),
	(0x001543e0 + 0x00000000),
	(0x001543e0 + 0x00000800),
	(0x001543e0 + 0x00001000),
	(0x001543e0 + 0x00001800),
	(0x001943e0 + 0x00000000),
	(0x001943e0 + 0x00000800),
	(0x001943e0 + 0x00001000),
	(0x001943e0 + 0x00001800),
	(0x001d43e0 + 0x00000000),
	(0x001d43e0 + 0x00000800),
	(0x001d43e0 + 0x00001000),
	(0x001d43e0 + 0x00001800),
};
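/*
 * Both tables above share one layout: a per-UMC-instance base address
 * (stride 0x40000) plus a 0x800 step per channel, i.e. entry
 * [4 * instance + channel] is the control (or mask) register of that
 * channel.
 */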
static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
		struct amdgpu_irq_src *src,
		unsigned type,
		enum amdgpu_interrupt_state state)
{
	u32 bits, i, tmp, reg;

	/* Devices newer than VEGA10/12 shall have these programming
	 * sequences performed by PSP BL */
	if (adev->asic_type >= CHIP_VEGA20)
		return 0;

	bits = 0x7f;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
			tmp = RREG32(reg);
			tmp &= ~bits;
			WREG32(reg, tmp);
		}
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
			tmp = RREG32(reg);
			tmp &= ~bits;
			WREG32(reg, tmp);
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
			tmp = RREG32(reg);
			tmp |= bits;
			WREG32(reg, tmp);
		}
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
			tmp = RREG32(reg);
			tmp |= bits;
			WREG32(reg, tmp);
		}
		break;
	default:
		break;
	}

	return 0;
}
static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *src,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, bits, i, j;

	bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
				tmp = RREG32(reg);
				tmp &= ~bits;
				WREG32(reg, tmp);
			}
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
				tmp = RREG32(reg);
				tmp |= bits;
				WREG32(reg, tmp);
			}
		}
		break;
	default:
		break;
	}

	return 0;
}
static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	bool retry_fault = !!(entry->src_data[1] & 0x80);
	uint32_t status = 0, cid = 0, rw = 0;
	struct amdgpu_task_info task_info;
	struct amdgpu_vmhub *hub;
	const char *mmhub_cid;
	const char *hub_name;
	u64 addr;

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (retry_fault) {
		/* Returning 1 here also prevents sending the IV to the KFD */

		/* Process it only if it's the first fault for this address */
		if (entry->ih != &adev->irq.ih_soft &&
		    amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
					     entry->timestamp))
			return 1;

		/* Delegate it to a different ring if the hardware hasn't
		 * already processed it
		 */
		if (in_interrupt()) {
			amdgpu_irq_delegate(adev, entry, 8);
			return 1;
		}

		/* Try to handle the recoverable page faults by filling page
		 * tables
		 */
		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
			return 1;
	}
	if (!printk_ratelimit())
		return 0;

	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
		hub_name = "mmhub0";
		hub = &adev->vmhub[AMDGPU_MMHUB_0];
	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
		hub_name = "mmhub1";
		hub = &adev->vmhub[AMDGPU_MMHUB_1];
	} else {
		hub_name = "gfxhub0";
		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
	}

	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);

	dev_err(adev->dev,
		"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
		"pasid:%u, for process %s pid %d thread %s pid %d)\n",
		hub_name, retry_fault ? "retry" : "no-retry",
		entry->src_id, entry->ring_id, entry->vmid,
		entry->pasid, task_info.process_name, task_info.tgid,
		task_info.task_name, task_info.pid);
	dev_err(adev->dev, " in page starting at address 0x%012llx from client %d\n",
		addr, entry->client_id);
	if (amdgpu_sriov_vf(adev))
		return 0;

	/*
	 * Issue a dummy read to wait for the status register to
	 * be updated to avoid reading an incorrect value due to
	 * the new fast GRBM interface.
	 */
	if (entry->vmid_src == AMDGPU_GFXHUB_0)
		RREG32(hub->vm_l2_pro_fault_status);

	status = RREG32(hub->vm_l2_pro_fault_status);
	cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
	rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
	WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);

	dev_err(adev->dev,
		"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
		status);
	if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
		dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
			cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
			gfxhub_client_ids[cid],
			cid);
	} else {
		switch (adev->asic_type) {
		case CHIP_VEGA10:
			mmhub_cid = mmhub_client_ids_vega10[cid][rw];
			break;
		case CHIP_VEGA12:
			mmhub_cid = mmhub_client_ids_vega12[cid][rw];
			break;
		case CHIP_VEGA20:
			mmhub_cid = mmhub_client_ids_vega20[cid][rw];
			break;
		case CHIP_ARCTURUS:
			mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
			break;
		case CHIP_RAVEN:
			mmhub_cid = mmhub_client_ids_raven[cid][rw];
			break;
		case CHIP_RENOIR:
			mmhub_cid = mmhub_client_ids_renoir[cid][rw];
			break;
		default:
			mmhub_cid = NULL;
			break;
		}
		dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
			mmhub_cid ? mmhub_cid : "unknown", cid);
	}
	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
		REG_GET_FIELD(status,
			      VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
		REG_GET_FIELD(status,
			      VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
		REG_GET_FIELD(status,
			      VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
		REG_GET_FIELD(status,
			      VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
	dev_err(adev->dev, "\t RW: 0x%x\n", rw);

	return 0;
}
static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
	.set = gmc_v9_0_vm_fault_interrupt_state,
	.process = gmc_v9_0_process_interrupt,
};

static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
	.set = gmc_v9_0_ecc_interrupt_state,
	.process = amdgpu_umc_process_ecc_irq,
};
static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gmc.vm_fault.num_types = 1;
	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;

	if (!amdgpu_sriov_vf(adev)) {
		adev->gmc.ecc_irq.num_types = 1;
		adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
	}
}
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
					    uint32_t flush_type)
{
	u32 req = 0;

	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);

	return req;
}
/**
 * gmc_v9_0_use_invalidate_semaphore - decide whether to use the invalidation semaphore
 *
 * @adev: amdgpu_device pointer
 * @vmhub: vmhub type
 *
 */
static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
				       uint32_t vmhub)
{
	/* Only the MMHUBs need the semaphore, and only on bare metal;
	 * original Picasso (a Picasso APU without the Raven2 rework) is
	 * excluded as well.
	 */
	return ((vmhub == AMDGPU_MMHUB_0 ||
		 vmhub == AMDGPU_MMHUB_1) &&
		(!amdgpu_sriov_vf(adev)) &&
		(!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
		   (adev->apu_flags & AMD_APU_IS_PICASSO))));
}
static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
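/*
 * Helper note: the lookup above reads the per-VMID ATC mapping register and
 * reports whether a valid PASID is bound to that VMID; it is used by
 * gmc_v9_0_flush_gpu_tlb_pasid() below to find the VMID(s) that currently
 * carry a given PASID.
 */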
/*
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

/**
 * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table using a certain flush type.
 */
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
					uint32_t vmhub, uint32_t flush_type)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
	const unsigned eng = 17;
	u32 j, inv_req, inv_req2, tmp;
	struct amdgpu_vmhub *hub;

	BUG_ON(vmhub >= adev->num_vmhubs);

	hub = &adev->vmhub[vmhub];
	if (adev->gmc.xgmi.num_physical_nodes &&
	    adev->asic_type == CHIP_VEGA20) {
		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
		 * heavy-weight TLB flush (type 2), which flushes
		 * both. Due to a race condition with concurrent
		 * memory accesses using the same TLB cache line, we
		 * still need a second TLB flush after this.
		 */
		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
	} else {
		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
		inv_req2 = 0;
	}

	/* This is necessary for a HW workaround under SRIOV as well
	 * as GFXOFF under bare metal
	 */
	if (adev->gfx.kiq.ring.sched.ready &&
	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
	    down_read_trylock(&adev->reset_sem)) {
		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
						   1 << vmid);
		up_read(&adev->reset_sem);
		return;
	}
	spin_lock(&adev->gmc.invalidate_lock);

	/*
	 * It may lose gpuvm invalidate acknowledge state across power-gating
	 * off cycle; add semaphore acquire before invalidation and semaphore
	 * release after invalidation to avoid entering a power-gated state.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore) {
		for (j = 0; j < adev->usec_timeout; j++) {
			/* a read return value of 1 means semaphore acquire */
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
					    hub->eng_distance * eng);
			if (tmp & 0x1)
				break;
			udelay(1);
		}

		if (j >= adev->usec_timeout)
			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
	}

	do {
		WREG32_NO_KIQ(hub->vm_inv_eng0_req +
			      hub->eng_distance * eng, inv_req);

		/*
		 * Issue a dummy read to wait for the ACK register to
		 * be cleared to avoid a false ACK due to the new fast
		 * GRBM interface.
		 */
		if (vmhub == AMDGPU_GFXHUB_0)
			RREG32_NO_KIQ(hub->vm_inv_eng0_req +
				      hub->eng_distance * eng);

		for (j = 0; j < adev->usec_timeout; j++) {
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng);
			if (tmp & (1 << vmid))
				break;
			udelay(1);
		}

		inv_req = inv_req2;
		inv_req2 = 0;
	} while (inv_req);

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * write with 0 means semaphore release
		 */
		WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
			      hub->eng_distance * eng, 0);

	spin_unlock(&adev->gmc.invalidate_lock);

	if (j < adev->usec_timeout)
		return;

	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
/**
 * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flushed
 * @flush_type: the flush type
 * @all_hub: flush all hubs
 *
 * Flush the TLB for the requested pasid.
 */
static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
					uint16_t pasid, uint32_t flush_type,
					bool all_hub)
{
	int vmid, i;
	signed long r;
	uint32_t seq;
	uint16_t queried_pasid;
	bool ret;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	if (amdgpu_in_reset(adev))
		return -EIO;

	if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
		 * heavy-weight TLB flush (type 2), which flushes
		 * both. Due to a race condition with concurrent
		 * memory accesses using the same TLB cache line, we
		 * still need a second TLB flush after this.
		 */
		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
				       adev->asic_type == CHIP_VEGA20);
		/* 2 dwords flush + 8 dwords fence */
		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;

		if (vega20_xgmi_wa)
			ndw += kiq->pmf->invalidate_tlbs_size;

		spin_lock(&adev->gfx.kiq.ring_lock);
		/* 2 dwords flush + 8 dwords fence */
		amdgpu_ring_alloc(ring, ndw);
		if (vega20_xgmi_wa)
			kiq->pmf->kiq_invalidate_tlbs(ring,
						      pasid, 2, all_hub);
		kiq->pmf->kiq_invalidate_tlbs(ring,
					pasid, flush_type, all_hub);
		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
		if (r) {
			amdgpu_ring_undo(ring);
			spin_unlock(&adev->gfx.kiq.ring_lock);
			up_read(&adev->reset_sem);
			return -ETIME;
		}

		amdgpu_ring_commit(ring);
		spin_unlock(&adev->gfx.kiq.ring_lock);
		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
		if (r < 1) {
			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
			up_read(&adev->reset_sem);
			return -ETIME;
		}

		up_read(&adev->reset_sem);
		return 0;
	}

	for (vmid = 1; vmid < 16; vmid++) {

		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
				&queried_pasid);
		if (ret && queried_pasid == pasid) {
			if (all_hub) {
				for (i = 0; i < adev->num_vmhubs; i++)
					gmc_v9_0_flush_gpu_tlb(adev, vmid,
							i, flush_type);
			} else {
				gmc_v9_0_flush_gpu_tlb(adev, vmid,
						AMDGPU_GFXHUB_0, flush_type);
			}
			break;
		}
	}

	return 0;
}
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					    unsigned vmid, uint64_t pd_addr)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	/*
	 * It may lose gpuvm invalidate acknowledge state across power-gating
	 * off cycle; add semaphore acquire before invalidation and semaphore
	 * release after invalidation to avoid entering a power-gated state.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/* a read return value of 1 means semaphore acquire */
		amdgpu_ring_emit_reg_wait(ring,
					  hub->vm_inv_eng0_sem +
					  hub->eng_distance * eng, 0x1, 0x1);

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
			      (hub->ctx_addr_distance * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
			      (hub->ctx_addr_distance * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
					    hub->eng_distance * eng,
					    hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng,
					    req, 1 << vmid);

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * write with 0 means semaphore release
		 */
		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
				      hub->eng_distance * eng, 0);

	return pd_addr;
}
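/*
 * Unlike gmc_v9_0_flush_gpu_tlb() above, which performs the invalidation
 * through direct (or KIQ-assisted) register writes, the _emit_ variant
 * queues the same register writes as ring commands during command
 * submission and hands the page-directory address back to the caller.
 */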
static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
					unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	/* Do nothing because there's no lut register for mmhub1. */
	if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
		return;

	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}
/*
 * PTE format on VEGA 10:
 * 47:12 4k physical page base address
 *
 * PDE format on VEGA 10:
 * 63:59 block fragment size
 * 47:6 physical base address of PD or PTE
 */
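/*
 * Illustrative sketch (not driver code): with the layout above, a valid,
 * writeable 4k PTE for a VRAM page could be assembled roughly as
 *
 *	u64 pte = (page_addr & 0x0000FFFFFFFFF000ULL) |	// 47:12 page base
 *		  AMDGPU_PTE_MTYPE_VG10(MTYPE_NC) |	// memory type bits
 *		  AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE |
 *		  AMDGPU_PTE_WRITEABLE;
 *
 * The real encoding is done generically by the amdgpu VM code; this file
 * only supplies the ASIC-specific pieces via gmc_v9_0_map_mtype(),
 * gmc_v9_0_get_vm_pde() and gmc_v9_0_get_vm_pte() below.
 */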
static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
	switch (flags) {
	case AMDGPU_VM_MTYPE_DEFAULT:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_NC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_WC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
	case AMDGPU_VM_MTYPE_RW:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
	case AMDGPU_VM_MTYPE_CC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
	case AMDGPU_VM_MTYPE_UC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
	default:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	}
}
static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
				uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE))
			*flags |= AMDGPU_PDE_BFS(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE)
			*flags &= ~AMDGPU_PDE_PTE;
		else
			*flags |= AMDGPU_PTE_TF;
	}
}
static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
				struct amdgpu_bo_va_mapping *mapping,
				uint64_t *flags)
{
	*flags &= ~AMDGPU_PTE_EXECUTABLE;
	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;

	if (mapping->flags & AMDGPU_PTE_PRT) {
		*flags |= AMDGPU_PTE_PRT;
		*flags &= ~AMDGPU_PTE_VALID;
	}

	if (adev->asic_type == CHIP_ARCTURUS &&
	    !(*flags & AMDGPU_PTE_SYSTEM) &&
	    mapping->bo_va->is_xgmi)
		*flags |= AMDGPU_PTE_SNOOPED;
}

static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
	unsigned size;

	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
		size = AMDGPU_VBIOS_VGA_ALLOCATION;
	} else {
		u32 viewport;

		switch (adev->asic_type) {
		case CHIP_RAVEN:
		case CHIP_RENOIR:
			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
			size = (REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
				4);
			break;
		default:
			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
				4);
			break;
		}
	}

	return size;
}
static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
	.map_mtype = gmc_v9_0_map_mtype,
	.get_vm_pde = gmc_v9_0_get_vm_pde,
	.get_vm_pte = gmc_v9_0_get_vm_pte,
	.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
};

static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
{
	adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}
static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->umc.funcs = &umc_v6_0_funcs;
		break;
	case CHIP_VEGA20:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
		break;
	case CHIP_ARCTURUS:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
		break;
	default:
		break;
	}
}

static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_ARCTURUS:
		adev->mmhub.funcs = &mmhub_v9_4_funcs;
		break;
	default:
		adev->mmhub.funcs = &mmhub_v1_0_funcs;
		break;
	}
}

static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
	adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
}
static int gmc_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v9_0_set_gmc_funcs(adev);
	gmc_v9_0_set_irq_funcs(adev);
	gmc_v9_0_set_umc_funcs(adev);
	gmc_v9_0_set_mmhub_funcs(adev);
	gmc_v9_0_set_gfxhub_funcs(adev);

	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
	adev->gmc.shared_aperture_end =
		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
	adev->gmc.private_aperture_end =
		adev->gmc.private_aperture_start + (4ULL << 30) - 1;

	return 0;
}
static int gmc_v9_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
	if (r)
		return r;

	/*
	 * Workaround for a performance drop issue when the VBIOS enables
	 * partial writes while disabling HBM ECC for vega10.
	 */
	if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
		if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
			if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
				adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
		}
	}

	if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
		adev->mmhub.funcs->reset_ras_error_count(adev);

	r = amdgpu_gmc_ras_late_init(adev);
	if (r)
		return r;

	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}
static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
					struct amdgpu_gmc *mc)
{
	u64 base = 0;

	if (!amdgpu_sriov_vf(adev))
		base = adev->mmhub.funcs->get_fb_location(adev);

	/* add the xgmi offset of the physical node */
	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
	amdgpu_gmc_vram_location(adev, mc, base);
	amdgpu_gmc_gart_location(adev, mc);
	amdgpu_gmc_agp_location(adev, mc);
	/* base offset of vram pages */
	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);

	/* XXX: add the xgmi offset of the physical node? */
	adev->vm_manager.vram_base_offset +=
		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}
/**
 * gmc_v9_0_mc_init - initialize the memory controller driver params
 *
 * @adev: amdgpu_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space.
 * Returns 0 for success.
 */
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
	int r;

	/* get_memsize() returns the size in MB */
	adev->gmc.mc_vram_size =
		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;

	if (!(adev->flags & AMD_IS_APU)) {
		r = amdgpu_device_resize_fb_bar(adev);
		if (r)
			return r;
	}
	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);

#ifdef CONFIG_X86_64
	if (adev->flags & AMD_IS_APU) {
		adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
		adev->gmc.aper_size = adev->gmc.real_vram_size;
	}
#endif
	/* In case the PCI BAR is larger than the actual amount of vram */
	adev->gmc.visible_vram_size = adev->gmc.aper_size;
	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

	/* set the gart size */
	if (amdgpu_gart_size == -1) {
		switch (adev->asic_type) {
		case CHIP_VEGA10:  /* all engines support GPUVM */
		case CHIP_VEGA12:  /* all engines support GPUVM */
		default:
			adev->gmc.gart_size = 512ULL << 20;
			break;
		case CHIP_RAVEN:   /* DCE SG support */
			adev->gmc.gart_size = 1024ULL << 20;
			break;
		}
	} else {
		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
	}

	gmc_v9_0_vram_gtt_location(adev, &adev->gmc);

	return 0;
}
static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo) {
		WARN(1, "VEGA10 PCIE GART already initialized\n");
		return 0;
	}
	/* Initialize common gart structure */
	r = amdgpu_gart_init(adev);
	if (r)
		return r;
	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
				 AMDGPU_PTE_EXECUTABLE;
	return amdgpu_gart_table_vram_alloc(adev);
}
/**
 * gmc_v9_0_save_registers - save registers
 *
 * @adev: amdgpu_device pointer
 *
 * This saves potential register values that should be
 * restored upon resume.
 */
static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_RAVEN)
		adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
}
static int gmc_v9_0_sw_init(void *handle)
{
	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfxhub.funcs->init(adev);

	adev->mmhub.funcs->init(adev);

	spin_lock_init(&adev->gmc.invalidate_lock);

	r = amdgpu_atomfirmware_get_vram_info(adev,
		&vram_width, &vram_type, &vram_vendor);
	if (amdgpu_sriov_vf(adev))
		/* For Vega10 SR-IOV, vram_width can't be read from ATOM as it
		 * is on RAVEN, and the DF-related registers are not readable;
		 * hardcoding seems to be the only way to set the correct
		 * vram_width.
		 */
		adev->gmc.vram_width = 2048;
	else if (amdgpu_emu_mode != 1)
		adev->gmc.vram_width = vram_width;

	if (!adev->gmc.vram_width) {
		int chansize, numchan;

		/* hbm memory channel size */
		if (adev->flags & AMD_IS_APU)
			chansize = 64;
		else
			chansize = 128;

		numchan = adev->df.funcs->get_hbm_channel_number(adev);
		adev->gmc.vram_width = numchan * chansize;
	}

	adev->gmc.vram_type = vram_type;
	adev->gmc.vram_vendor = vram_vendor;
	switch (adev->asic_type) {
	case CHIP_RAVEN:
		adev->num_vmhubs = 2;

		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		} else {
			/* vm_size is 128TB + 512GB for legacy 3-level page support */
			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
			adev->gmc.translate_further =
				adev->vm_manager.num_level > 1;
		}
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RENOIR:
		adev->num_vmhubs = 2;

		/*
		 * To fulfill 4-level page support,
		 * vm size is 256TB (48bit), maximum size of Vega10,
		 * block size 512 (9bit)
		 */
		/* sriov restricts max_pfn below AMDGPU_GMC_HOLE */
		if (amdgpu_sriov_vf(adev))
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
		else
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	case CHIP_ARCTURUS:
		adev->num_vmhubs = 3;

		/* Keep the vm size the same as Vega20 */
		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	default:
		break;
	}

	/* This interrupt is VMC page fault. */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
				&adev->gmc.vm_fault);
	if (r)
		return r;

	if (adev->asic_type == CHIP_ARCTURUS) {
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
					&adev->gmc.vm_fault);
		if (r)
			return r;
	}

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
				&adev->gmc.vm_fault);
	if (r)
		return r;

	if (!amdgpu_sriov_vf(adev)) {
		/* interrupt sent to DF. */
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
				      &adev->gmc.ecc_irq);
		if (r)
			return r;
	}

	/* Set the internal MC address mask
	 * This is the max address of the GPU's
	 * internal address space.
	 */
	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
	if (r) {
		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
		return r;
	}
	adev->need_swiotlb = drm_need_swiotlb(44);

	if (adev->gmc.xgmi.supported) {
		r = adev->gfxhub.funcs->get_xgmi_info(adev);
		if (r)
			return r;
	}

	r = gmc_v9_0_mc_init(adev);
	if (r)
		return r;

	amdgpu_gmc_get_vbios_allocations(adev);

	/* Memory manager */
	r = amdgpu_bo_init(adev);
	if (r)
		return r;

	r = gmc_v9_0_gart_init(adev);
	if (r)
		return r;

	/*
	 * VMID 0 is reserved for System
	 * amdgpu graphics/compute will use VMIDs 1..n-1
	 * amdkfd will use VMIDs n..15
	 *
	 * The first KFD VMID is 8 for GPUs with graphics, 3 for
	 * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs
	 * for video processing.
	 */
	adev->vm_manager.first_kfd_vmid =
		adev->asic_type == CHIP_ARCTURUS ? 3 : 8;
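	/*
	 * Worked example of the partitioning above: on Arcturus
	 * (first_kfd_vmid == 3) VMID 0 stays with the kernel, VMIDs 1-2 are
	 * left for video processing and VMIDs 3-15 go to amdkfd; on the
	 * graphics ASICs (first_kfd_vmid == 8) amdgpu uses VMIDs 1-7 and
	 * amdkfd gets VMIDs 8-15.
	 */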
	amdgpu_vm_manager_init(adev);

	gmc_v9_0_save_registers(adev);

	return 0;
}
static int gmc_v9_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_gmc_ras_fini(adev);
	amdgpu_gem_force_release(adev);
	amdgpu_vm_manager_fini(adev);
	amdgpu_gart_table_vram_free(adev);
	amdgpu_bo_fini(adev);
	amdgpu_gart_fini(adev);

	return 0;
}
static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (amdgpu_sriov_vf(adev))
			break;
		fallthrough;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_mmhub_1_0_0,
						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	case CHIP_RAVEN:
		/* TODO for renoir */
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	default:
		break;
	}
}
/**
 * gmc_v9_0_restore_registers - restore registers
 *
 * @adev: amdgpu_device pointer
 *
 * This restores register values that were saved at suspend.
 */
void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_RAVEN) {
		WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
		WARN_ON(adev->gmc.sdpif_register !=
			RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
	}
}

/**
 * gmc_v9_0_gart_enable - gart enable
 *
 * @adev: amdgpu_device pointer
 */
static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo == NULL) {
		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = amdgpu_gart_table_vram_pin(adev);
	if (r)
		return r;

	r = adev->gfxhub.funcs->gart_enable(adev);
	if (r)
		return r;

	r = adev->mmhub.funcs->gart_enable(adev);
	if (r)
		return r;

	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(adev->gmc.gart_size >> 20),
		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
	adev->gart.ready = true;
	return 0;
}
static int gmc_v9_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool value;
	int r, i;
	u32 tmp;

	/* The sequence of these two function calls matters. */
	gmc_v9_0_init_golden_registers(adev);

	if (adev->mode_info.num_crtc) {
		/* Lockout access through VGA aperture */
		WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
		/* disable VGA render */
		WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
	}

	amdgpu_device_program_register_sequence(adev,
						golden_settings_vega10_hdp,
						ARRAY_SIZE(golden_settings_vega10_hdp));

	if (adev->mmhub.funcs->update_power_gating)
		adev->mmhub.funcs->update_power_gating(adev, true);

	switch (adev->asic_type) {
	case CHIP_ARCTURUS:
		WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
		break;
	default:
		break;
	}

	WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);

	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);

	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));

	/* After HDP is initialized, flush HDP. */
	adev->nbio.funcs->hdp_flush(adev, NULL);

	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
		value = false;
	else
		value = true;

	if (!amdgpu_sriov_vf(adev)) {
		adev->gfxhub.funcs->set_fault_enable_default(adev, value);
		adev->mmhub.funcs->set_fault_enable_default(adev, value);
	}
	for (i = 0; i < adev->num_vmhubs; ++i)
		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);

	if (adev->umc.funcs && adev->umc.funcs->init_registers)
		adev->umc.funcs->init_registers(adev);

	r = gmc_v9_0_gart_enable(adev);

	return r;
}
/**
 * gmc_v9_0_gart_disable - gart disable
 *
 * @adev: amdgpu_device pointer
 *
 * This disables all VM page tables.
 */
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
	adev->gfxhub.funcs->gart_disable(adev);
	adev->mmhub.funcs->gart_disable(adev);
	amdgpu_gart_table_vram_unpin(adev);
}
static int gmc_v9_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any GMC register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
	gmc_v9_0_gart_disable(adev);

	return 0;
}
static int gmc_v9_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gmc_v9_0_hw_fini(adev);
}

static int gmc_v9_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gmc_v9_0_hw_init(adev);
	if (r)
		return r;

	amdgpu_vmid_reset_all(adev);

	return 0;
}
static bool gmc_v9_0_is_idle(void *handle)
{
	/* MC is always ready in GMC v9. */
	return true;
}

static int gmc_v9_0_wait_for_idle(void *handle)
{
	/* There is no need to wait for MC idle in GMC v9. */
	return 0;
}

static int gmc_v9_0_soft_reset(void *handle)
{
	/* XXX for emulation. */
	return 0;
}
static int gmc_v9_0_set_clockgating_state(void *handle,
					enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mmhub.funcs->set_clockgating(adev, state);

	athub_v1_0_set_clockgating(adev, state);

	return 0;
}

static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mmhub.funcs->get_clockgating(adev, flags);

	athub_v1_0_get_clockgating(adev, flags);
}

static int gmc_v9_0_set_powergating_state(void *handle,
					enum amd_powergating_state state)
{
	return 0;
}
const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
	.name = "gmc_v9_0",
	.early_init = gmc_v9_0_early_init,
	.late_init = gmc_v9_0_late_init,
	.sw_init = gmc_v9_0_sw_init,
	.sw_fini = gmc_v9_0_sw_fini,
	.hw_init = gmc_v9_0_hw_init,
	.hw_fini = gmc_v9_0_hw_fini,
	.suspend = gmc_v9_0_suspend,
	.resume = gmc_v9_0_resume,
	.is_idle = gmc_v9_0_is_idle,
	.wait_for_idle = gmc_v9_0_wait_for_idle,
	.soft_reset = gmc_v9_0_soft_reset,
	.set_clockgating_state = gmc_v9_0_set_clockgating_state,
	.set_powergating_state = gmc_v9_0_set_powergating_state,
	.get_clockgating_state = gmc_v9_0_get_clockgating_state,
};
const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GMC,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gmc_v9_0_ip_funcs,
};