/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/firmware.h>
#include <linux/pci.h>

#include <drm/drm_cache.h>

#include "amdgpu.h"
#include "gmc_v9_0.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_gem.h"

#include "hdp/hdp_4_0_offset.h"
#include "hdp/hdp_4_0_sh_mask.h"
#include "gc/gc_9_0_sh_mask.h"
#include "dce/dce_12_0_offset.h"
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"

#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"

#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
#include "athub_v1_0.h"
#include "gfxhub_v1_1.h"
#include "mmhub_v9_4.h"
#include "umc_v6_1.h"
#include "umc_v6_0.h"

#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"

#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"

/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2

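/*
 * Illustrative sketch only (not part of the driver): decoding the viewport
 * register with the mask/shift pairs defined above, mirroring what
 * REG_GET_FIELD() does in gmc_v9_0_get_vbios_fb_size() further below.
 * The helper name is hypothetical.
 */
#if 0
static void example_decode_viewport(u32 val, u32 *width, u32 *height)
{
	/* width lives in bits 13:0, height in bits 29:16 */
	*width = (val & HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK) >>
		 HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT;
	*height = (val & HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK) >>
		  HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT;
}
#endif
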
static const char *gfxhub_client_ids[] = {

static const char *mmhub_client_ids_raven[][2] = {

static const char *mmhub_client_ids_renoir[][2] = {

static const char *mmhub_client_ids_vega10[][2] = {
	[32+14][0] = "SDMA0",
	[32+4][1] = "DCEDWB",
	[32+14][1] = "SDMA1",
};

static const char *mmhub_client_ids_vega12[][2] = {
	[32+15][0] = "SDMA0",
	[32+1][1] = "DCEDWB",
	[32+15][1] = "SDMA1",
};

static const char *mmhub_client_ids_vega20[][2] = {
	[32+12][0] = "UTCL2",
	[32+14][0] = "SDMA1",
	[32+14][1] = "SDMA1",
};

static const char *mmhub_client_ids_arcturus[][2] = {
	[32+15][0] = "SDMA1",
	[64+15][0] = "SDMA2",
	[96+15][0] = "SDMA3",
	[128+15][0] = "SDMA4",
	[160+11][0] = "JPEG",
	[160+13][0] = "VCNU",
	[160+15][0] = "SDMA5",
	[192+10][0] = "UTCL2",
	[192+11][0] = "JPEG1",
	[192+12][0] = "VCN1",
	[192+13][0] = "VCN1U",
	[192+15][0] = "SDMA6",
	[224+15][0] = "SDMA7",
	[32+15][1] = "SDMA1",
	[64+15][1] = "SDMA2",
	[96+15][1] = "SDMA3",
	[128+15][1] = "SDMA4",
	[160+11][1] = "JPEG",
	[160+13][1] = "VCNU",
	[160+15][1] = "SDMA5",
	[192+11][1] = "JPEG1",
	[192+12][1] = "VCN1",
	[192+13][1] = "VCN1U",
	[192+15][1] = "SDMA6",
	[224+15][1] = "SDMA7",
};

static const u32 golden_settings_vega10_hdp[] =
{
	0xf64, 0x0fffffff, 0x00000000,
	0xf65, 0x0fffffff, 0x00000000,
	0xf66, 0x0fffffff, 0x00000000,
	0xf67, 0x0fffffff, 0x00000000,
	0xf68, 0x0fffffff, 0x00000000,
	0xf6a, 0x0fffffff, 0x00000000,
	0xf6b, 0x0fffffff, 0x00000000,
	0xf6c, 0x0fffffff, 0x00000000,
	0xf6d, 0x0fffffff, 0x00000000,
	0xf6e, 0x0fffffff, 0x00000000,
};

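/*
 * For reference: each row above is assumed to be an { offset, mask, value }
 * triplet as consumed by amdgpu_device_program_register_sequence() in
 * gmc_v9_0_hw_init() below.
 */
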
static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
};

static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
};

static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
	(0x000143c0 + 0x00000000),
	(0x000143c0 + 0x00000800),
	(0x000143c0 + 0x00001000),
	(0x000143c0 + 0x00001800),
	(0x000543c0 + 0x00000000),
	(0x000543c0 + 0x00000800),
	(0x000543c0 + 0x00001000),
	(0x000543c0 + 0x00001800),
	(0x000943c0 + 0x00000000),
	(0x000943c0 + 0x00000800),
	(0x000943c0 + 0x00001000),
	(0x000943c0 + 0x00001800),
	(0x000d43c0 + 0x00000000),
	(0x000d43c0 + 0x00000800),
	(0x000d43c0 + 0x00001000),
	(0x000d43c0 + 0x00001800),
	(0x001143c0 + 0x00000000),
	(0x001143c0 + 0x00000800),
	(0x001143c0 + 0x00001000),
	(0x001143c0 + 0x00001800),
	(0x001543c0 + 0x00000000),
	(0x001543c0 + 0x00000800),
	(0x001543c0 + 0x00001000),
	(0x001543c0 + 0x00001800),
	(0x001943c0 + 0x00000000),
	(0x001943c0 + 0x00000800),
	(0x001943c0 + 0x00001000),
	(0x001943c0 + 0x00001800),
	(0x001d43c0 + 0x00000000),
	(0x001d43c0 + 0x00000800),
	(0x001d43c0 + 0x00001000),
	(0x001d43c0 + 0x00001800),
};

static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
	(0x000143e0 + 0x00000000),
	(0x000143e0 + 0x00000800),
	(0x000143e0 + 0x00001000),
	(0x000143e0 + 0x00001800),
	(0x000543e0 + 0x00000000),
	(0x000543e0 + 0x00000800),
	(0x000543e0 + 0x00001000),
	(0x000543e0 + 0x00001800),
	(0x000943e0 + 0x00000000),
	(0x000943e0 + 0x00000800),
	(0x000943e0 + 0x00001000),
	(0x000943e0 + 0x00001800),
	(0x000d43e0 + 0x00000000),
	(0x000d43e0 + 0x00000800),
	(0x000d43e0 + 0x00001000),
	(0x000d43e0 + 0x00001800),
	(0x001143e0 + 0x00000000),
	(0x001143e0 + 0x00000800),
	(0x001143e0 + 0x00001000),
	(0x001143e0 + 0x00001800),
	(0x001543e0 + 0x00000000),
	(0x001543e0 + 0x00000800),
	(0x001543e0 + 0x00001000),
	(0x001543e0 + 0x00001800),
	(0x001943e0 + 0x00000000),
	(0x001943e0 + 0x00000800),
	(0x001943e0 + 0x00001000),
	(0x001943e0 + 0x00001800),
	(0x001d43e0 + 0x00000000),
	(0x001d43e0 + 0x00000800),
	(0x001d43e0 + 0x00001000),
	(0x001d43e0 + 0x00001800),
};

static const uint32_t ecc_umc_mcumc_status_addrs[] = {
	(0x000143c2 + 0x00000000),
	(0x000143c2 + 0x00000800),
	(0x000143c2 + 0x00001000),
	(0x000143c2 + 0x00001800),
	(0x000543c2 + 0x00000000),
	(0x000543c2 + 0x00000800),
	(0x000543c2 + 0x00001000),
	(0x000543c2 + 0x00001800),
	(0x000943c2 + 0x00000000),
	(0x000943c2 + 0x00000800),
	(0x000943c2 + 0x00001000),
	(0x000943c2 + 0x00001800),
	(0x000d43c2 + 0x00000000),
	(0x000d43c2 + 0x00000800),
	(0x000d43c2 + 0x00001000),
	(0x000d43c2 + 0x00001800),
	(0x001143c2 + 0x00000000),
	(0x001143c2 + 0x00000800),
	(0x001143c2 + 0x00001000),
	(0x001143c2 + 0x00001800),
	(0x001543c2 + 0x00000000),
	(0x001543c2 + 0x00000800),
	(0x001543c2 + 0x00001000),
	(0x001543c2 + 0x00001800),
	(0x001943c2 + 0x00000000),
	(0x001943c2 + 0x00000800),
	(0x001943c2 + 0x00001000),
	(0x001943c2 + 0x00001800),
	(0x001d43c2 + 0x00000000),
	(0x001d43c2 + 0x00000800),
	(0x001d43c2 + 0x00001000),
	(0x001d43c2 + 0x00001800),
};

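/*
 * Note: each table above walks 8 UMC instances (base stride 0x40000), each
 * with 4 channel instances (stride 0x800); the three tables differ only in
 * the per-channel base register (control, control mask, status).
 */
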
static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
		struct amdgpu_irq_src *src,
		unsigned type,
		enum amdgpu_interrupt_state state)
{
	u32 bits, i, tmp, reg;

	/* Devices newer than VEGA10/12 shall have these programming
	 * sequences performed by PSP BL */
	if (adev->asic_type >= CHIP_VEGA20)
		return 0;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];

static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *src,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, bits, i, j;

	bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;

static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	bool retry_fault = !!(entry->src_data[1] & 0x80);
	uint32_t status = 0, cid = 0, rw = 0;
	struct amdgpu_task_info task_info;
	struct amdgpu_vmhub *hub;
	const char *mmhub_cid;
	const char *hub_name;
	u64 addr;

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
						    entry->timestamp))
		return 1; /* This also prevents sending it to KFD */

	/* If it's the first fault for this address, process it normally */
	if (retry_fault && !in_interrupt() &&
	    amdgpu_vm_handle_fault(adev, entry->pasid, addr))
		return 1; /* This also prevents sending it to KFD */

	if (!printk_ratelimit())
		return 0;

	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
		hub_name = "mmhub0";
		hub = &adev->vmhub[AMDGPU_MMHUB_0];
	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
		hub_name = "mmhub1";
		hub = &adev->vmhub[AMDGPU_MMHUB_1];
	} else {
		hub_name = "gfxhub0";
		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
	}

	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);

	dev_err(adev->dev,
		"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
		"pasid:%u, for process %s pid %d thread %s pid %d)\n",
		hub_name, retry_fault ? "retry" : "no-retry",
		entry->src_id, entry->ring_id, entry->vmid,
		entry->pasid, task_info.process_name, task_info.tgid,
		task_info.task_name, task_info.pid);
	dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
		addr, entry->client_id);

	if (amdgpu_sriov_vf(adev))
		return 0;

	/*
	 * Issue a dummy read to wait for the status register to
	 * be updated to avoid reading an incorrect value due to
	 * the new fast GRBM interface.
	 */
	if (entry->vmid_src == AMDGPU_GFXHUB_0)
		RREG32(hub->vm_l2_pro_fault_status);

	status = RREG32(hub->vm_l2_pro_fault_status);
	cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
	rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
	WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);

	dev_err(adev->dev,
		"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
		status);
	if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
		dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
			cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
			gfxhub_client_ids[cid],
			cid);
	} else {
		switch (adev->asic_type) {
		case CHIP_VEGA10:
			mmhub_cid = mmhub_client_ids_vega10[cid][rw];
			break;
		case CHIP_VEGA12:
			mmhub_cid = mmhub_client_ids_vega12[cid][rw];
			break;
		case CHIP_VEGA20:
			mmhub_cid = mmhub_client_ids_vega20[cid][rw];
			break;
		case CHIP_ARCTURUS:
			mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
			break;
		case CHIP_RAVEN:
			mmhub_cid = mmhub_client_ids_raven[cid][rw];
			break;
		case CHIP_RENOIR:
			mmhub_cid = mmhub_client_ids_renoir[cid][rw];
			break;
		default:
			mmhub_cid = NULL;
			break;
		}
		dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
			mmhub_cid ? mmhub_cid : "unknown", cid);
	}
	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
		REG_GET_FIELD(status,
		VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
	dev_err(adev->dev, "\t RW: 0x%x\n", rw);

	return 0;
}

static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
	.set = gmc_v9_0_vm_fault_interrupt_state,
	.process = gmc_v9_0_process_interrupt,
};

static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
	.set = gmc_v9_0_ecc_interrupt_state,
	.process = amdgpu_umc_process_ecc_irq,
};

static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gmc.vm_fault.num_types = 1;
	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;

	if (!amdgpu_sriov_vf(adev)) {
		adev->gmc.ecc_irq.num_types = 1;
		adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
	}
}

static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
					    uint32_t flush_type)
{
	u32 req = 0;

	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);

	return req;
}

/**
 * gmc_v9_0_use_invalidate_semaphore - decide whether to use the invalidation
 * semaphore
 *
 * @adev: amdgpu_device pointer
 * @vmhub: the hub being invalidated
 */
static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
					      uint32_t vmhub)
{
	/* Only the MMHUBs need the semaphore, only on bare metal, and not on
	 * Picasso parts (APUs flagged as Picasso without the Raven2 flag).
	 */
	return ((vmhub == AMDGPU_MMHUB_0 ||
		 vmhub == AMDGPU_MMHUB_1) &&
		(!amdgpu_sriov_vf(adev)) &&
		(!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
		   (adev->apu_flags & AMD_APU_IS_PICASSO))));
}

static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

/*
 * VMID 0 covers the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

/**
 * gmc_v9_0_flush_gpu_tlb - tlb flush with a certain type
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table using a certain flush type.
 */
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
				   uint32_t vmhub, uint32_t flush_type)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
	const unsigned eng = 17;
	u32 j, inv_req, inv_req2, tmp;
	struct amdgpu_vmhub *hub;

	BUG_ON(vmhub >= adev->num_vmhubs);

	hub = &adev->vmhub[vmhub];
	if (adev->gmc.xgmi.num_physical_nodes &&
	    adev->asic_type == CHIP_VEGA20) {
		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
		 * heavy-weight TLB flush (type 2), which flushes
		 * both. Due to a race condition with concurrent
		 * memory accesses using the same TLB cache line, we
		 * still need a second TLB flush after this.
		 */
		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
	} else {
		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
		inv_req2 = 0;
	}

	/* This is necessary for a HW workaround under SRIOV as well
	 * as GFXOFF under bare metal
	 */
	if (adev->gfx.kiq.ring.sched.ready &&
	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
	    down_read_trylock(&adev->reset_sem)) {
		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
						   1 << vmid);
		up_read(&adev->reset_sem);
		return;
	}

	spin_lock(&adev->gmc.invalidate_lock);

	/*
	 * The GPU may lose the gpuvm invalidate acknowledge state across
	 * power-gating off cycles; add a semaphore acquire before the
	 * invalidation and a semaphore release after it to avoid entering
	 * a power-gated state in between.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore) {
		for (j = 0; j < adev->usec_timeout; j++) {
			/* a read return value of 1 means semaphore acquire */
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
					    hub->eng_distance * eng);
			if (tmp & 0x1)
				break;
			udelay(1);
		}

		if (j >= adev->usec_timeout)
			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
	}

	do {
		WREG32_NO_KIQ(hub->vm_inv_eng0_req +
			      hub->eng_distance * eng, inv_req);

		/*
		 * Issue a dummy read to wait for the ACK register to
		 * be cleared to avoid a false ACK due to the new fast
		 * GRBM interface.
		 */
		if (vmhub == AMDGPU_GFXHUB_0)
			RREG32_NO_KIQ(hub->vm_inv_eng0_req +
				      hub->eng_distance * eng);

		for (j = 0; j < adev->usec_timeout; j++) {
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng);
			if (tmp & (1 << vmid))
				break;
			udelay(1);
		}

		inv_req = inv_req2;
		inv_req2 = 0;
	} while (inv_req);

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * a write of 0 releases the semaphore
		 */
		WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
			      hub->eng_distance * eng, 0);

	spin_unlock(&adev->gmc.invalidate_lock);

	if (j < adev->usec_timeout)
		return;

	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}

/**
 * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flushed
 * @flush_type: the flush type
 * @all_hub: flush all hubs
 *
 * Flush the TLB for the requested pasid.
 */
static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
					uint16_t pasid, uint32_t flush_type,
					bool all_hub)
{
	int vmid, i;
	signed long r;
	uint32_t seq;
	uint16_t queried_pasid;
	bool ret;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	if (amdgpu_in_reset(adev))
		return -EIO;

	if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
		 * heavy-weight TLB flush (type 2), which flushes
		 * both. Due to a race condition with concurrent
		 * memory accesses using the same TLB cache line, we
		 * still need a second TLB flush after this.
		 */
		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
				       adev->asic_type == CHIP_VEGA20);
		/* 2 dwords flush + 8 dwords fence */
		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;

		if (vega20_xgmi_wa)
			ndw += kiq->pmf->invalidate_tlbs_size;

		spin_lock(&adev->gfx.kiq.ring_lock);
		/* 2 dwords flush + 8 dwords fence */
		amdgpu_ring_alloc(ring, ndw);
		if (vega20_xgmi_wa)
			kiq->pmf->kiq_invalidate_tlbs(ring,
						      pasid, 2, all_hub);
		kiq->pmf->kiq_invalidate_tlbs(ring,
					      pasid, flush_type, all_hub);
		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
		if (r) {
			amdgpu_ring_undo(ring);
			spin_unlock(&adev->gfx.kiq.ring_lock);
			up_read(&adev->reset_sem);
			return -ETIME;
		}

		amdgpu_ring_commit(ring);
		spin_unlock(&adev->gfx.kiq.ring_lock);
		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
		if (r < 1) {
			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
			up_read(&adev->reset_sem);
			return -ETIME;
		}

		up_read(&adev->reset_sem);
		return 0;
	}

	for (vmid = 1; vmid < 16; vmid++) {

		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
							       &queried_pasid);
		if (ret && queried_pasid == pasid) {
			if (all_hub) {
				for (i = 0; i < adev->num_vmhubs; i++)
					gmc_v9_0_flush_gpu_tlb(adev, vmid,
							       i, flush_type);
			} else {
				gmc_v9_0_flush_gpu_tlb(adev, vmid,
						       AMDGPU_GFXHUB_0, flush_type);
			}
			break;
		}
	}

	return 0;
}

static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					    unsigned vmid, uint64_t pd_addr)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	/*
	 * The GPU may lose the gpuvm invalidate acknowledge state across
	 * power-gating off cycles; add a semaphore acquire before the
	 * invalidation and a semaphore release after it to avoid entering
	 * a power-gated state in between.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/* a read return value of 1 means semaphore acquire */
		amdgpu_ring_emit_reg_wait(ring,
					  hub->vm_inv_eng0_sem +
					  hub->eng_distance * eng, 0x1, 0x1);

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
			      (hub->ctx_addr_distance * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
			      (hub->ctx_addr_distance * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
					    hub->eng_distance * eng,
					    hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng,
					    req, 1 << vmid);

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * a write of 0 releases the semaphore
		 */
		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
				      hub->eng_distance * eng, 0);

	return pd_addr;
}

static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
					unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	/* Do nothing because there's no lut register for mmhub1. */
	if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
		return;

	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}

/*
 * PTE format on VEGA 10:
 * 47:12 4k physical page base address
 *
 * PDE format on VEGA 10:
 * 63:59 block fragment size
 * 47:6 physical base address of PD or PTE
 */

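/*
 * Illustrative sketch only (an assumption for illustration, not part of this
 * driver): assembling a 4K PTE for the layout above with the generic amdgpu
 * PTE flag macros. The helper name is hypothetical.
 */
#if 0
static uint64_t example_vg10_pte(uint64_t page_base, uint64_t mtype_flags)
{
	/* bits 47:12 hold the 4k physical page base address */
	uint64_t pte = page_base & 0x0000FFFFFFFFF000ULL;

	/* validity and access bits, plus the requested memory type */
	pte |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE;
	pte |= mtype_flags & AMDGPU_PTE_MTYPE_VG10_MASK;

	return pte;
}
#endif
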
static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
	switch (flags) {
	case AMDGPU_VM_MTYPE_DEFAULT:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_NC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_WC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
	case AMDGPU_VM_MTYPE_RW:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
	case AMDGPU_VM_MTYPE_CC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
	case AMDGPU_VM_MTYPE_UC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
	default:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	}
}

static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
				uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE))
			*flags |= AMDGPU_PDE_BFS(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE)
			*flags &= ~AMDGPU_PDE_PTE;
		else
			*flags |= AMDGPU_PTE_TF;
	}
}

static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
				struct amdgpu_bo_va_mapping *mapping,
				uint64_t *flags)
{
	*flags &= ~AMDGPU_PTE_EXECUTABLE;
	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;

	if (mapping->flags & AMDGPU_PTE_PRT) {
		*flags |= AMDGPU_PTE_PRT;
		*flags &= ~AMDGPU_PTE_VALID;
	}

	if (adev->asic_type == CHIP_ARCTURUS &&
	    !(*flags & AMDGPU_PTE_SYSTEM) &&
	    mapping->bo_va->is_xgmi)
		*flags |= AMDGPU_PTE_SNOOPED;
}

static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
	unsigned size;

	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
		size = AMDGPU_VBIOS_VGA_ALLOCATION;
	} else {
		u32 viewport;

		switch (adev->asic_type) {
		case CHIP_RAVEN:
		case CHIP_RENOIR:
			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
			size = (REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
				4);
			break;
		default:
			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
				4);
			break;
		}
	}

	return size;
}

static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
	.map_mtype = gmc_v9_0_map_mtype,
	.get_vm_pde = gmc_v9_0_get_vm_pde,
	.get_vm_pte = gmc_v9_0_get_vm_pte,
	.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
};

static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
{
	adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}

static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->umc.funcs = &umc_v6_0_funcs;
		break;
	case CHIP_VEGA20:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
		break;
	case CHIP_ARCTURUS:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
		break;
	default:
		break;
	}
}

static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_ARCTURUS:
		adev->mmhub.funcs = &mmhub_v9_4_funcs;
		break;
	default:
		adev->mmhub.funcs = &mmhub_v1_0_funcs;
		break;
	}
}

static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
	adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
}

static int gmc_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v9_0_set_gmc_funcs(adev);
	gmc_v9_0_set_irq_funcs(adev);
	gmc_v9_0_set_umc_funcs(adev);
	gmc_v9_0_set_mmhub_funcs(adev);
	gmc_v9_0_set_gfxhub_funcs(adev);

	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
	adev->gmc.shared_aperture_end =
		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
	adev->gmc.private_aperture_end =
		adev->gmc.private_aperture_start + (4ULL << 30) - 1;

	return 0;
}

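/*
 * For reference: 4ULL << 30 is 4 GiB, so the apertures above span
 * shared:  0x2000000000000000 - 0x20000000FFFFFFFF
 * private: 0x1000000000000000 - 0x10000000FFFFFFFF
 */
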
static int gmc_v9_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	amdgpu_bo_late_init(adev);

	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
	if (r)
		return r;

	/*
	 * Workaround for a performance drop when the VBIOS enables partial
	 * writes while disabling HBM ECC on vega10.
	 */
	if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
		if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
			if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
				adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
		}
	}

	if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
		adev->mmhub.funcs->reset_ras_error_count(adev);

	r = amdgpu_gmc_ras_late_init(adev);
	if (r)
		return r;

	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}

static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
					struct amdgpu_gmc *mc)
{
	u64 base = 0;

	if (!amdgpu_sriov_vf(adev))
		base = adev->mmhub.funcs->get_fb_location(adev);

	/* add the xgmi offset of the physical node */
	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
	amdgpu_gmc_vram_location(adev, mc, base);
	amdgpu_gmc_gart_location(adev, mc);
	amdgpu_gmc_agp_location(adev, mc);
	/* base offset of vram pages */
	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);

	/* XXX: add the xgmi offset of the physical node? */
	adev->vm_manager.vram_base_offset +=
		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}

/**
 * gmc_v9_0_mc_init - initialize the memory controller driver params
 *
 * @adev: amdgpu_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space.
 * Returns 0 for success.
 */
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
	int r;

	/* size in MB */
	adev->gmc.mc_vram_size =
		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;

	if (!(adev->flags & AMD_IS_APU)) {
		r = amdgpu_device_resize_fb_bar(adev);
		if (r)
			return r;
	}
	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);

#ifdef CONFIG_X86_64
	if (adev->flags & AMD_IS_APU) {
		adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
		adev->gmc.aper_size = adev->gmc.real_vram_size;
	}
#endif
	/* In case the PCI BAR is larger than the actual amount of vram */
	adev->gmc.visible_vram_size = adev->gmc.aper_size;
	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

	/* set the gart size */
	if (amdgpu_gart_size == -1) {
		switch (adev->asic_type) {
		case CHIP_VEGA10:  /* all engines support GPUVM */
		case CHIP_VEGA12:  /* all engines support GPUVM */
		case CHIP_VEGA20:
		case CHIP_ARCTURUS:
		default:
			adev->gmc.gart_size = 512ULL << 20;
			break;
		case CHIP_RAVEN:   /* DCE SG support */
		case CHIP_RENOIR:
			adev->gmc.gart_size = 1024ULL << 20;
			break;
		}
	} else {
		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
	}

	gmc_v9_0_vram_gtt_location(adev, &adev->gmc);

	return 0;
}

static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo) {
		WARN(1, "VEGA10 PCIE GART already initialized\n");
		return 0;
	}
	/* Initialize common gart structure */
	r = amdgpu_gart_init(adev);
	if (r)
		return r;
	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
				 AMDGPU_PTE_EXECUTABLE;
	return amdgpu_gart_table_vram_alloc(adev);
}

/**
 * gmc_v9_0_save_registers - save registers
 *
 * @adev: amdgpu_device pointer
 *
 * This saves potential register values that should be
 * restored upon resume.
 */
static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_RAVEN)
		adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
}

static int gmc_v9_0_sw_init(void *handle)
{
	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfxhub.funcs->init(adev);

	adev->mmhub.funcs->init(adev);

	spin_lock_init(&adev->gmc.invalidate_lock);

	r = amdgpu_atomfirmware_get_vram_info(adev,
		&vram_width, &vram_type, &vram_vendor);
	if (amdgpu_sriov_vf(adev))
		/*
		 * For Vega10 SR-IOV, vram_width can't be read from ATOM as it
		 * can on RAVEN, and the DF-related registers are not readable;
		 * hardcoding seems to be the only way to set the correct
		 * vram_width.
		 */
		adev->gmc.vram_width = 2048;
	else if (amdgpu_emu_mode != 1)
		adev->gmc.vram_width = vram_width;

	if (!adev->gmc.vram_width) {
		int chansize, numchan;

		/* hbm memory channel size */
		if (adev->flags & AMD_IS_APU)
			chansize = 64;
		else
			chansize = 128;

		numchan = adev->df.funcs->get_hbm_channel_number(adev);
		adev->gmc.vram_width = numchan * chansize;
	}

	adev->gmc.vram_type = vram_type;
	adev->gmc.vram_vendor = vram_vendor;
	switch (adev->asic_type) {
	case CHIP_RAVEN:
	case CHIP_RENOIR:
		adev->num_vmhubs = 2;

		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		} else {
			/* vm_size is 128TB + 512GB for legacy 3-level page support */
			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
			adev->gmc.translate_further =
				adev->vm_manager.num_level > 1;
		}
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->num_vmhubs = 2;

		/*
		 * To fulfill 4-level page support,
		 * vm size is 256TB (48bit), maximum size of Vega10,
		 * block size 512 (9bit)
		 */
		/* sriov restricts max_pfn below AMDGPU_GMC_HOLE */
		if (amdgpu_sriov_vf(adev))
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
		else
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	case CHIP_ARCTURUS:
		adev->num_vmhubs = 3;

		/* Keep the vm size the same as Vega20 */
		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	default:
		break;
	}

	/* This interrupt is VMC page fault. */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
				&adev->gmc.vm_fault);
	if (r)
		return r;

	if (adev->asic_type == CHIP_ARCTURUS) {
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
					&adev->gmc.vm_fault);
		if (r)
			return r;
	}

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
				&adev->gmc.vm_fault);
	if (r)
		return r;

	if (!amdgpu_sriov_vf(adev)) {
		/* interrupt sent to DF. */
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
				      &adev->gmc.ecc_irq);
		if (r)
			return r;
	}

	/* Set the internal MC address mask
	 * This is the max address of the GPU's
	 * internal address space.
	 */
	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
	if (r) {
		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
		return r;
	}
	adev->need_swiotlb = drm_need_swiotlb(44);

	if (adev->gmc.xgmi.supported) {
		r = adev->gfxhub.funcs->get_xgmi_info(adev);
		if (r)
			return r;
	}

	r = gmc_v9_0_mc_init(adev);
	if (r)
		return r;

	amdgpu_gmc_get_vbios_allocations(adev);

	/* Memory manager */
	r = amdgpu_bo_init(adev);
	if (r)
		return r;

	r = gmc_v9_0_gart_init(adev);
	if (r)
		return r;

	/*
	 * VMID 0 is reserved for System
	 * amdgpu graphics/compute will use VMIDs 1..n-1
	 * amdkfd will use VMIDs n..15
	 *
	 * The first KFD VMID is 8 for GPUs with graphics, 3 for
	 * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs
	 * for video processing.
	 */
	adev->vm_manager.first_kfd_vmid =
		adev->asic_type == CHIP_ARCTURUS ? 3 : 8;

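	/*
	 * Worked example for the comment above: on Arcturus (compute-only)
	 * KFD gets VMIDs 3..15, leaving VMIDs 1 and 2 for video processing;
	 * on GMC v9 parts with graphics KFD gets 8..15 and graphics/compute
	 * uses 1..7.
	 */
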
	amdgpu_vm_manager_init(adev);

	gmc_v9_0_save_registers(adev);

	return 0;
}

static int gmc_v9_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_gmc_ras_fini(adev);
	amdgpu_gem_force_release(adev);
	amdgpu_vm_manager_fini(adev);
	amdgpu_gart_table_vram_free(adev);
	amdgpu_bo_fini(adev);
	amdgpu_gart_fini(adev);

	return 0;
}

static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (amdgpu_sriov_vf(adev))
			break;
		fallthrough;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_mmhub_1_0_0,
						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	case CHIP_RAVEN:
		/* TODO for renoir */
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	default:
		break;
	}
}

/**
 * gmc_v9_0_restore_registers - restore registers
 *
 * @adev: amdgpu_device pointer
 *
 * This restores register values, saved at suspend.
 */
void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_RAVEN) {
		WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
		WARN_ON(adev->gmc.sdpif_register !=
			RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
	}
}

/**
 * gmc_v9_0_gart_enable - gart enable
 *
 * @adev: amdgpu_device pointer
 */
static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo == NULL) {
		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = amdgpu_gart_table_vram_pin(adev);
	if (r)
		return r;

	r = adev->gfxhub.funcs->gart_enable(adev);
	if (r)
		return r;

	r = adev->mmhub.funcs->gart_enable(adev);
	if (r)
		return r;

	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(adev->gmc.gart_size >> 20),
		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
	adev->gart.ready = true;
	return 0;
}

static int gmc_v9_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool value;
	int r, i;
	u32 tmp;

	/* The sequence of these two function calls matters. */
	gmc_v9_0_init_golden_registers(adev);

	if (adev->mode_info.num_crtc) {
		if (adev->asic_type != CHIP_ARCTURUS) {
			/* Lockout access through VGA aperture */
			WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);

			/* disable VGA render */
			WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
		}
	}

	amdgpu_device_program_register_sequence(adev,
						golden_settings_vega10_hdp,
						ARRAY_SIZE(golden_settings_vega10_hdp));

	if (adev->mmhub.funcs->update_power_gating)
		adev->mmhub.funcs->update_power_gating(adev, true);

	switch (adev->asic_type) {
	case CHIP_ARCTURUS:
		WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
		break;
	default:
		break;
	}

	WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);

	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);

	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));

	/* After HDP is initialized, flush HDP. */
	adev->nbio.funcs->hdp_flush(adev, NULL);

	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
		value = false;
	else
		value = true;

	if (!amdgpu_sriov_vf(adev)) {
		adev->gfxhub.funcs->set_fault_enable_default(adev, value);
		adev->mmhub.funcs->set_fault_enable_default(adev, value);
	}
	for (i = 0; i < adev->num_vmhubs; ++i)
		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);

	if (adev->umc.funcs && adev->umc.funcs->init_registers)
		adev->umc.funcs->init_registers(adev);

	r = gmc_v9_0_gart_enable(adev);

	return r;
}

/**
 * gmc_v9_0_gart_disable - gart disable
 *
 * @adev: amdgpu_device pointer
 *
 * This disables all VM page tables.
 */
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
	adev->gfxhub.funcs->gart_disable(adev);
	adev->mmhub.funcs->gart_disable(adev);
	amdgpu_gart_table_vram_unpin(adev);
}

static int gmc_v9_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any GMC register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
	gmc_v9_0_gart_disable(adev);

	return 0;
}

static int gmc_v9_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gmc_v9_0_hw_fini(adev);
}

static int gmc_v9_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gmc_v9_0_hw_init(adev);
	if (r)
		return r;

	amdgpu_vmid_reset_all(adev);

	return 0;
}

static bool gmc_v9_0_is_idle(void *handle)
{
	/* MC is always ready in GMC v9. */
	return true;
}

static int gmc_v9_0_wait_for_idle(void *handle)
{
	/* There is no need to wait for MC idle in GMC v9. */
	return 0;
}

static int gmc_v9_0_soft_reset(void *handle)
{
	/* XXX for emulation. */
	return 0;
}

static int gmc_v9_0_set_clockgating_state(void *handle,
					enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mmhub.funcs->set_clockgating(adev, state);

	athub_v1_0_set_clockgating(adev, state);

	return 0;
}

static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mmhub.funcs->get_clockgating(adev, flags);

	athub_v1_0_get_clockgating(adev, flags);
}

static int gmc_v9_0_set_powergating_state(void *handle,
					enum amd_powergating_state state)
{
	return 0;
}

const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
	.name = "gmc_v9_0",
	.early_init = gmc_v9_0_early_init,
	.late_init = gmc_v9_0_late_init,
	.sw_init = gmc_v9_0_sw_init,
	.sw_fini = gmc_v9_0_sw_fini,
	.hw_init = gmc_v9_0_hw_init,
	.hw_fini = gmc_v9_0_hw_fini,
	.suspend = gmc_v9_0_suspend,
	.resume = gmc_v9_0_resume,
	.is_idle = gmc_v9_0_is_idle,
	.wait_for_idle = gmc_v9_0_wait_for_idle,
	.soft_reset = gmc_v9_0_soft_reset,
	.set_clockgating_state = gmc_v9_0_set_clockgating_state,
	.set_powergating_state = gmc_v9_0_set_powergating_state,
	.get_clockgating_state = gmc_v9_0_get_clockgating_state,
};

const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GMC,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gmc_v9_0_ip_funcs,
};