/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/firmware.h>
#include <linux/pci.h>

#include <drm/drm_cache.h>

#include "amdgpu_atomfirmware.h"
#include "amdgpu_gem.h"

#include "hdp/hdp_4_0_offset.h"
#include "hdp/hdp_4_0_sh_mask.h"
#include "gc/gc_9_0_sh_mask.h"
#include "dce/dce_12_0_offset.h"
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"

#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"

#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
#include "athub_v1_0.h"
#include "gfxhub_v1_1.h"
#include "mmhub_v9_4.h"

#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"

#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"
/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
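/*
 * These DCN registers are consumed further down in this file: the HUBP0
 * viewport dimension register in gmc_v9_0_get_vbios_fb_size() to size the
 * VBIOS framebuffer reservation, and DCHUBBUB_SDPIF_MMIO_CNTRL_0 in
 * gmc_v9_0_save_registers()/gmc_v9_0_restore_registers() across suspend.
 */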
static const char *gfxhub_client_ids[] = {

static const char *mmhub_client_ids_raven[][2] = {

static const char *mmhub_client_ids_renoir[][2] = {

static const char *mmhub_client_ids_vega10[][2] = {
	[32+14][0] = "SDMA0",
	[32+4][1] = "DCEDWB",
	[32+14][1] = "SDMA1",

static const char *mmhub_client_ids_vega12[][2] = {
	[32+15][0] = "SDMA0",
	[32+1][1] = "DCEDWB",
	[32+15][1] = "SDMA1",

static const char *mmhub_client_ids_vega20[][2] = {
	[32+12][0] = "UTCL2",
	[32+14][0] = "SDMA1",
	[32+14][1] = "SDMA1",

static const char *mmhub_client_ids_arcturus[][2] = {
	[32+15][0] = "SDMA1",
	[64+15][0] = "SDMA2",
	[96+15][0] = "SDMA3",
	[128+15][0] = "SDMA4",
	[160+11][0] = "JPEG",
	[160+13][0] = "VCNU",
	[160+15][0] = "SDMA5",
	[192+10][0] = "UTCL2",
	[192+11][0] = "JPEG1",
	[192+12][0] = "VCN1",
	[192+13][0] = "VCN1U",
	[192+15][0] = "SDMA6",
	[224+15][0] = "SDMA7",
	[32+15][1] = "SDMA1",
	[64+15][1] = "SDMA2",
	[96+15][1] = "SDMA3",
	[128+15][1] = "SDMA4",
	[160+11][1] = "JPEG",
	[160+13][1] = "VCNU",
	[160+15][1] = "SDMA5",
	[192+11][1] = "JPEG1",
	[192+12][1] = "VCN1",
	[192+13][1] = "VCN1U",
	[192+15][1] = "SDMA6",
	[224+15][1] = "SDMA7",
static const u32 golden_settings_vega10_hdp[] =
{
	0xf64, 0x0fffffff, 0x00000000,
	0xf65, 0x0fffffff, 0x00000000,
	0xf66, 0x0fffffff, 0x00000000,
	0xf67, 0x0fffffff, 0x00000000,
	0xf68, 0x0fffffff, 0x00000000,
	0xf6a, 0x0fffffff, 0x00000000,
	0xf6b, 0x0fffffff, 0x00000000,
	0xf6c, 0x0fffffff, 0x00000000,
	0xf6d, 0x0fffffff, 0x00000000,
	0xf6e, 0x0fffffff, 0x00000000,
};
static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
};
static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
};
static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
	(0x000143c0 + 0x00000000),
	(0x000143c0 + 0x00000800),
	(0x000143c0 + 0x00001000),
	(0x000143c0 + 0x00001800),
	(0x000543c0 + 0x00000000),
	(0x000543c0 + 0x00000800),
	(0x000543c0 + 0x00001000),
	(0x000543c0 + 0x00001800),
	(0x000943c0 + 0x00000000),
	(0x000943c0 + 0x00000800),
	(0x000943c0 + 0x00001000),
	(0x000943c0 + 0x00001800),
	(0x000d43c0 + 0x00000000),
	(0x000d43c0 + 0x00000800),
	(0x000d43c0 + 0x00001000),
	(0x000d43c0 + 0x00001800),
	(0x001143c0 + 0x00000000),
	(0x001143c0 + 0x00000800),
	(0x001143c0 + 0x00001000),
	(0x001143c0 + 0x00001800),
	(0x001543c0 + 0x00000000),
	(0x001543c0 + 0x00000800),
	(0x001543c0 + 0x00001000),
	(0x001543c0 + 0x00001800),
	(0x001943c0 + 0x00000000),
	(0x001943c0 + 0x00000800),
	(0x001943c0 + 0x00001000),
	(0x001943c0 + 0x00001800),
	(0x001d43c0 + 0x00000000),
	(0x001d43c0 + 0x00000800),
	(0x001d43c0 + 0x00001000),
	(0x001d43c0 + 0x00001800),
};
static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
	(0x000143e0 + 0x00000000),
	(0x000143e0 + 0x00000800),
	(0x000143e0 + 0x00001000),
	(0x000143e0 + 0x00001800),
	(0x000543e0 + 0x00000000),
	(0x000543e0 + 0x00000800),
	(0x000543e0 + 0x00001000),
	(0x000543e0 + 0x00001800),
	(0x000943e0 + 0x00000000),
	(0x000943e0 + 0x00000800),
	(0x000943e0 + 0x00001000),
	(0x000943e0 + 0x00001800),
	(0x000d43e0 + 0x00000000),
	(0x000d43e0 + 0x00000800),
	(0x000d43e0 + 0x00001000),
	(0x000d43e0 + 0x00001800),
	(0x001143e0 + 0x00000000),
	(0x001143e0 + 0x00000800),
	(0x001143e0 + 0x00001000),
	(0x001143e0 + 0x00001800),
	(0x001543e0 + 0x00000000),
	(0x001543e0 + 0x00000800),
	(0x001543e0 + 0x00001000),
	(0x001543e0 + 0x00001800),
	(0x001943e0 + 0x00000000),
	(0x001943e0 + 0x00000800),
	(0x001943e0 + 0x00001000),
	(0x001943e0 + 0x00001800),
	(0x001d43e0 + 0x00000000),
	(0x001d43e0 + 0x00000800),
	(0x001d43e0 + 0x00001000),
	(0x001d43e0 + 0x00001800),
};
static const uint32_t ecc_umc_mcumc_status_addrs[] = {
	(0x000143c2 + 0x00000000),
	(0x000143c2 + 0x00000800),
	(0x000143c2 + 0x00001000),
	(0x000143c2 + 0x00001800),
	(0x000543c2 + 0x00000000),
	(0x000543c2 + 0x00000800),
	(0x000543c2 + 0x00001000),
	(0x000543c2 + 0x00001800),
	(0x000943c2 + 0x00000000),
	(0x000943c2 + 0x00000800),
	(0x000943c2 + 0x00001000),
	(0x000943c2 + 0x00001800),
	(0x000d43c2 + 0x00000000),
	(0x000d43c2 + 0x00000800),
	(0x000d43c2 + 0x00001000),
	(0x000d43c2 + 0x00001800),
	(0x001143c2 + 0x00000000),
	(0x001143c2 + 0x00000800),
	(0x001143c2 + 0x00001000),
	(0x001143c2 + 0x00001800),
	(0x001543c2 + 0x00000000),
	(0x001543c2 + 0x00000800),
	(0x001543c2 + 0x00001000),
	(0x001543c2 + 0x00001800),
	(0x001943c2 + 0x00000000),
	(0x001943c2 + 0x00000800),
	(0x001943c2 + 0x00001000),
	(0x001943c2 + 0x00001800),
	(0x001d43c2 + 0x00000000),
	(0x001d43c2 + 0x00000800),
	(0x001d43c2 + 0x00001000),
	(0x001d43c2 + 0x00001800),
};
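/*
 * The three tables above cover 8 UMC instances with 4 channels each
 * (32 entries): channels within an instance are 0x800 registers apart and
 * instances are 0x40000 apart, starting at the EccCtrl (0x...43c0),
 * EccCtrl mask (0x...43e0) and EccStatus (0x...43c2) offsets respectively.
 *
 * Illustrative sketch only, not used by the driver (the helper name is made
 * up for illustration): the same EccCtrl addresses could be derived from the
 * instance/channel indices like this.
 */
static inline uint32_t umc_ecc_ctrl_addr_example(uint32_t umc_inst,
						 uint32_t channel)
{
	/* 0x40000 register stride per UMC instance, 0x800 per channel */
	return 0x000143c0 + umc_inst * 0x00040000 + channel * 0x00000800;
}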
static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *src,
					enum amdgpu_interrupt_state state)
	u32 bits, i, tmp, reg;

	/*
	 * Devices newer than VEGA10/12 have these programming sequences
	 * performed by the PSP bootloader.
	 */
	if (adev->asic_type >= CHIP_VEGA20)

	case AMDGPU_IRQ_STATE_DISABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *src,
					     enum amdgpu_interrupt_state state)
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, bits, i, j;

	bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
	       VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
	       VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
	       VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
	       VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
	       VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
	       VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	case AMDGPU_IRQ_STATE_DISABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
	struct amdgpu_vmhub *hub;
	bool retry_fault = !!(entry->src_data[1] & 0x80);
	uint32_t status = 0, cid = 0, rw = 0;
	const char *mmhub_cid;

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
		return 1; /* This also prevents sending it to KFD */

	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
		snprintf(hub_name, sizeof(hub_name), "mmhub0");
		hub = &adev->vmhub[AMDGPU_MMHUB_0];
	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
		snprintf(hub_name, sizeof(hub_name), "mmhub1");
		hub = &adev->vmhub[AMDGPU_MMHUB_1];
		snprintf(hub_name, sizeof(hub_name), "gfxhub0");
		hub = &adev->vmhub[AMDGPU_GFXHUB_0];

	/* If it's the first fault for this address, process it normally */
	if (retry_fault && !in_interrupt() &&
	    amdgpu_vm_handle_fault(adev, entry->pasid, addr))
		return 1; /* This also prevents sending it to KFD */

	if (!amdgpu_sriov_vf(adev)) {
		/*
		 * Issue a dummy read to wait for the status register to
		 * be updated to avoid reading an incorrect value due to
		 * the new fast GRBM interface.
		 */
		if (entry->vmid_src == AMDGPU_GFXHUB_0)
			RREG32(hub->vm_l2_pro_fault_status);

		status = RREG32(hub->vm_l2_pro_fault_status);
		cid = REG_GET_FIELD(status,
				    VM_L2_PROTECTION_FAULT_STATUS, CID);
		rw = REG_GET_FIELD(status,
				   VM_L2_PROTECTION_FAULT_STATUS, RW);
		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);

	if (printk_ratelimit()) {
		struct amdgpu_task_info task_info;

		memset(&task_info, 0, sizeof(struct amdgpu_task_info));
		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);

			"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
			"pasid:%u, for process %s pid %d thread %s pid %d)\n",
			hub_name, retry_fault ? "retry" : "no-retry",
			entry->src_id, entry->ring_id, entry->vmid,
			entry->pasid, task_info.process_name, task_info.tgid,
			task_info.task_name, task_info.pid);
		dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
			addr, entry->client_id);
		if (!amdgpu_sriov_vf(adev)) {
				"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
			if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
				dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
					cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
			switch (adev->asic_type) {
				mmhub_cid = mmhub_client_ids_vega10[cid][rw];
				mmhub_cid = mmhub_client_ids_vega12[cid][rw];
				mmhub_cid = mmhub_client_ids_vega20[cid][rw];
				mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
				mmhub_cid = mmhub_client_ids_raven[cid][rw];
				mmhub_cid = mmhub_client_ids_renoir[cid][rw];
			dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
				mmhub_cid ? mmhub_cid : "unknown", cid);
			dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
				REG_GET_FIELD(status,
					      VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
			dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
				REG_GET_FIELD(status,
					      VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
			dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
				REG_GET_FIELD(status,
					      VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
				REG_GET_FIELD(status,
					      VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
			dev_err(adev->dev, "\t RW: 0x%x\n", rw);
static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
	.set = gmc_v9_0_vm_fault_interrupt_state,
	.process = gmc_v9_0_process_interrupt,
};

static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
	.set = gmc_v9_0_ecc_interrupt_state,
	.process = amdgpu_umc_process_ecc_irq,
};

static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gmc.vm_fault.num_types = 1;
	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;

	if (!amdgpu_sriov_vf(adev)) {
		adev->gmc.ecc_irq.num_types = 1;
		adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
	}
}
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
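/*
 * The request word built above is written to the hub's
 * VM_INVALIDATE_ENG*_REQ register for the chosen invalidation engine;
 * completion is then polled through the matching ..._ACK register, whose
 * per-VMID bit (1 << vmid) is set once the flush has finished (see
 * gmc_v9_0_flush_gpu_tlb() below).
 */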
/**
 * gmc_v9_0_use_invalidate_semaphore - decide whether to use the
 * invalidation semaphore
 *
 * @adev: amdgpu_device pointer
 */
static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
	return ((vmhub == AMDGPU_MMHUB_0 ||
		 vmhub == AMDGPU_MMHUB_1) &&
		(!amdgpu_sriov_vf(adev)) &&
		(!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
		   (adev->apu_flags & AMD_APU_IS_PICASSO))));
static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
						     uint8_t vmid, uint16_t *p_pasid)
	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
/*
 * VMID 0 is used for the physical GPU addresses as seen by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

/**
 * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table using the given flush type.
 */
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
				   uint32_t vmhub, uint32_t flush_type)
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
	const unsigned eng = 17;
	u32 j, inv_req, inv_req2, tmp;
	struct amdgpu_vmhub *hub;

	BUG_ON(vmhub >= adev->num_vmhubs);

	hub = &adev->vmhub[vmhub];
	if (adev->gmc.xgmi.num_physical_nodes &&
	    adev->asic_type == CHIP_VEGA20) {
		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
		 * heavy-weight TLB flush (type 2), which flushes
		 * both. Due to a race condition with concurrent
		 * memory accesses using the same TLB cache line, we
		 * still need a second TLB flush after this.
		 */
		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);

	/* This is necessary for a HW workaround under SRIOV as well
	 * as GFXOFF under bare metal
	 */
	if (adev->gfx.kiq.ring.sched.ready &&
	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
	    down_read_trylock(&adev->reset_sem)) {
		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
		up_read(&adev->reset_sem);

	spin_lock(&adev->gmc.invalidate_lock);

	/*
	 * The hub may lose the GPUVM invalidate acknowledge state across a
	 * power-gating off cycle, so add a semaphore acquire before the
	 * invalidation and a semaphore release after it to avoid entering
	 * a power-gated state in between.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
		for (j = 0; j < adev->usec_timeout; j++) {
			/* a read return value of 1 means semaphore acquire */
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
					    hub->eng_distance * eng);

		if (j >= adev->usec_timeout)
			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");

	WREG32_NO_KIQ(hub->vm_inv_eng0_req +
		      hub->eng_distance * eng, inv_req);

	/*
	 * Issue a dummy read to wait for the ACK register to
	 * be cleared to avoid a false ACK due to the new fast
	 * GRBM interface.
	 */
	if (vmhub == AMDGPU_GFXHUB_0)
		RREG32_NO_KIQ(hub->vm_inv_eng0_req +
			      hub->eng_distance * eng);

	for (j = 0; j < adev->usec_timeout; j++) {
		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
				    hub->eng_distance * eng);
		if (tmp & (1 << vmid))

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
		/*
		 * Add a semaphore release after the invalidation;
		 * writing 0 releases the semaphore.
		 */
		WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
			      hub->eng_distance * eng, 0);

	spin_unlock(&adev->gmc.invalidate_lock);

	if (j < adev->usec_timeout)

	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
/**
 * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: PASID to be flushed
 *
 * Flush the TLB for the requested pasid.
 */
static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
					uint16_t pasid, uint32_t flush_type,
	uint16_t queried_pasid;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	if (amdgpu_in_reset(adev))

	if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
		 * heavy-weight TLB flush (type 2), which flushes
		 * both. Due to a race condition with concurrent
		 * memory accesses using the same TLB cache line, we
		 * still need a second TLB flush after this.
		 */
		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
				       adev->asic_type == CHIP_VEGA20);
		/* 2 dwords flush + 8 dwords fence */
		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;

			ndw += kiq->pmf->invalidate_tlbs_size;

		spin_lock(&adev->gfx.kiq.ring_lock);
		/* 2 dwords flush + 8 dwords fence */
		amdgpu_ring_alloc(ring, ndw);
			kiq->pmf->kiq_invalidate_tlbs(ring,
		kiq->pmf->kiq_invalidate_tlbs(ring,
					      pasid, flush_type, all_hub);
		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
			amdgpu_ring_undo(ring);
			spin_unlock(&adev->gfx.kiq.ring_lock);
			up_read(&adev->reset_sem);

		amdgpu_ring_commit(ring);
		spin_unlock(&adev->gfx.kiq.ring_lock);
		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
			up_read(&adev->reset_sem);

		up_read(&adev->reset_sem);

	for (vmid = 1; vmid < 16; vmid++) {

		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
		if (ret && queried_pasid == pasid) {
				for (i = 0; i < adev->num_vmhubs; i++)
					gmc_v9_0_flush_gpu_tlb(adev, vmid,
				gmc_v9_0_flush_gpu_tlb(adev, vmid,
						       AMDGPU_GFXHUB_0, flush_type);
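/*
 * Summary of the flush-by-PASID path above: when the KIQ ring is available
 * the flush is submitted as a KIQ packet that invalidates by PASID directly;
 * otherwise the ATC VMID<->PASID mappings are scanned and
 * gmc_v9_0_flush_gpu_tlb() is called for each VMID bound to that PASID.
 */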
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					    unsigned vmid, uint64_t pd_addr)
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	/*
	 * The hub may lose the GPUVM invalidate acknowledge state across a
	 * power-gating off cycle, so add a semaphore acquire before the
	 * invalidation and a semaphore release after it to avoid entering
	 * a power-gated state in between.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
		/* a read return value of 1 means semaphore acquire */
		amdgpu_ring_emit_reg_wait(ring,
					  hub->vm_inv_eng0_sem +
					  hub->eng_distance * eng, 0x1, 0x1);

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
			      (hub->ctx_addr_distance * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
			      (hub->ctx_addr_distance * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
					    hub->eng_distance * eng,
					    hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng,

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
		/*
		 * Add a semaphore release after the invalidation;
		 * writing 0 releases the semaphore.
		 */
		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
				      hub->eng_distance * eng, 0);
static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
	struct amdgpu_device *adev = ring->adev;

	/* Do nothing because there's no LUT register for mmhub1. */
	if (ring->funcs->vmhub == AMDGPU_MMHUB_1)

	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
/*
 * PTE format on VEGA 10:
 * 47:12 4k physical page base address
 *
 * PDE format on VEGA 10:
 * 63:59 block fragment size
 * 47:6 physical base address of PD or PTE
 */
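/*
 * Illustrative sketch only, not used by the driver: given the PTE layout
 * documented above (the 4K physical page base lives in bits 47:12, with the
 * remaining bits carried as flags), a PTE could be assembled roughly like
 * this. The helper name is made up for illustration; the real flag bits
 * come from AMDGPU_PTE_* and AMDGPU_PTE_MTYPE_VG10().
 */
static inline uint64_t gmc_v9_0_example_pack_pte(uint64_t page_base_addr,
						 uint64_t pte_flags)
{
	/* keep only the 4K-aligned address bits 47:12 */
	uint64_t addr_bits = page_base_addr & 0x0000FFFFFFFFF000ULL;

	return addr_bits | pte_flags;
}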
static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
	case AMDGPU_VM_MTYPE_DEFAULT:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_NC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_WC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
	case AMDGPU_VM_MTYPE_RW:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
	case AMDGPU_VM_MTYPE_CC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
	case AMDGPU_VM_MTYPE_UC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
				uint64_t *addr, uint64_t *flags)
	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	if (!adev->gmc.translate_further)

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE))
			*flags |= AMDGPU_PDE_BFS(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE)
			*flags &= ~AMDGPU_PDE_PTE;
			*flags |= AMDGPU_PTE_TF;
static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
				struct amdgpu_bo_va_mapping *mapping,
	*flags &= ~AMDGPU_PTE_EXECUTABLE;
	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;

	if (mapping->flags & AMDGPU_PTE_PRT) {
		*flags |= AMDGPU_PTE_PRT;
		*flags &= ~AMDGPU_PTE_VALID;

	if (adev->asic_type == CHIP_ARCTURUS &&
	    !(*flags & AMDGPU_PTE_SYSTEM) &&
	    mapping->bo_va->is_xgmi)
		*flags |= AMDGPU_PTE_SNOOPED;
static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);

	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
		size = AMDGPU_VBIOS_VGA_ALLOCATION;

		switch (adev->asic_type) {
			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
			size = (REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
	.map_mtype = gmc_v9_0_map_mtype,
	.get_vm_pde = gmc_v9_0_get_vm_pde,
	.get_vm_pte = gmc_v9_0_get_vm_pte,
	.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
};

static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
{
	adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}
static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
	switch (adev->asic_type) {
		adev->umc.funcs = &umc_v6_0_funcs;
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
	switch (adev->asic_type) {
		adev->mmhub.funcs = &mmhub_v9_4_funcs;
		adev->mmhub.funcs = &mmhub_v1_0_funcs;

static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
	switch (adev->asic_type) {
		adev->gfxhub.funcs = &gfxhub_v1_1_funcs;
		adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
static int gmc_v9_0_early_init(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v9_0_set_gmc_funcs(adev);
	gmc_v9_0_set_irq_funcs(adev);
	gmc_v9_0_set_umc_funcs(adev);
	gmc_v9_0_set_mmhub_funcs(adev);
	gmc_v9_0_set_gfxhub_funcs(adev);

	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
	adev->gmc.shared_aperture_end =
		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
	adev->gmc.private_aperture_end =
		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
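/*
 * Each of the shared and private apertures set up above spans 4 GiB
 * (4ULL << 30); the *_end values point at the last byte of the window.
 */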
static int gmc_v9_0_late_init(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_late_init(adev);

	r = amdgpu_gmc_allocate_vm_inv_eng(adev);

	/*
	 * Work around a performance drop when the VBIOS enables partial
	 * writes but disables HBM ECC on Vega10.
	 */
	if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
		if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
			if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
				adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);

	if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
		adev->mmhub.funcs->reset_ras_error_count(adev);

	r = amdgpu_gmc_ras_late_init(adev);

	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
				       struct amdgpu_gmc *mc)
	if (!amdgpu_sriov_vf(adev))
		base = adev->mmhub.funcs->get_fb_location(adev);

	/* add the xgmi offset of the physical node */
	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
	amdgpu_gmc_vram_location(adev, mc, base);
	amdgpu_gmc_gart_location(adev, mc);
	amdgpu_gmc_agp_location(adev, mc);
	/* base offset of vram pages */
	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);

	/* XXX: add the xgmi offset of the physical node? */
	adev->vm_manager.vram_base_offset +=
		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
/**
 * gmc_v9_0_mc_init - initialize the memory controller driver params
 *
 * @adev: amdgpu_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space.
 * Returns 0 for success.
 */
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
	/* get_memsize() reports the size in MB; convert it to bytes */
	adev->gmc.mc_vram_size =
		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;

	if (!(adev->flags & AMD_IS_APU)) {
		r = amdgpu_device_resize_fb_bar(adev);

	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);

#ifdef CONFIG_X86_64
	if (adev->flags & AMD_IS_APU) {
		adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
		adev->gmc.aper_size = adev->gmc.real_vram_size;

	/* In case the PCI BAR is larger than the actual amount of vram */
	adev->gmc.visible_vram_size = adev->gmc.aper_size;
	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

	/* set the gart size */
	if (amdgpu_gart_size == -1) {
		switch (adev->asic_type) {
		case CHIP_VEGA10: /* all engines support GPUVM */
		case CHIP_VEGA12: /* all engines support GPUVM */
			adev->gmc.gart_size = 512ULL << 20;
		case CHIP_RAVEN: /* DCE SG support */
			adev->gmc.gart_size = 1024ULL << 20;
		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;

	gmc_v9_0_vram_gtt_location(adev, &adev->gmc);
static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
	if (adev->gart.bo) {
		WARN(1, "VEGA10 PCIE GART already initialized\n");

	/* Initialize common gart structure */
	r = amdgpu_gart_init(adev);

	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
				    AMDGPU_PTE_EXECUTABLE;
	return amdgpu_gart_table_vram_alloc(adev);
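/*
 * Note on the table size computed above: each GART entry is an 8-byte PTE,
 * so the table occupies num_gpu_pages * 8 bytes; the default PTE flags mark
 * GART pages as uncached (MTYPE_UC) and executable.
 */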
/**
 * gmc_v9_0_save_registers - saves regs
 *
 * @adev: amdgpu_device pointer
 *
 * This saves potential register values that should be
 * restored upon resume
 */
static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_RAVEN)
		adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
}
static int gmc_v9_0_sw_init(void *handle)
	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfxhub.funcs->init(adev);

	adev->mmhub.funcs->init(adev);

	spin_lock_init(&adev->gmc.invalidate_lock);

	r = amdgpu_atomfirmware_get_vram_info(adev,
		&vram_width, &vram_type, &vram_vendor);
	if (amdgpu_sriov_vf(adev))
		/*
		 * For Vega10 SR-IOV, vram_width can't be read from ATOM (as
		 * on Raven) and the DF related registers are not readable;
		 * hardcoding seems to be the only way to set the correct
		 * vram_width.
		 */
		adev->gmc.vram_width = 2048;
	else if (amdgpu_emu_mode != 1)
		adev->gmc.vram_width = vram_width;

	if (!adev->gmc.vram_width) {
		int chansize, numchan;

		/* hbm memory channel size */
		if (adev->flags & AMD_IS_APU)

		numchan = adev->df.funcs->get_hbm_channel_number(adev);
		adev->gmc.vram_width = numchan * chansize;

	adev->gmc.vram_type = vram_type;
	adev->gmc.vram_vendor = vram_vendor;
	switch (adev->asic_type) {
		adev->num_vmhubs = 2;

		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
			/* vm_size is 128TB + 512GB for legacy 3-level page support */
			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
			adev->gmc.translate_further =
				adev->vm_manager.num_level > 1;

		adev->num_vmhubs = 2;

		/*
		 * To fulfill 4-level page support,
		 * vm size is 256TB (48 bit), the maximum size of Vega10,
		 * block size 512 (9 bit)
		 */
		/* SR-IOV restricts max_pfn below AMDGPU_GMC_HOLE */
		if (amdgpu_sriov_vf(adev))
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);

		adev->num_vmhubs = 3;

		/* Keep the vm size the same as Vega20 */
		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
	/* This interrupt is VMC page fault. */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
			      &adev->gmc.vm_fault);

	if (adev->asic_type == CHIP_ARCTURUS) {
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
				      &adev->gmc.vm_fault);

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
			      &adev->gmc.vm_fault);

	if (!amdgpu_sriov_vf(adev)) {
		/* interrupt sent to DF. */
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
				      &adev->gmc.ecc_irq);

	/* Set the internal MC address mask
	 * This is the max address of the GPU's
	 * internal address space.
	 */
	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");

	adev->need_swiotlb = drm_need_swiotlb(44);

	if (adev->gmc.xgmi.supported) {
		r = adev->gfxhub.funcs->get_xgmi_info(adev);

	r = gmc_v9_0_mc_init(adev);

	amdgpu_gmc_get_vbios_allocations(adev);

	/* Memory manager */
	r = amdgpu_bo_init(adev);

	r = gmc_v9_0_gart_init(adev);

	/*
	 * VMID 0 is reserved for System
	 * amdgpu graphics/compute will use VMIDs 1..n-1
	 * amdkfd will use VMIDs n..15
	 *
	 * The first KFD VMID is 8 for GPUs with graphics, 3 for
	 * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs
	 * for video processing.
	 */
	adev->vm_manager.first_kfd_vmid =
		adev->asic_type == CHIP_ARCTURUS ? 3 : 8;

	amdgpu_vm_manager_init(adev);

	gmc_v9_0_save_registers(adev);
static int gmc_v9_0_sw_fini(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_gmc_ras_fini(adev);
	amdgpu_gem_force_release(adev);
	amdgpu_vm_manager_fini(adev);
	amdgpu_gart_table_vram_free(adev);
	amdgpu_bo_fini(adev);
	amdgpu_gart_fini(adev);
static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
	switch (adev->asic_type) {
		if (amdgpu_sriov_vf(adev))
		soc15_program_register_sequence(adev,
						golden_settings_mmhub_1_0_0,
						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		/* TODO for renoir */
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
/**
 * gmc_v9_0_restore_registers - restores regs
 *
 * @adev: amdgpu_device pointer
 *
 * This restores register values saved at suspend.
 */
void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_RAVEN) {
		WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
		WARN_ON(adev->gmc.sdpif_register !=
			RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
	}
}
/**
 * gmc_v9_0_gart_enable - gart enable
 *
 * @adev: amdgpu_device pointer
 */
static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
	if (adev->gart.bo == NULL) {
		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");

	r = amdgpu_gart_table_vram_pin(adev);

	r = adev->gfxhub.funcs->gart_enable(adev);

	r = adev->mmhub.funcs->gart_enable(adev);

	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(adev->gmc.gart_size >> 20),
		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
	adev->gart.ready = true;
static int gmc_v9_0_hw_init(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* The sequence of these two function calls matters. */
	gmc_v9_0_init_golden_registers(adev);

	if (adev->mode_info.num_crtc) {
		if (adev->asic_type != CHIP_ARCTURUS) {
			/* Lock out access through the VGA aperture */
			WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);

			/* disable VGA render */
			WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);

	amdgpu_device_program_register_sequence(adev,
						golden_settings_vega10_hdp,
						ARRAY_SIZE(golden_settings_vega10_hdp));

	if (adev->mmhub.funcs->update_power_gating)
		adev->mmhub.funcs->update_power_gating(adev, true);

	switch (adev->asic_type) {
		WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);

	WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);

	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);

	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));

	/* After HDP is initialized, flush HDP. */
	adev->nbio.funcs->hdp_flush(adev, NULL);

	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)

	if (!amdgpu_sriov_vf(adev)) {
		adev->gfxhub.funcs->set_fault_enable_default(adev, value);
		adev->mmhub.funcs->set_fault_enable_default(adev, value);
	for (i = 0; i < adev->num_vmhubs; ++i)
		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);

	if (adev->umc.funcs && adev->umc.funcs->init_registers)
		adev->umc.funcs->init_registers(adev);

	r = gmc_v9_0_gart_enable(adev);
/**
 * gmc_v9_0_gart_disable - gart disable
 *
 * @adev: amdgpu_device pointer
 *
 * This disables all VM page tables.
 */
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
	adev->gfxhub.funcs->gart_disable(adev);
	adev->mmhub.funcs->gart_disable(adev);
	amdgpu_gart_table_vram_unpin(adev);
}

static int gmc_v9_0_hw_fini(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any GMC register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");

	amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
	gmc_v9_0_gart_disable(adev);
static int gmc_v9_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gmc_v9_0_hw_fini(adev);
}

static int gmc_v9_0_resume(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gmc_v9_0_hw_init(adev);

	amdgpu_vmid_reset_all(adev);

static bool gmc_v9_0_is_idle(void *handle)
	/* MC is always ready in GMC v9. */

static int gmc_v9_0_wait_for_idle(void *handle)
	/* There is no need to wait for MC idle in GMC v9. */

static int gmc_v9_0_soft_reset(void *handle)
	/* XXX for emulation. */

static int gmc_v9_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mmhub.funcs->set_clockgating(adev, state);

	athub_v1_0_set_clockgating(adev, state);

static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mmhub.funcs->get_clockgating(adev, flags);

	athub_v1_0_get_clockgating(adev, flags);

static int gmc_v9_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
	.early_init = gmc_v9_0_early_init,
	.late_init = gmc_v9_0_late_init,
	.sw_init = gmc_v9_0_sw_init,
	.sw_fini = gmc_v9_0_sw_fini,
	.hw_init = gmc_v9_0_hw_init,
	.hw_fini = gmc_v9_0_hw_fini,
	.suspend = gmc_v9_0_suspend,
	.resume = gmc_v9_0_resume,
	.is_idle = gmc_v9_0_is_idle,
	.wait_for_idle = gmc_v9_0_wait_for_idle,
	.soft_reset = gmc_v9_0_soft_reset,
	.set_clockgating_state = gmc_v9_0_set_clockgating_state,
	.set_powergating_state = gmc_v9_0_set_powergating_state,
	.get_clockgating_state = gmc_v9_0_get_clockgating_state,
};

const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GMC,
	.funcs = &gmc_v9_0_ip_funcs,
};