drm/amdgpu: cache gpuvm fault information for gmc7+
author: Alex Deucher <alexander.deucher@amd.com>
Thu, 29 Sep 2022 15:57:12 +0000 (11:57 -0400)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 4 Oct 2023 22:37:07 +0000 (18:37 -0400)
Cache the current GPUVM fault information (fault address, fault status and
VM hub) in the vm struct.  Userspace can query it later to help debug
user-mode drivers (UMDs).

Cc: samuel.pitoiset@gmail.com
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 8e6e362..d8a4fdd 100644 (file)
@@ -149,6 +149,9 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
 
                status = RREG32(hub->vm_l2_pro_fault_status);
                WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+               amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+                                            entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
        }
 
        if (!printk_ratelimit())
index 07f50ca..f4bb388 100644 (file)
@@ -119,6 +119,9 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
 
                status = RREG32(hub->vm_l2_pro_fault_status);
                WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+               amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+                                            entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
        }
 
        if (printk_ratelimit()) {
index 3869cef..61ca1a8 100644 (file)
@@ -1268,6 +1268,9 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
        if (!addr && !status)
                return 0;
 
+       amdgpu_vm_update_fault_cache(adev, entry->pasid,
+                                    ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+
        if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
                gmc_v7_0_set_fault_enable_default(adev, false);
 
index 4126172..fa59749 100644 (file)
@@ -1436,6 +1436,9 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
        if (!addr && !status)
                return 0;
 
+       amdgpu_vm_update_fault_cache(adev, entry->pasid,
+                                    ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+
        if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
                gmc_v8_0_set_fault_enable_default(adev, false);
 
index 04edd61..4e823e3 100644 (file)
@@ -553,6 +553,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
        struct amdgpu_vmhub *hub;
        const char *mmhub_cid;
        const char *hub_name;
+       unsigned int vmhub;
        u64 addr;
        uint32_t cam_index = 0;
        int ret, xcc_id = 0;
@@ -565,10 +566,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 
        if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
                hub_name = "mmhub0";
-               hub = &adev->vmhub[AMDGPU_MMHUB0(node_id / 4)];
+               vmhub = AMDGPU_MMHUB0(node_id / 4);
        } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
                hub_name = "mmhub1";
-               hub = &adev->vmhub[AMDGPU_MMHUB1(0)];
+               vmhub = AMDGPU_MMHUB1(0);
        } else {
                hub_name = "gfxhub0";
                if (adev->gfx.funcs->ih_node_to_logical_xcc) {
@@ -577,8 +578,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
                        if (xcc_id < 0)
                                xcc_id = 0;
                }
-               hub = &adev->vmhub[xcc_id];
+               vmhub = xcc_id;
        }
+       hub = &adev->vmhub[vmhub];
 
        if (retry_fault) {
                if (adev->irq.retry_cam_enabled) {
@@ -624,7 +626,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
        if (!printk_ratelimit())
                return 0;
 
-
        memset(&task_info, 0, sizeof(struct amdgpu_task_info));
        amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
 
@@ -660,6 +661,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
        rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
        WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
 
+       amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
+
        dev_err(adev->dev,
                "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
                status);