drm/amdgpu: save vm fault information for amdkfd
author shaoyunl <Shaoyun.Liu@amd.com>
Thu, 12 Jul 2018 02:32:49 +0000 (22:32 -0400)
committer Oded Gabbay <oded.gabbay@gmail.com>
Thu, 12 Jul 2018 02:32:49 +0000 (22:32 -0400)
amdgpu saves the VM fault related information for KFD usage and keeps the
copy until KFD reads it.

Signed-off-by: shaoyun liu <shaoyun.liu@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/include/kgd_kfd_interface.h

index a8418a3..3dc76d9 100644 (file)
@@ -183,6 +183,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
                                            struct dma_fence **ef);
 
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+                                             struct kfd_vm_fault_info *info); /* copies the saved VM fault record into *info if one is pending */
+
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
 
index ea79908..befc7c4 100644 (file)
@@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .invalidate_tlbs = invalidate_tlbs,
        .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
        .submit_ib = amdgpu_amdkfd_submit_ib,
+       .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
index 19dd665..c68ef85 100644 (file)
@@ -176,6 +176,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .invalidate_tlbs = invalidate_tlbs,
        .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
        .submit_ib = amdgpu_amdkfd_submit_ib,
+       .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
index fa38a96..8a707d8 100644 (file)
@@ -1621,6 +1621,20 @@ bo_reserve_failed:
        return ret;
 }
 
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, /* hand the saved VM fault record to the caller */
+                                             struct kfd_vm_fault_info *mem) /* out: written only when a record is pending */
+{
+       struct amdgpu_device *adev;
+
+       adev = (struct amdgpu_device *)kgd; /* kgd is treated as the amdgpu device itself */
+       if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { /* flag is set to 1 by the GMC interrupt handler after it fills the record */
+               *mem = *adev->gmc.vm_fault_info; /* struct copy of the saved record */
+               mb(); /* barrier between the copy and the flag clear; the handler refills the record only while the flag is 0 */
+               atomic_set(&adev->gmc.vm_fault_info_updated, 0); /* allow the next fault to be recorded */
+       }
+       return 0; /* always 0; *mem is left untouched when no record was pending */
+}
+
 /* Evict a userptr BO by stopping the queues if necessary
  *
  * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
index 6cb4948..bb5a47a 100644 (file)
@@ -105,6 +105,8 @@ struct amdgpu_gmc {
        /* protects concurrent invalidation */
        spinlock_t              invalidate_lock;
        bool                    translate_further;
+       struct kfd_vm_fault_info *vm_fault_info;
+       atomic_t                vm_fault_info_updated;
 
        const struct amdgpu_gmc_funcs   *gmc_funcs;
 };
index 10920f0..36dc367 100644 (file)
@@ -28,6 +28,7 @@
 #include "cik.h"
 #include "gmc_v7_0.h"
 #include "amdgpu_ucode.h"
+#include "amdgpu_amdkfd.h"
 
 #include "bif/bif_4_1_d.h"
 #include "bif/bif_4_1_sh_mask.h"
@@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle)
                adev->vm_manager.vram_base_offset = 0;
        }
 
+       adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
+                                       GFP_KERNEL);
+       if (!adev->gmc.vm_fault_info)
+               return -ENOMEM;
+       atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+
        return 0;
 }
 
@@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle)
 
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);
+       kfree(adev->gmc.vm_fault_info);
        gmc_v7_0_gart_fini(adev);
        amdgpu_bo_fini(adev);
        release_firmware(adev->gmc.fw);
@@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
 {
-       u32 addr, status, mc_client;
+       u32 addr, status, mc_client, vmid;
 
        addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
        status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
@@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
                                         entry->pasid);
        }
 
+       vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                            VMID);
+       if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+               && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+               struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
+               u32 protections = REG_GET_FIELD(status,
+                                       VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                                       PROTECTIONS);
+
+               info->vmid = vmid;
+               info->mc_id = REG_GET_FIELD(status,
+                                           VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                                           MEMORY_CLIENT_ID);
+               info->status = status;
+               info->page_addr = addr;
+               info->prot_valid = protections & 0x7 ? true : false;
+               info->prot_read = protections & 0x8 ? true : false;
+               info->prot_write = protections & 0x10 ? true : false;
+               info->prot_exec = protections & 0x20 ? true : false;
+               mb();
+               atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+       }
+
        return 0;
 }
 
index 75f3ffb..70fc97b 100644 (file)
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "gmc_v8_0.h"
 #include "amdgpu_ucode.h"
+#include "amdgpu_amdkfd.h"
 
 #include "gmc/gmc_8_1_d.h"
 #include "gmc/gmc_8_1_sh_mask.h"
@@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle)
                adev->vm_manager.vram_base_offset = 0;
        }
 
+       adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
+                                       GFP_KERNEL);
+       if (!adev->gmc.vm_fault_info)
+               return -ENOMEM;
+       atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+
        return 0;
 }
 
@@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle)
 
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);
+       kfree(adev->gmc.vm_fault_info);
        gmc_v8_0_gart_fini(adev);
        amdgpu_bo_fini(adev);
        release_firmware(adev->gmc.fw);
@@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
 {
-       u32 addr, status, mc_client;
+       u32 addr, status, mc_client, vmid;
 
        if (amdgpu_sriov_vf(adev)) {
                dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
@@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
                                         entry->pasid);
        }
 
+       vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                            VMID);
+       if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+               && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+               struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
+               u32 protections = REG_GET_FIELD(status,
+                                       VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                                       PROTECTIONS);
+
+               info->vmid = vmid;
+               info->mc_id = REG_GET_FIELD(status,
+                                           VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                                           MEMORY_CLIENT_ID);
+               info->status = status;
+               info->page_addr = addr;
+               info->prot_valid = protections & 0x7 ? true : false;
+               info->prot_read = protections & 0x8 ? true : false;
+               info->prot_write = protections & 0x10 ? true : false;
+               info->prot_exec = protections & 0x20 ? true : false;
+               mb();
+               atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+       }
+
        return 0;
 }
 
index 5733fbe..28b11d1 100644 (file)
@@ -47,6 +47,17 @@ enum kfd_preempt_type {
        KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
 };
 
+struct kfd_vm_fault_info { /* snapshot of one VM protection fault, filled by the GMC interrupt handler */
+       uint64_t        page_addr; /* fault address value read from the protection fault address register */
+       uint32_t        vmid; /* VMID field of the protection fault status register */
+       uint32_t        mc_id; /* MEMORY_CLIENT_ID field of the status register */
+       uint32_t        status; /* raw protection fault status register value */
+       bool            prot_valid; /* true if any of PROTECTIONS bits 0x7 is set */
+       bool            prot_read; /* true if PROTECTIONS bit 0x8 is set */
+       bool            prot_write; /* true if PROTECTIONS bit 0x10 is set */
+       bool            prot_exec; /* true if PROTECTIONS bit 0x20 is set */
+};
+
 struct kfd_cu_info {
        uint32_t num_shader_engines;
        uint32_t num_shader_arrays_per_engine;
@@ -259,6 +270,12 @@ struct tile_config {
  * IB to the corresponding ring (ring type). The IB is executed with the
  * specified VMID in a user mode context.
  *
+ * @get_vm_fault_info: Return information about a recent VM fault on
+ * GFXv7 and v8. If multiple VM faults occurred since the last call of
+ * this function, it will return information about the first of those
+ * faults. On GFXv9 VM fault information is fully contained in the IH
+ * packet and this function is not needed.
+ *
  * This structure contains function pointers to services that the kgd driver
  * provides to amdkfd driver.
  *
@@ -374,6 +391,9 @@ struct kfd2kgd_calls {
        int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
                        uint32_t vmid, uint64_t gpu_addr,
                        uint32_t *ib_cmd, uint32_t ib_len);
+
+       int (*get_vm_fault_info)(struct kgd_dev *kgd,
+                       struct kfd_vm_fault_info *info);
 };
 
 /**