Revert "drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute" for Raven
author: Jesse Zhang <Jesse.Zhang@amd.com>
Thu, 29 Feb 2024 06:00:14 +0000 (14:00 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 4 Mar 2024 20:58:45 +0000 (15:58 -0500)
Fix the following issue:
"amdgpu: Failed to create process VM object".

[Why] When amdgpu is initialized, seq64 does its mapping and updates the BO mapping in the VM page table.
But when clinfo runs, it also initializes a VM for a process device through the function kfd_process_device_init_vm and ensures the root PD is clean through the function amdgpu_vm_pt_is_root_clean.
So the two conflict, and clinfo always fails.

v1:
  - remove all the pte_supports_ats stuff from the amdgpu_vm code (Felix)

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c

index ed4a8c5..d004ace 100644 (file)
@@ -1385,10 +1385,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
                        struct amdgpu_bo_va_mapping, list);
                list_del(&mapping->list);
 
-               if (vm->pte_support_ats &&
-                   mapping->start < AMDGPU_GMC_HOLE_START)
-                       init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
-
                r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
                                           resv, mapping->start, mapping->last,
                                           init_pte_value, 0, 0, NULL, NULL,
@@ -2264,7 +2260,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        if (r)
                return r;
 
-       vm->pte_support_ats = false;
        vm->is_compute_context = false;
 
        vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@@ -2350,30 +2345,12 @@ error_free_delayed:
  */
 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-       bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
        int r;
 
        r = amdgpu_bo_reserve(vm->root.bo, true);
        if (r)
                return r;
 
-       /* Check if PD needs to be reinitialized and do it before
-        * changing any other state, in case it fails.
-        */
-       if (pte_support_ats != vm->pte_support_ats) {
-               /* Sanity checks */
-               if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
-                       r = -EINVAL;
-                       goto unreserve_bo;
-               }
-
-               vm->pte_support_ats = pte_support_ats;
-               r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
-                                      false);
-               if (r)
-                       goto unreserve_bo;
-       }
-
        /* Update VM state */
        vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
                                    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
index 42f6dde..9f6b5e1 100644 (file)
@@ -357,9 +357,6 @@ struct amdgpu_vm {
        /* Functions to use for VM table updates */
        const struct amdgpu_vm_update_funcs     *update_funcs;
 
-       /* Flag to indicate ATS support from PTE for GFX9 */
-       bool                    pte_support_ats;
-
        /* Up to 128 pending retry page faults */
        DECLARE_KFIFO(faults, u64, 128);
 
index a160265..2835cb3 100644 (file)
@@ -89,22 +89,6 @@ static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
        return AMDGPU_VM_PTE_COUNT(adev);
 }
 
-/**
- * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
- *
- * @adev: amdgpu_device pointer
- *
- * Returns:
- * The number of entries in the root page directory which needs the ATS setting.
- */
-static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
-{
-       unsigned int shift;
-
-       shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
-       return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
-}
-
 /**
  * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
  *
@@ -379,7 +363,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        struct ttm_operation_ctx ctx = { true, false };
        struct amdgpu_vm_update_params params;
        struct amdgpu_bo *ancestor = &vmbo->bo;
-       unsigned int entries, ats_entries;
+       unsigned int entries;
        struct amdgpu_bo *bo = &vmbo->bo;
        uint64_t addr;
        int r, idx;
@@ -394,27 +378,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        }
 
        entries = amdgpu_bo_size(bo) / 8;
-       if (!vm->pte_support_ats) {
-               ats_entries = 0;
-
-       } else if (!bo->parent) {
-               ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
-               ats_entries = min(ats_entries, entries);
-               entries -= ats_entries;
-
-       } else {
-               struct amdgpu_vm_bo_base *pt;
-
-               pt = ancestor->vm_bo;
-               ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
-               if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
-                   ats_entries) {
-                       ats_entries = 0;
-               } else {
-                       ats_entries = entries;
-                       entries = 0;
-               }
-       }
 
        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
        if (r)
@@ -445,23 +408,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                goto exit;
 
        addr = 0;
-       if (ats_entries) {
-               uint64_t value = 0, flags;
-
-               flags = AMDGPU_PTE_DEFAULT_ATC;
-               if (level != AMDGPU_VM_PTB) {
-                       /* Handle leaf PDEs as PTEs */
-                       flags |= AMDGPU_PDE_PTE;
-                       amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
-               }
-
-               r = vm->update_funcs->update(&params, vmbo, addr, 0,
-                                            ats_entries, value, flags);
-               if (r)
-                       goto exit;
-
-               addr += ats_entries * 8;
-       }
 
        if (entries) {
                uint64_t value = 0, flags = 0;