From fd1fa48b935f22c7d99713bf33846e14a6bb6ab9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Tue, 20 Jan 2026 12:57:21 +0100 Subject: [PATCH] drm/amdgpu: lock both VM and BO in amdgpu_gem_object_open MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The VM was not locked in the past since we initially only cleared the linked list element and not added it to any VM state. But this has changed quite some time ago, we just never realized this problem because the VM state lock was masking it. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 19 +++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 22 ++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++ 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 00ea69baa126..6893d175f7c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -878,6 +878,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, struct amdgpu_bo *bo[2] = {NULL, NULL}; struct amdgpu_bo_va *bo_va; bool same_hive = false; + struct drm_exec exec; int i, ret; if (!va) { @@ -958,19 +959,25 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, goto unwind; } - /* Add BO to VM internal data structures */ - ret = amdgpu_bo_reserve(bo[i], false); - if (ret) { - pr_debug("Unable to reserve BO during memory attach"); - goto unwind; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unwind; + ret = drm_exec_lock_obj(&exec, &bo[i]->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unwind; } + bo_va = amdgpu_vm_bo_find(vm, bo[i]); if (!bo_va) bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); else ++bo_va->ref_count; attachment[i]->bo_va = bo_va; - amdgpu_bo_unreserve(bo[i]); + drm_exec_fini(&exec); if (unlikely(!attachment[i]->bo_va)) { ret = -ENOMEM; pr_err("Failed to add BO object to VM. ret == %d\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5f9fa2140f09..5c90de58cc28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -232,6 +232,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va *bo_va; struct mm_struct *mm; + struct drm_exec exec; int r; mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm); @@ -242,9 +243,18 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, !amdgpu_vm_is_bo_always_valid(vm, abo)) return -EPERM; - r = amdgpu_bo_reserve(abo, false); - if (r) - return r; + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_obj(&exec, &abo->tbo.base, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; + + r = amdgpu_vm_lock_pd(vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; + } amdgpu_vm_bo_update_shared(abo); bo_va = amdgpu_vm_bo_find(vm, abo); @@ -260,8 +270,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, amdgpu_bo_unreserve(abo); return r; } - - amdgpu_bo_unreserve(abo); + drm_exec_fini(&exec); /* Validate and add eviction fence to DMABuf imports with dynamic * attachment in compute VMs. Re-validation will be done by @@ -294,7 +303,10 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, } } mutex_unlock(&vm->process_info->lock); + return r; +out_unlock: + drm_exec_fini(&exec); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1878e0faa722..f69332eed051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1445,6 +1445,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv; + struct drm_exec exec; int r, pasid; /* Ensure IB tests are run on ring */ @@ -1484,7 +1485,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) if (r) goto error_pasid; + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error_vm; + } + fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); + drm_exec_fini(&exec); if (!fpriv->prt_va) { r = -ENOMEM; goto error_vm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 31383583fc68..11597224d437 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1735,6 +1735,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, { struct amdgpu_bo_va *bo_va; + amdgpu_vm_assert_locked(vm); + bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL); if (bo_va == NULL) { return NULL; -- 2.30.2