drm/xe/bo: don't hold dma-resv lock over drm_gem_handle_create
author Matthew Auld <matthew.auld@intel.com>
Mon, 9 Oct 2023 09:00:38 +0000 (10:00 +0100)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Thu, 21 Dec 2023 16:44:39 +0000 (11:44 -0500)
Holding the dma-resv lock across drm_gem_handle_create seems to create a
locking inversion with object_name_lock. That lock is held by
drm_prime_fd_to_handle when calling our xe_gem_prime_import hook, which
might eventually go on to grab the dma-resv lock during the attach.
However, we also have the opposite locking order in xe_gem_create_ioctl,
which holds the dma-resv lock when calling drm_gem_handle_create, which
in turn eventually wants to grab object_name_lock:

-> #1 (reservation_ww_class_mutex){+.+.}-{3:3}:
<4> [635.739288]        lock_acquire+0x169/0x3d0
<4> [635.739294]        __ww_mutex_lock.constprop.0+0x164/0x1e60
<4> [635.739300]        ww_mutex_lock_interruptible+0x42/0x1a0
<4> [635.739305]        drm_gem_shmem_pin+0x4b/0x140 [drm_shmem_helper]
<4> [635.739317]        dma_buf_dynamic_attach+0x101/0x430
<4> [635.739323]        xe_gem_prime_import+0xcc/0x2e0 [xe]
<4> [635.739499]        drm_prime_fd_to_handle_ioctl+0x184/0x2e0 [drm]
<4> [635.739594]        drm_ioctl_kernel+0x16f/0x250 [drm]
<4> [635.739693]        drm_ioctl+0x35e/0x620 [drm]
<4> [635.739789]        __x64_sys_ioctl+0xb7/0xf0
<4> [635.739794]        do_syscall_64+0x3c/0x90
<4> [635.739799]        entry_SYSCALL_64_after_hwframe+0x6e/0xd8
<4> [635.739805]
-> #0 (&dev->object_name_lock){+.+.}-{3:3}:
<4> [635.739813]        check_prev_add+0x1ba/0x14a0
<4> [635.739818]        __lock_acquire+0x203e/0x2ff0
<4> [635.739823]        lock_acquire+0x169/0x3d0
<4> [635.739827]        __mutex_lock+0x124/0x1310
<4> [635.739832]        drm_gem_handle_create+0x32/0x50 [drm]
<4> [635.739927]        xe_gem_create_ioctl+0x1d3/0x550 [xe]
<4> [635.740102]        drm_ioctl_kernel+0x16f/0x250 [drm]
<4> [635.740197]        drm_ioctl+0x35e/0x620 [drm]
<4> [635.740293]        __x64_sys_ioctl+0xb7/0xf0
<4> [635.740297]        do_syscall_64+0x3c/0x90
<4> [635.740302]        entry_SYSCALL_64_after_hwframe+0x6e/0xd8
<4> [635.740307]

It looks like it should be safe to simply drop the dma-resv lock prior
to publishing the object when calling drm_gem_handle_create.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/743
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/xe/xe_bo.c

index e8c89b6..835eab6 100644 (file)
@@ -1856,14 +1856,16 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
                if (XE_IOCTL_DBG(xe, !vm))
                        return -ENOENT;
                err = xe_vm_lock(vm, true);
-               if (err) {
-                       xe_vm_put(vm);
-                       return err;
-               }
+               if (err)
+                       goto out_vm;
        }
 
        bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
                          bo_flags);
+
+       if (vm)
+               xe_vm_unlock(vm);
+
        if (IS_ERR(bo)) {
                err = PTR_ERR(bo);
                goto out_vm;
@@ -1877,15 +1879,17 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
        goto out_put;
 
 out_bulk:
-       if (vm && !xe_vm_in_fault_mode(vm))
+       if (vm && !xe_vm_in_fault_mode(vm)) {
+               xe_vm_lock(vm, false);
                __xe_bo_unset_bulk_move(bo);
+               xe_vm_unlock(vm);
+       }
 out_put:
        xe_bo_put(bo);
 out_vm:
-       if (vm) {
-               xe_vm_unlock(vm);
+       if (vm)
                xe_vm_put(vm);
-       }
+
        return err;
 }