drm/amdkfd: Fix eviction fence handling
author Felix Kuehling <felix.kuehling@amd.com>
Thu, 18 Apr 2024 01:13:59 +0000 (21:13 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 19 Apr 2024 03:54:48 +0000 (23:54 -0400)
Handle the case where dma_fence_get_rcu_safe returns NULL.

If restore work is already scheduled, only update its timer. The same
work item cannot be queued twice, so undo the extra queue eviction.

Fixes: 9a1c1339abf9 ("drm/amdkfd: Run restore_workers on freezable WQs")
Signed-off-by: Felix Kuehling <felix.kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Tested-by: Gang BA <Gang.Ba@amd.com>
Reviewed-by: Gang BA <Gang.Ba@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_process.c

index b799864..aafdf06 100644 (file)
@@ -1922,6 +1922,8 @@ static int signal_eviction_fence(struct kfd_process *p)
        rcu_read_lock();
        ef = dma_fence_get_rcu_safe(&p->ef);
        rcu_read_unlock();
+       if (!ef)
+               return -EINVAL;
 
        ret = dma_fence_signal(ef);
        dma_fence_put(ef);
@@ -1949,10 +1951,9 @@ static void evict_process_worker(struct work_struct *work)
                 * they are responsible stopping the queues and scheduling
                 * the restore work.
                 */
-               if (!signal_eviction_fence(p))
-                       queue_delayed_work(kfd_restore_wq, &p->restore_work,
-                               msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
-               else
+               if (signal_eviction_fence(p) ||
+                   mod_delayed_work(kfd_restore_wq, &p->restore_work,
+                                    msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
                        kfd_process_restore_queues(p);
 
                pr_debug("Finished evicting pasid 0x%x\n", p->pasid);