drm: Don't free jobs in wait_event_interruptible()
authorSteven Price <steven.price@arm.com>
Fri, 25 Oct 2019 10:51:56 +0000 (11:51 +0100)
committerChristian König <christian.koenig@amd.com>
Fri, 25 Oct 2019 11:47:58 +0000 (13:47 +0200)
drm_sched_cleanup_jobs() attempts to free finished jobs, however because
it is called as the condition of wait_event_interruptible() it must not
sleep. Unfortunately some free callbacks (notably for Panfrost) do sleep.

Instead let's rename drm_sched_cleanup_jobs() to
drm_sched_get_cleanup_job() and simply return a job for processing if
there is one. The caller can then call the free_job() callback outside
the wait_event_interruptible() where sleeping is possible before
re-checking and returning to sleep if necessary.

Tested-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Fixes: 5918045c4ed4 ("drm/scheduler: rework job destruction")
Signed-off-by: Steven Price <steven.price@arm.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/337652/
drivers/gpu/drm/scheduler/sched_main.c

index 9a0ee74..d4cc728 100644 (file)
@@ -622,43 +622,41 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 }
 
 /**
- * drm_sched_cleanup_jobs - destroy finished jobs
+ * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
  *
  * @sched: scheduler instance
  *
- * Remove all finished jobs from the mirror list and destroy them.
+ * Returns the next finished job from the mirror list (if there is one)
+ * ready for it to be destroyed.
  */
-static void drm_sched_cleanup_jobs(struct drm_gpu_scheduler *sched)
+static struct drm_sched_job *
+drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 {
+       struct drm_sched_job *job;
        unsigned long flags;
 
        /* Don't destroy jobs while the timeout worker is running */
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !cancel_delayed_work(&sched->work_tdr))
-               return;
-
+               return NULL;
 
-       while (!list_empty(&sched->ring_mirror_list)) {
-               struct drm_sched_job *job;
+       spin_lock_irqsave(&sched->job_list_lock, flags);
 
-               job = list_first_entry(&sched->ring_mirror_list,
+       job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);
-               if (!dma_fence_is_signaled(&job->s_fence->finished))
-                       break;
 
-               spin_lock_irqsave(&sched->job_list_lock, flags);
+       if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
                /* remove job from ring_mirror_list */
                list_del_init(&job->node);
-               spin_unlock_irqrestore(&sched->job_list_lock, flags);
-
-               sched->ops->free_job(job);
+       } else {
+               job = NULL;
+               /* queue timeout for next job */
+               drm_sched_start_timeout(sched);
        }
 
-       /* queue timeout for next job */
-       spin_lock_irqsave(&sched->job_list_lock, flags);
-       drm_sched_start_timeout(sched);
        spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
+       return job;
 }
 
 /**
@@ -698,12 +696,19 @@ static int drm_sched_main(void *param)
                struct drm_sched_fence *s_fence;
                struct drm_sched_job *sched_job;
                struct dma_fence *fence;
+               struct drm_sched_job *cleanup_job = NULL;
 
                wait_event_interruptible(sched->wake_up_worker,
-                                        (drm_sched_cleanup_jobs(sched),
+                                        (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
                                         (!drm_sched_blocked(sched) &&
                                          (entity = drm_sched_select_entity(sched))) ||
-                                        kthread_should_stop()));
+                                        kthread_should_stop());
+
+               if (cleanup_job) {
+                       sched->ops->free_job(cleanup_job);
+                       /* queue timeout for next job */
+                       drm_sched_start_timeout(sched);
+               }
 
                if (!entity)
                        continue;