gpu: host1x: Implement job tracking using DMA fences
author Mikko Perttunen <mperttunen@nvidia.com>
Thu, 19 Jan 2023 13:09:19 +0000 (15:09 +0200)
committer Thierry Reding <treding@nvidia.com>
Thu, 26 Jan 2023 14:55:38 +0000 (15:55 +0100)
In anticipation of the removal of the intr API, implement job tracking
using DMA fences instead. The two main changes are that cdma_update
now schedules the update as work, since fence completion callbacks can
be invoked from interrupt context, and that extra care is taken to
ensure the callback is not running when the fence is freed.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
drivers/gpu/host1x/cdma.c
drivers/gpu/host1x/cdma.h
drivers/gpu/host1x/hw/channel_hw.c
drivers/gpu/host1x/job.c
include/linux/host1x.h

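diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 103fda0..bc821b0 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c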
@@ -490,6 +490,15 @@ resume:
        host1x_hw_cdma_resume(host1x, cdma, restart_addr);
 }
 
+static void cdma_update_work(struct work_struct *work)
+{
+       struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work);
+
+       mutex_lock(&cdma->lock);
+       update_cdma_locked(cdma);
+       mutex_unlock(&cdma->lock);
+}
+
 /*
  * Create a cdma
  */
@@ -499,6 +508,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma)
 
        mutex_init(&cdma->lock);
        init_completion(&cdma->complete);
+       INIT_WORK(&cdma->update_work, cdma_update_work);
 
        INIT_LIST_HEAD(&cdma->sync_queue);
 
@@ -679,7 +689,5 @@ void host1x_cdma_end(struct host1x_cdma *cdma,
  */
 void host1x_cdma_update(struct host1x_cdma *cdma)
 {
-       mutex_lock(&cdma->lock);
-       update_cdma_locked(cdma);
-       mutex_unlock(&cdma->lock);
+       schedule_work(&cdma->update_work);
 }
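diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 12c4327..7fd8168 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h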
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/completion.h>
 #include <linux/list.h>
+#include <linux/workqueue.h>
 
 struct host1x_syncpt;
 struct host1x_userctx_timeout;
@@ -69,6 +70,7 @@ struct host1x_cdma {
        struct buffer_timeout timeout;  /* channel's timeout state/wq */
        bool running;
        bool torndown;
+       struct work_struct update_work;
 };
 
 #define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma)
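diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 732abe0..8a3119f 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c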
@@ -278,6 +278,14 @@ static void channel_program_cdma(struct host1x_job *job)
 #endif
 }
 
+static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+       struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb);
+
+       /* Schedules CDMA update. */
+       host1x_cdma_update(&job->channel->cdma);
+}
+
 static int channel_submit(struct host1x_job *job)
 {
        struct host1x_channel *ch = job->channel;
@@ -285,7 +293,6 @@ static int channel_submit(struct host1x_job *job)
        u32 prev_max = 0;
        u32 syncval;
        int err;
-       struct host1x_waitlist *completed_waiter = NULL;
        struct host1x *host = dev_get_drvdata(ch->dev->parent);
 
        trace_host1x_channel_submit(dev_name(ch->dev),
@@ -298,14 +305,7 @@ static int channel_submit(struct host1x_job *job)
        /* get submit lock */
        err = mutex_lock_interruptible(&ch->submitlock);
        if (err)
-               goto error;
-
-       completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL);
-       if (!completed_waiter) {
-               mutex_unlock(&ch->submitlock);
-               err = -ENOMEM;
-               goto error;
-       }
+               return err;
 
        host1x_channel_set_streamid(ch);
        host1x_enable_gather_filter(ch);
@@ -315,31 +315,37 @@ static int channel_submit(struct host1x_job *job)
        err = host1x_cdma_begin(&ch->cdma, job);
        if (err) {
                mutex_unlock(&ch->submitlock);
-               goto error;
+               return err;
        }
 
        channel_program_cdma(job);
        syncval = host1x_syncpt_read_max(sp);
 
+       /*
+        * Create fence before submitting job to HW to avoid job completing
+        * before the fence is set up.
+        */
+       job->fence = host1x_fence_create(sp, syncval);
+       if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) {
+               job->fence = NULL;
+       } else {
+               err = dma_fence_add_callback(job->fence, &job->fence_cb,
+                                            job_complete_callback);
+       }
+
        /* end CDMA submit & stash pinned hMems into sync queue */
        host1x_cdma_end(&ch->cdma, job);
 
        trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);
 
-       /* schedule a submit complete interrupt */
-       err = host1x_intr_add_action(host, sp, syncval,
-                                    HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
-                                    completed_waiter, &job->waiter);
-       completed_waiter = NULL;
-       WARN(err, "Failed to set submit complete interrupt");
-
        mutex_unlock(&ch->submitlock);
 
-       return 0;
+       if (err == -ENOENT)
+               host1x_cdma_update(&ch->cdma);
+       else
+               WARN(err, "Failed to set submit complete interrupt");
 
-error:
-       kfree(completed_waiter);
-       return err;
+       return 0;
 }
 
 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
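diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index b2761aa..3ed49e1 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c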
@@ -88,9 +88,15 @@ static void job_free(struct kref *ref)
        if (job->release)
                job->release(job);
 
-       if (job->waiter)
-               host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
-                                   job->waiter, false);
+       if (job->fence) {
+               /*
+                * remove_callback is atomic w.r.t. fence signaling, so
+                * after the call returns, we know that the callback is not
+                * in execution, and the fence can be safely freed.
+                */
+               dma_fence_remove_callback(job->fence, &job->fence_cb);
+               dma_fence_put(job->fence);
+       }
 
        if (job->syncpt)
                host1x_syncpt_put(job->syncpt);
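diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index dc55d9d..db6cf6f 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h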
@@ -8,6 +8,7 @@
 
 #include <linux/device.h>
 #include <linux/dma-direction.h>
+#include <linux/dma-fence.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
@@ -288,8 +289,9 @@ struct host1x_job {
        u32 syncpt_incrs;
        u32 syncpt_end;
 
-       /* Completion waiter ref */
-       void *waiter;
+       /* Completion fence for job tracking */
+       struct dma_fence *fence;
+       struct dma_fence_cb fence_cb;
 
        /* Maximum time to wait for this job */
        unsigned int timeout;