habanalabs: wait again for multi-CS if no CS completed
authorOhad Sharabi <osharabi@habana.ai>
Wed, 1 Dec 2021 08:52:27 +0000 (10:52 +0200)
committerOded Gabbay <ogabbay@kernel.org>
Sun, 26 Dec 2021 06:59:08 +0000 (08:59 +0200)
The original multi-CS design assumption that stream masters are used
exclusively (i.e. multi-CS with set of stream master QIDs will not get
completed by CS not from the multi-CS set) is inaccurate.

Thus multi-CS behavior is now modified not to treat such case as an
error.

Instead, if we have multi-CS completion but we detect that no CS from
the list is actually completed we will do another multi-CS wait (with
modified timeout).

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/command_submission.c
drivers/misc/habanalabs/common/habanalabs.h

index a63ebbc..f58fff3 100644 (file)
@@ -545,13 +545,6 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
                         * mcs fences.
                         */
                        fence->mcs_handling_done = true;
-                       /*
-                        * Since CS (and its related fence) can be associated with only one
-                        * multi CS context, once it triggered multi CS completion no need to
-                        * continue checking other multi CS contexts.
-                        */
-                       spin_unlock(&mcs_compl->lock);
-                       break;
                }
 
                spin_unlock(&mcs_compl->lock);
@@ -2498,6 +2491,21 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
        return rc;
 }
 
+static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
+{
+       if (usecs <= U32_MAX)
+               return usecs_to_jiffies(usecs);
+
+       /*
+        * If the value in nanoseconds is larger than 64 bit, use the largest
+        * 64 bit value.
+        */
+       if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
+               return nsecs_to_jiffies(U64_MAX);
+
+       return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
+}
+
 /*
  * hl_wait_multi_cs_completion_init - init completion structure
  *
@@ -2534,8 +2542,7 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
        }
 
        if (i == MULTI_CS_MAX_USER_CTX) {
-               dev_err(hdev->dev,
-                               "no available multi-CS completion structure\n");
+               dev_err(hdev->dev, "no available multi-CS completion structure\n");
                return ERR_PTR(-ENOMEM);
        }
        return mcs_compl;
@@ -2566,27 +2573,18 @@ static void hl_wait_multi_cs_completion_fini(
  *
  * @return 0 on success, otherwise non 0 error code
  */
-static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data)
+static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
+                                               struct multi_cs_completion *mcs_compl)
 {
-       struct hl_device *hdev = mcs_data->ctx->hdev;
-       struct multi_cs_completion *mcs_compl;
        long completion_rc;
 
-       mcs_compl = hl_wait_multi_cs_completion_init(hdev,
-                                       mcs_data->stream_master_qid_map);
-       if (IS_ERR(mcs_compl))
-               return PTR_ERR(mcs_compl);
-
-       completion_rc = wait_for_completion_interruptible_timeout(
-                                       &mcs_compl->completion,
-                                       usecs_to_jiffies(mcs_data->timeout_us));
+       completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
+                                                                       mcs_data->timeout_jiffies);
 
        /* update timestamp */
        if (completion_rc > 0)
                mcs_data->timestamp = mcs_compl->timestamp;
 
-       hl_wait_multi_cs_completion_fini(mcs_compl);
-
        mcs_data->wait_status = completion_rc;
 
        return 0;
@@ -2619,6 +2617,7 @@ void hl_multi_cs_completion_init(struct hl_device *hdev)
  */
 static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 {
+       struct multi_cs_completion *mcs_compl;
        struct hl_device *hdev = hpriv->hdev;
        struct multi_cs_data mcs_data = {0};
        union hl_wait_cs_args *args = data;
@@ -2686,12 +2685,19 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
                goto put_ctx;
 
        /* wait (with timeout) for the first CS to be completed */
-       mcs_data.timeout_us = args->in.timeout_us;
-       rc = hl_wait_multi_cs_completion(&mcs_data);
-       if (rc)
+       mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
+
+       mcs_compl = hl_wait_multi_cs_completion_init(hdev, mcs_data.stream_master_qid_map);
+       if (IS_ERR(mcs_compl)) {
+               rc = PTR_ERR(mcs_compl);
                goto put_ctx;
+       }
+
+       while (true) {
+               rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
+               if (rc || (mcs_data.wait_status == 0))
+                       break;
 
-       if (mcs_data.wait_status > 0) {
                /*
                 * poll fences once again to update the CS map.
                 * no timestamp should be updated this time.
@@ -2699,18 +2705,26 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
                mcs_data.update_ts = false;
                rc = hl_cs_poll_fences(&mcs_data);
 
+               if (mcs_data.completion_bitmap)
+                       break;
+
                /*
                 * if hl_wait_multi_cs_completion returned before timeout (i.e.
-                * it got a completion) we expect to see at least one CS
-                * completed after the poll function.
+                * it got a completion) it either got completed by CS in the multi CS list
+                * (in which case the indication will be non empty completion_bitmap) or it
+                * got completed by CS submitted to one of the shared stream master but
+                * not in the multi CS list (in which case we should wait again but reinit
+                * the completion, modify the timeout and set timestamp as zero to let a CS
+                * related to the current multi-CS set a new, relevant, timestamp)
                 */
-               if (!mcs_data.completion_bitmap) {
-                       dev_warn_ratelimited(hdev->dev,
-                               "Multi-CS got completion on wait but no CS completed\n");
-                       rc = -EFAULT;
-               }
+               /* wait again with modified timeout */
+               mcs_data.timeout_jiffies = mcs_data.wait_status;
+               reinit_completion(&mcs_compl->completion);
+               mcs_compl->timestamp = 0;
        }
 
+       hl_wait_multi_cs_completion_fini(mcs_compl);
+
 put_ctx:
        hl_ctx_put(ctx);
        kfree(fence_arr);
@@ -2741,7 +2755,7 @@ free_seq_arr:
                }
 
                /* update if some CS was gone */
-               if (mcs_data.timestamp)
+               if (!mcs_data.timestamp)
                        args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
        } else {
                args->out.status = HL_WAIT_CS_STATUS_BUSY;
@@ -2807,21 +2821,6 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
        return 0;
 }
 
-static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
-{
-       if (usecs <= U32_MAX)
-               return usecs_to_jiffies(usecs);
-
-       /*
-        * If the value in nanoseconds is larger than 64 bit, use the largest
-        * 64 bit value.
-        */
-       if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
-               return nsecs_to_jiffies(U64_MAX);
-
-       return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
-}
-
 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
                                u64 timeout_us, u64 user_address,
                                u64 target_value, struct hl_user_interrupt *interrupt,
index df19359..eda1c70 100644 (file)
@@ -2362,7 +2362,7 @@ struct multi_cs_completion {
  * @ctx: pointer to the context structure
  * @fence_arr: array of fences of all CSs
  * @seq_arr: array of CS sequence numbers
- * @timeout_us: timeout in usec for waiting for CS to complete
+ * @timeout_jiffies: timeout in jiffies for waiting for CS to complete
  * @timestamp: timestamp of first completed CS
  * @wait_status: wait for CS status
  * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
@@ -2376,7 +2376,7 @@ struct multi_cs_data {
        struct hl_ctx   *ctx;
        struct hl_fence **fence_arr;
        u64             *seq_arr;
-       s64             timeout_us;
+       s64             timeout_jiffies;
        s64             timestamp;
        long            wait_status;
        u32             completion_bitmap;