io_uring: fix failed linkchain code logic
authorHao Xu <haoxu@linux.alibaba.com>
Fri, 27 Aug 2021 09:46:09 +0000 (17:46 +0800)
committerJens Axboe <axboe@kernel.dk>
Fri, 27 Aug 2021 13:27:24 +0000 (07:27 -0600)
Given a linkchain like this:
req0(link_flag)-->req1(link_flag)-->...-->reqn(no link_flag)

There is a problem:
 - if some intermediate linked req like req1 's submittion fails, reqs
   after it won't be cancelled.

   - sqpoll disabled: maybe it's ok since users can get the error info
     of req1 and stop submitting the following sqes.

   - sqpoll enabled: definitely a problem, the following sqes will be
     submitted in the next round.

The solution is to refactor the code logic to:
 - if a linked req's submittion fails, just mark it and the head(if it
   exists) as REQ_F_FAIL. Leverage req->result to indicate whether it
   is failed or cancelled.
 - submit or fail the whole chain when we come to the end of it.

Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20210827094609.36052-3-haoxu@linux.alibaba.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c

index 48cc632..1341b71 100644 (file)
@@ -1188,6 +1188,12 @@ static inline void req_set_fail(struct io_kiocb *req)
        req->flags |= REQ_F_FAIL;
 }
 
+static inline void req_fail_link_node(struct io_kiocb *req, int res)
+{
+       req_set_fail(req);
+       req->result = res;
+}
+
 static void io_ring_ctx_ref_free(struct percpu_ref *ref)
 {
        struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
@@ -1957,11 +1963,16 @@ static void io_fail_links(struct io_kiocb *req)
 
        req->link = NULL;
        while (link) {
+               long res = -ECANCELED;
+
+               if (link->flags & REQ_F_FAIL)
+                       res = link->result;
+
                nxt = link->link;
                link->link = NULL;
 
                trace_io_uring_fail_link(req, link);
-               io_cqring_fill_event(link->ctx, link->user_data, -ECANCELED, 0);
+               io_cqring_fill_event(link->ctx, link->user_data, res, 0);
                io_put_req_deferred(link);
                link = nxt;
        }
@@ -6622,8 +6633,10 @@ static inline void io_queue_sqe(struct io_kiocb *req)
        if (unlikely(req->ctx->drain_active) && io_drain_req(req))
                return;
 
-       if (likely(!(req->flags & REQ_F_FORCE_ASYNC))) {
+       if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
                __io_queue_sqe(req);
+       } else if (req->flags & REQ_F_FAIL) {
+               io_req_complete_failed(req, req->result);
        } else {
                int ret = io_req_prep_async(req);
 
@@ -6732,19 +6745,34 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
        ret = io_init_req(ctx, req, sqe);
        if (unlikely(ret)) {
 fail_req:
+               /* fail even hard links since we don't submit */
                if (link->head) {
-                       /* fail even hard links since we don't submit */
-                       io_req_complete_failed(link->head, -ECANCELED);
-                       link->head = NULL;
+                       /*
+                        * we can judge a link req is failed or cancelled by if
+                        * REQ_F_FAIL is set, but the head is an exception since
+                        * it may be set REQ_F_FAIL because of other req's failure
+                        * so let's leverage req->result to distinguish if a head
+                        * is set REQ_F_FAIL because of its failure or other req's
+                        * failure so that we can set the correct ret code for it.
+                        * init result here to avoid affecting the normal path.
+                        */
+                       if (!(link->head->flags & REQ_F_FAIL))
+                               req_fail_link_node(link->head, -ECANCELED);
+               } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
+                       /*
+                        * the current req is a normal req, we should return
+                        * error and thus break the submittion loop.
+                        */
+                       io_req_complete_failed(req, ret);
+                       return ret;
                }
-               io_req_complete_failed(req, ret);
-               return ret;
+               req_fail_link_node(req, ret);
+       } else {
+               ret = io_req_prep(req, sqe);
+               if (unlikely(ret))
+                       goto fail_req;
        }
 
-       ret = io_req_prep(req, sqe);
-       if (unlikely(ret))
-               goto fail_req;
-
        /* don't need @sqe from now on */
        trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
                                  req->flags, true,
@@ -6760,9 +6788,14 @@ fail_req:
        if (link->head) {
                struct io_kiocb *head = link->head;
 
-               ret = io_req_prep_async(req);
-               if (unlikely(ret))
-                       goto fail_req;
+               if (!(req->flags & REQ_F_FAIL)) {
+                       ret = io_req_prep_async(req);
+                       if (unlikely(ret)) {
+                               req_fail_link_node(req, ret);
+                               if (!(head->flags & REQ_F_FAIL))
+                                       req_fail_link_node(head, -ECANCELED);
+                       }
+               }
                trace_io_uring_link(ctx, req, head);
                link->last->link = req;
                link->last = req;