media: c8sectpfe: Clean up handling of *_buffer_aligned

[linux-2.6-microblaze.git] / fs / io_uring.c
diff --git a/fs/io_uring.c b/fs/io_uring.c

index fd8a1ff..d3ee4fc 100644 (file)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -112,7 +112,8 @@
                         IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS)
  
  #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
-                               REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA)
+                               REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \
+                               REQ_F_ASYNC_DATA)
  
  #define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
                                  IO_REQ_CLEAN_FLAGS)
@@ -297,8 +298,8 @@ struct io_buffer_list {
         /* below is for ring provided buffers */
         __u16 buf_nr_pages;
         __u16 nr_entries;
-       __u32 head;
-       __u32 mask;
+       __u16 head;
+       __u16 mask;
  };
  
  struct io_buffer {
@@ -540,6 +541,7 @@ struct io_uring_task {
         const struct io_ring_ctx *last;
         struct io_wq            *io_wq;
         struct percpu_counter   inflight;
+       atomic_t                inflight_tracked;
         atomic_t                in_idle;
  
         spinlock_t              task_lock;
@@ -574,7 +576,6 @@ struct io_close {
         struct file                     *file;
         int                             fd;
         u32                             file_slot;
-       u32                             flags;
  };
  
  struct io_timeout_data {
@@ -782,12 +783,6 @@ struct io_msg {
         u32 len;
  };
  
-struct io_nop {
-       struct file                     *file;
-       u64                             extra1;
-       u64                             extra2;
-};
-
  struct io_async_connect {
         struct sockaddr_storage         address;
  };
@@ -849,6 +844,7 @@ enum {
         REQ_F_SINGLE_POLL_BIT,
         REQ_F_DOUBLE_POLL_BIT,
         REQ_F_PARTIAL_IO_BIT,
+       REQ_F_CQE32_INIT_BIT,
         REQ_F_APOLL_MULTISHOT_BIT,
         /* keep async read/write and isreg together and in order */
         REQ_F_SUPPORT_NOWAIT_BIT,
@@ -918,6 +914,8 @@ enum {
         REQ_F_PARTIAL_IO        = BIT(REQ_F_PARTIAL_IO_BIT),
         /* fast poll multishot mode */
         REQ_F_APOLL_MULTISHOT   = BIT(REQ_F_APOLL_MULTISHOT_BIT),
+       /* ->extra1 and ->extra2 are initialised */
+       REQ_F_CQE32_INIT        = BIT(REQ_F_CQE32_INIT_BIT),
  };
  
  struct async_poll {
@@ -992,7 +990,6 @@ struct io_kiocb {
                 struct io_msg           msg;
                 struct io_xattr         xattr;
                 struct io_socket        sock;
-               struct io_nop           nop;
                 struct io_uring_cmd     uring_cmd;
         };
  
@@ -1119,7 +1116,6 @@ static const struct io_op_def io_op_defs[] = {
         [IORING_OP_NOP] = {
                 .audit_skip             = 1,
                 .iopoll                 = 1,
-               .buffer_select          = 1,
         },
         [IORING_OP_READV] = {
                 .needs_file             = 1,
@@ -1356,8 +1352,6 @@ static void io_clean_op(struct io_kiocb *req);
  static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
                                              unsigned issue_flags);
  static struct file *io_file_get_normal(struct io_kiocb *req, int fd);
-static void io_drop_inflight_file(struct io_kiocb *req);
-static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags);
  static void io_queue_sqe(struct io_kiocb *req);
  static void io_rsrc_put_work(struct work_struct *work);
  
@@ -1729,9 +1723,16 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
  
         if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
                 return;
-       /* don't recycle if we already did IO to this buffer */
-       if (req->flags & REQ_F_PARTIAL_IO)
+       /*
+        * For legacy provided buffer mode, don't recycle if we already did
+        * IO to this buffer. For ring-mapped provided buffer mode, we should
+        * increment the buffer list head (bl->head) so that the partially
+        * used buffer is consumed and cannot be handed out a second time.
+        */
+       if ((req->flags & REQ_F_BUFFER_SELECTED) &&
+           (req->flags & REQ_F_PARTIAL_IO))
                 return;
+
         /*
          * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
          * the flag and hence ensure that bl->head doesn't get incremented.
@@ -1739,8 +1740,13 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
          */
         if (req->flags & REQ_F_BUFFER_RING) {
                 if (req->buf_list) {
-                       req->buf_index = req->buf_list->bgid;
-                       req->flags &= ~REQ_F_BUFFER_RING;
+                       if (req->flags & REQ_F_PARTIAL_IO) {
+                               req->buf_list->head++;
+                               req->buf_list = NULL;
+                       } else {
+                               req->buf_index = req->buf_list->bgid;
+                               req->flags &= ~REQ_F_BUFFER_RING;
+                       }
                 }
                 return;
         }
@@ -1760,9 +1766,29 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
                           bool cancel_all)
         __must_hold(&req->ctx->timeout_lock)
  {
+       struct io_kiocb *req;
+
         if (task && head->task != task)
                 return false;
-       return cancel_all;
+       if (cancel_all)
+               return true;
+
+       io_for_each_link(req, head) {
+               if (req->flags & REQ_F_INFLIGHT)
+                       return true;
+       }
+       return false;
+}
+
+static bool io_match_linked(struct io_kiocb *head)
+{
+       struct io_kiocb *req;
+
+       io_for_each_link(req, head) {
+               if (req->flags & REQ_F_INFLIGHT)
+                       return true;
+       }
+       return false;
  }
  
  /*
@@ -1772,9 +1798,24 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
  static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
                                bool cancel_all)
  {
+       bool matched;
+
         if (task && head->task != task)
                 return false;
-       return cancel_all;
+       if (cancel_all)
+               return true;
+
+       if (head->flags & REQ_F_LINK_TIMEOUT) {
+               struct io_ring_ctx *ctx = head->ctx;
+
+               /* protect against races with linked timeouts */
+               spin_lock_irq(&ctx->timeout_lock);
+               matched = io_match_linked(head);
+               spin_unlock_irq(&ctx->timeout_lock);
+       } else {
+               matched = io_match_linked(head);
+       }
+       return matched;
  }
  
  static inline bool req_has_async_data(struct io_kiocb *req)
@@ -1930,6 +1971,14 @@ static inline bool io_req_ffs_set(struct io_kiocb *req)
         return req->flags & REQ_F_FIXED_FILE;
  }
  
+static inline void io_req_track_inflight(struct io_kiocb *req)
+{
+       if (!(req->flags & REQ_F_INFLIGHT)) {
+               req->flags |= REQ_F_INFLIGHT;
+               atomic_inc(&current->io_uring->inflight_tracked);
+       }
+}
+
  static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
  {
         if (WARN_ON_ONCE(!req->link))
@@ -2398,94 +2447,66 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
         return true;
  }
  
-static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
-                                s32 res, u32 cflags)
+static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
+                                    struct io_kiocb *req)
  {
         struct io_uring_cqe *cqe;
  
-       /*
-        * If we can't get a cq entry, userspace overflowed the
-        * submission (by quite a lot). Increment the overflow count in
-        * the ring.
-        */
-       cqe = io_get_cqe(ctx);
-       if (likely(cqe)) {
-               WRITE_ONCE(cqe->user_data, user_data);
-               WRITE_ONCE(cqe->res, res);
-               WRITE_ONCE(cqe->flags, cflags);
-               return true;
-       }
-       return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
-}
+       if (!(ctx->flags & IORING_SETUP_CQE32)) {
+               trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+                                       req->cqe.res, req->cqe.flags, 0, 0);
  
-static inline bool __io_fill_cqe_req_filled(struct io_ring_ctx *ctx,
-                                           struct io_kiocb *req)
-{
-       struct io_uring_cqe *cqe;
+               /*
+                * If we can't get a cq entry, userspace overflowed the
+                * submission (by quite a lot). Increment the overflow count in
+                * the ring.
+                */
+               cqe = io_get_cqe(ctx);
+               if (likely(cqe)) {
+                       memcpy(cqe, &req->cqe, sizeof(*cqe));
+                       return true;
+               }
  
-       trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
-                               req->cqe.res, req->cqe.flags, 0, 0);
+               return io_cqring_event_overflow(ctx, req->cqe.user_data,
+                                               req->cqe.res, req->cqe.flags,
+                                               0, 0);
+       } else {
+               u64 extra1 = 0, extra2 = 0;
  
-       /*
-        * If we can't get a cq entry, userspace overflowed the
-        * submission (by quite a lot). Increment the overflow count in
-        * the ring.
-        */
-       cqe = io_get_cqe(ctx);
-       if (likely(cqe)) {
-               memcpy(cqe, &req->cqe, sizeof(*cqe));
-               return true;
-       }
-       return io_cqring_event_overflow(ctx, req->cqe.user_data,
-                                       req->cqe.res, req->cqe.flags, 0, 0);
-}
+               if (req->flags & REQ_F_CQE32_INIT) {
+                       extra1 = req->extra1;
+                       extra2 = req->extra2;
+               }
  
-static inline bool __io_fill_cqe32_req_filled(struct io_ring_ctx *ctx,
-                                             struct io_kiocb *req)
-{
-       struct io_uring_cqe *cqe;
-       u64 extra1 = req->extra1;
-       u64 extra2 = req->extra2;
+               trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+                                       req->cqe.res, req->cqe.flags, extra1, extra2);
  
-       trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
-                               req->cqe.res, req->cqe.flags, extra1, extra2);
+               /*
+                * If we can't get a cq entry, userspace overflowed the
+                * submission (by quite a lot). Increment the overflow count in
+                * the ring.
+                */
+               cqe = io_get_cqe(ctx);
+               if (likely(cqe)) {
+                       memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
+                       WRITE_ONCE(cqe->big_cqe[0], extra1);
+                       WRITE_ONCE(cqe->big_cqe[1], extra2);
+                       return true;
+               }
  
-       /*
-        * If we can't get a cq entry, userspace overflowed the
-        * submission (by quite a lot). Increment the overflow count in
-        * the ring.
-        */
-       cqe = io_get_cqe(ctx);
-       if (likely(cqe)) {
-               memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
-               cqe->big_cqe[0] = extra1;
-               cqe->big_cqe[1] = extra2;
-               return true;
+               return io_cqring_event_overflow(ctx, req->cqe.user_data,
+                               req->cqe.res, req->cqe.flags,
+                               extra1, extra2);
         }
-
-       return io_cqring_event_overflow(ctx, req->cqe.user_data, req->cqe.res,
-                                       req->cqe.flags, extra1, extra2);
  }
  
-static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
-{
-       trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags, 0, 0);
-       return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags);
-}
-
-static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags,
-                               u64 extra1, u64 extra2)
+static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
+                                    s32 res, u32 cflags)
  {
-       struct io_ring_ctx *ctx = req->ctx;
         struct io_uring_cqe *cqe;
  
-       if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_CQE32)))
-               return;
-       if (req->flags & REQ_F_CQE_SKIP)
-               return;
-
-       trace_io_uring_complete(ctx, req, req->cqe.user_data, res, cflags,
-                               extra1, extra2);
+       ctx->cq_extra++;
+       trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
  
         /*
          * If we can't get a cq entry, userspace overflowed the
@@ -2494,23 +2515,17 @@ static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags
          */
         cqe = io_get_cqe(ctx);
         if (likely(cqe)) {
-               WRITE_ONCE(cqe->user_data, req->cqe.user_data);
+               WRITE_ONCE(cqe->user_data, user_data);
                 WRITE_ONCE(cqe->res, res);
                 WRITE_ONCE(cqe->flags, cflags);
-               WRITE_ONCE(cqe->big_cqe[0], extra1);
-               WRITE_ONCE(cqe->big_cqe[1], extra2);
-               return;
-       }
  
-       io_cqring_event_overflow(ctx, req->cqe.user_data, res, cflags, extra1, extra2);
-}
-
-static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
-                                    s32 res, u32 cflags)
-{
-       ctx->cq_extra++;
-       trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
-       return __io_fill_cqe(ctx, user_data, res, cflags);
+               if (ctx->flags & IORING_SETUP_CQE32) {
+                       WRITE_ONCE(cqe->big_cqe[0], 0);
+                       WRITE_ONCE(cqe->big_cqe[1], 0);
+               }
+               return true;
+       }
+       return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
  }
  
  static void __io_req_complete_put(struct io_kiocb *req)
@@ -2547,16 +2562,11 @@ static void __io_req_complete_put(struct io_kiocb *req)
  static void __io_req_complete_post(struct io_kiocb *req, s32 res,
                                    u32 cflags)
  {
-       if (!(req->flags & REQ_F_CQE_SKIP))
-               __io_fill_cqe_req(req, res, cflags);
-       __io_req_complete_put(req);
-}
-
-static void __io_req_complete_post32(struct io_kiocb *req, s32 res,
-                                  u32 cflags, u64 extra1, u64 extra2)
-{
-       if (!(req->flags & REQ_F_CQE_SKIP))
-               __io_fill_cqe32_req(req, res, cflags, extra1, extra2);
+       if (!(req->flags & REQ_F_CQE_SKIP)) {
+               req->cqe.res = res;
+               req->cqe.flags = cflags;
+               __io_fill_cqe_req(req->ctx, req);
+       }
         __io_req_complete_put(req);
  }
  
@@ -2571,18 +2581,6 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags)
         io_cqring_ev_posted(ctx);
  }
  
-static void io_req_complete_post32(struct io_kiocb *req, s32 res,
-                                  u32 cflags, u64 extra1, u64 extra2)
-{
-       struct io_ring_ctx *ctx = req->ctx;
-
-       spin_lock(&ctx->completion_lock);
-       __io_req_complete_post32(req, res, cflags, extra1, extra2);
-       io_commit_cqring(ctx);
-       spin_unlock(&ctx->completion_lock);
-       io_cqring_ev_posted(ctx);
-}
-
  static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
                                          u32 cflags)
  {
@@ -2600,19 +2598,6 @@ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
                 io_req_complete_post(req, res, cflags);
  }
  
-static inline void __io_req_complete32(struct io_kiocb *req,
-                                      unsigned int issue_flags, s32 res,
-                                      u32 cflags, u64 extra1, u64 extra2)
-{
-       if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
-               io_req_complete_state(req, res, cflags);
-               req->extra1 = extra1;
-               req->extra2 = extra2;
-       } else {
-               io_req_complete_post32(req, res, cflags, extra1, extra2);
-       }
-}
-
  static inline void io_req_complete(struct io_kiocb *req, s32 res)
  {
         if (res < 0)
@@ -2991,8 +2976,6 @@ static void __io_req_task_work_add(struct io_kiocb *req,
         unsigned long flags;
         bool running;
  
-       io_drop_inflight_file(req);
-
         spin_lock_irqsave(&tctx->task_lock, flags);
         wq_list_add_tail(&req->io_task_work.node, list);
         running = tctx->task_running;
@@ -3161,12 +3144,8 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
                         struct io_kiocb *req = container_of(node, struct io_kiocb,
                                                     comp_list);
  
-                       if (!(req->flags & REQ_F_CQE_SKIP)) {
-                               if (!(ctx->flags & IORING_SETUP_CQE32))
-                                       __io_fill_cqe_req_filled(ctx, req);
-                               else
-                                       __io_fill_cqe32_req_filled(ctx, req);
-                       }
+                       if (!(req->flags & REQ_F_CQE_SKIP))
+                               __io_fill_cqe_req(ctx, req);
                 }
  
                 io_commit_cqring(ctx);
@@ -3285,7 +3264,9 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
                 nr_events++;
                 if (unlikely(req->flags & REQ_F_CQE_SKIP))
                         continue;
-               __io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0));
+
+               req->cqe.flags = io_put_kbuf(req, 0);
+               __io_fill_cqe_req(req->ctx, req);
         }
  
         if (unlikely(!nr_events))
@@ -3835,19 +3816,17 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
  {
         struct io_uring_buf_ring *br = bl->buf_ring;
         struct io_uring_buf *buf;
-       __u32 head = bl->head;
+       __u16 head = bl->head;
  
-       if (unlikely(smp_load_acquire(&br->tail) == head)) {
-               io_ring_submit_unlock(req->ctx, issue_flags);
+       if (unlikely(smp_load_acquire(&br->tail) == head))
                 return NULL;
-       }
  
         head &= bl->mask;
         if (head < IO_BUFFER_LIST_BUF_PER_PAGE) {
                 buf = &br->bufs[head];
         } else {
                 int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
-               int index = head / IO_BUFFER_LIST_BUF_PER_PAGE - 1;
+               int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
                 buf = page_address(bl->buf_pages[index]);
                 buf += off;
         }
@@ -3857,7 +3836,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
         req->buf_list = bl;
         req->buf_index = buf->bid;
  
-       if (issue_flags & IO_URING_F_UNLOCKED) {
+       if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) {
                 /*
                  * If we came in unlocked, we have no choice but to consume the
                  * buffer here. This does mean it'll be pinned until the IO
@@ -5038,10 +5017,18 @@ void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
  
         req->uring_cmd.task_work_cb = task_work_cb;
         req->io_task_work.func = io_uring_cmd_work;
-       io_req_task_prio_work_add(req);
+       io_req_task_work_add(req);
  }
  EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
  
+static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
+                                         u64 extra1, u64 extra2)
+{
+       req->extra1 = extra1;
+       req->extra2 = extra2;
+       req->flags |= REQ_F_CQE32_INIT;
+}
+
  /*
   * Called by consumers of io_uring_cmd, if they originally returned
   * -EIOCBQUEUED upon receiving the command.
@@ -5052,10 +5039,10 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
  
         if (ret < 0)
                 req_set_fail(req);
+
         if (req->ctx->flags & IORING_SETUP_CQE32)
-               __io_req_complete32(req, 0, ret, 0, res2, 0);
-       else
-               io_req_complete(req, ret);
+               io_req_set_cqe32_extra(req, res2, 0);
+       io_req_complete(req, ret);
  }
  EXPORT_SYMBOL_GPL(io_uring_cmd_done);
  
@@ -5217,14 +5204,6 @@ done:
  
  static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  {
-       /*
-        * If the ring is setup with CQE32, relay back addr/addr
-        */
-       if (req->ctx->flags & IORING_SETUP_CQE32) {
-               req->nop.extra1 = READ_ONCE(sqe->addr);
-               req->nop.extra2 = READ_ONCE(sqe->addr2);
-       }
-
         return 0;
  }
  
@@ -5233,23 +5212,7 @@ static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
   */
  static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
  {
-       unsigned int cflags;
-       void __user *buf;
-
-       if (req->flags & REQ_F_BUFFER_SELECT) {
-               size_t len = 1;
-
-               buf = io_buffer_select(req, &len, issue_flags);
-               if (!buf)
-                       return -ENOBUFS;
-       }
-
-       cflags = io_put_kbuf(req, issue_flags);
-       if (!(req->ctx->flags & IORING_SETUP_CQE32))
-               __io_req_complete(req, issue_flags, 0, cflags);
-       else
-               __io_req_complete32(req, issue_flags, 0, cflags,
-                                   req->nop.extra1, req->nop.extra2);
+       __io_req_complete(req, issue_flags, 0, 0);
         return 0;
  }
  
@@ -5448,27 +5411,24 @@ static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
         struct io_ring_ctx *ctx = req->ctx;
         int ret;
  
+       io_ring_submit_lock(ctx, issue_flags);
+
         if (alloc_slot) {
-               io_ring_submit_lock(ctx, issue_flags);
                 ret = io_file_bitmap_get(ctx);
-               if (unlikely(ret < 0)) {
-                       io_ring_submit_unlock(ctx, issue_flags);
-                       fput(file);
-                       return ret;
-               }
-
+               if (unlikely(ret < 0))
+                       goto err;
                 file_slot = ret;
         } else {
                 file_slot--;
         }
  
         ret = io_install_fixed_file(req, file, issue_flags, file_slot);
-       if (alloc_slot) {
-               io_ring_submit_unlock(ctx, issue_flags);
-               if (!ret)
-                       return file_slot;
-       }
-
+       if (!ret && alloc_slot)
+               ret = file_slot;
+err:
+       io_ring_submit_unlock(ctx, issue_flags);
+       if (unlikely(ret < 0))
+               fput(file);
         return ret;
  }
  
@@ -5950,18 +5910,14 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
  
  static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  {
-       if (sqe->off || sqe->addr || sqe->len || sqe->buf_index)
+       if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
                 return -EINVAL;
         if (req->flags & REQ_F_FIXED_FILE)
                 return -EBADF;
  
         req->close.fd = READ_ONCE(sqe->fd);
         req->close.file_slot = READ_ONCE(sqe->file_index);
-       req->close.flags = READ_ONCE(sqe->close_flags);
-       if (req->close.flags & ~IORING_CLOSE_FD_AND_FILE_SLOT)
-               return -EINVAL;
-       if (!(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT) &&
-           req->close.file_slot && req->close.fd)
+       if (req->close.file_slot && req->close.fd)
                 return -EINVAL;
  
         return 0;
@@ -5972,13 +5928,12 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
         struct files_struct *files = current->files;
         struct io_close *close = &req->close;
         struct fdtable *fdt;
-       struct file *file = NULL;
+       struct file *file;
         int ret = -EBADF;
  
         if (req->close.file_slot) {
                 ret = io_close_fixed(req, issue_flags);
-               if (ret || !(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT))
-                       goto err;
+               goto err;
         }
  
         spin_lock(&files->file_lock);
@@ -5991,7 +5946,6 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
                         lockdep_is_held(&files->file_lock));
         if (!file || file->f_op == &io_uring_fops) {
                 spin_unlock(&files->file_lock);
-               file = NULL;
                 goto err;
         }
  
@@ -6001,21 +5955,16 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
                 return -EAGAIN;
         }
  
-       ret = __close_fd_get_file(close->fd, &file);
+       file = __close_fd_get_file(close->fd);
         spin_unlock(&files->file_lock);
-       if (ret < 0) {
-               if (ret == -ENOENT)
-                       ret = -EBADF;
+       if (!file)
                 goto err;
-       }
  
         /* No ->flush() or already async, safely close from here */
         ret = filp_close(file, current->files);
  err:
         if (ret < 0)
                 req_set_fail(req);
-       if (file)
-               fput(file);
         __io_req_complete(req, issue_flags, ret, 0);
         return 0;
  }
@@ -6917,10 +6866,6 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
  
                 if (!req->cqe.res) {
                         struct poll_table_struct pt = { ._key = req->apoll_events };
-                       unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED;
-
-                       if (unlikely(!io_assign_file(req, flags)))
-                               return -EBADF;
                         req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
                 }
  
@@ -8035,8 +7980,8 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req,
                 if (ret < 0)
                         break;
                 if (copy_to_user(&fds[done], &ret, sizeof(ret))) {
-                       ret = -EFAULT;
                         __io_close_fixed(req, issue_flags, ret);
+                       ret = -EFAULT;
                         break;
                 }
         }
@@ -8328,6 +8273,11 @@ static void io_clean_op(struct io_kiocb *req)
                 kfree(req->apoll);
                 req->apoll = NULL;
         }
+       if (req->flags & REQ_F_INFLIGHT) {
+               struct io_uring_task *tctx = req->task->io_uring;
+
+               atomic_dec(&tctx->inflight_tracked);
+       }
         if (req->flags & REQ_F_CREDS)
                 put_cred(req->creds);
         if (req->flags & REQ_F_ASYNC_DATA) {
@@ -8634,19 +8584,6 @@ out:
         return file;
  }
  
-/*
- * Drop the file for requeue operations. Only used of req->file is the
- * io_uring descriptor itself.
- */
-static void io_drop_inflight_file(struct io_kiocb *req)
-{
-       if (unlikely(req->flags & REQ_F_INFLIGHT)) {
-               fput(req->file);
-               req->file = NULL;
-               req->flags &= ~REQ_F_INFLIGHT;
-       }
-}
-
  static struct file *io_file_get_normal(struct io_kiocb *req, int fd)
  {
         struct file *file = fget(fd);
@@ -8655,7 +8592,7 @@ static struct file *io_file_get_normal(struct io_kiocb *req, int fd)
  
         /* we don't allow fixed io_uring files */
         if (file && file->f_op == &io_uring_fops)
-               req->flags |= REQ_F_INFLIGHT;
+               io_req_track_inflight(req);
         return file;
  }
  
@@ -8753,6 +8690,7 @@ static void io_queue_async(struct io_kiocb *req, int ret)
                  * Queued up for async execution, worker will release
                  * submit reference when the iocb is actually submitted.
                  */
+               io_kbuf_recycle(req, 0);
                 io_queue_iowq(req, NULL);
                 break;
         case IO_APOLL_OK:
@@ -10187,21 +10125,19 @@ static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
  
  static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
                                  unsigned int issue_flags, u32 slot_index)
+       __must_hold(&req->ctx->uring_lock)
  {
         struct io_ring_ctx *ctx = req->ctx;
         bool needs_switch = false;
         struct io_fixed_file *file_slot;
-       int ret = -EBADF;
+       int ret;
  
-       io_ring_submit_lock(ctx, issue_flags);
         if (file->f_op == &io_uring_fops)
-               goto err;
-       ret = -ENXIO;
+               return -EBADF;
         if (!ctx->file_data)
-               goto err;
-       ret = -EINVAL;
+               return -ENXIO;
         if (slot_index >= ctx->nr_user_files)
-               goto err;
+               return -EINVAL;
  
         slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
         file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
@@ -10232,7 +10168,6 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
  err:
         if (needs_switch)
                 io_rsrc_node_switch(ctx, ctx->file_data);
-       io_ring_submit_unlock(ctx, issue_flags);
         if (ret)
                 fput(file);
         return ret;
@@ -10430,6 +10365,7 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
         xa_init(&tctx->xa);
         init_waitqueue_head(&tctx->wait);
         atomic_set(&tctx->in_idle, 0);
+       atomic_set(&tctx->inflight_tracked, 0);
         task->io_uring = tctx;
         spin_lock_init(&tctx->task_lock);
         INIT_WQ_LIST(&tctx->task_list);
@@ -11668,7 +11604,7 @@ static __cold void io_uring_clean_tctx(struct io_uring_task *tctx)
  static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
  {
         if (tracked)
-               return 0;
+               return atomic_read(&tctx->inflight_tracked);
         return percpu_counter_sum(&tctx->inflight);
  }
  
@@ -12044,14 +11980,14 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                         return -EINVAL;
                 fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
                 f.file = tctx->registered_rings[fd];
-               if (unlikely(!f.file))
-                       return -EBADF;
+               f.flags = 0;
         } else {
                 f = fdget(fd);
-               if (unlikely(!f.file))
-                       return -EBADF;
         }
  
+       if (unlikely(!f.file))
+               return -EBADF;
+
         ret = -EOPNOTSUPP;
         if (unlikely(f.file->f_op != &io_uring_fops))
                 goto out_fput;
@@ -12149,8 +12085,7 @@ iopoll_locked:
  out:
         percpu_ref_put(&ctx->refs);
  out_fput:
-       if (!(flags & IORING_ENTER_REGISTERED_RING))
-               fdput(f);
+       fdput(f);
         return ret;
  }
  
@@ -13000,6 +12935,10 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
         if (!is_power_of_2(reg.ring_entries))
                 return -EINVAL;
  
+       /* cannot disambiguate full vs empty due to head/tail size */
+       if (reg.ring_entries >= 65536)
+               return -EINVAL;
+
         if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
                 int ret = io_init_bl_list(ctx);
                 if (ret)