struct io_submit_state submit_state;
struct list_head timeout_list;
+ struct list_head ltimeout_list;
struct list_head cq_overflow_list;
struct xarray io_buffers;
struct xarray personalities;
struct hrtimer timer;
struct timespec64 ts;
enum hrtimer_mode mode;
+ u32 flags;
};
struct io_accept {
struct sockaddr __user *addr;
int __user *addr_len;
int flags;
+ u32 file_slot;
unsigned long nofile;
};
/* timeout update */
struct timespec64 ts;
u32 flags;
+ bool ltimeout;
};
struct io_rw {
struct io_open {
struct file *file;
int dfd;
+ u32 file_slot;
struct filename *filename;
struct open_how how;
unsigned long nofile;
struct filename *filename;
};
+ struct io_mkdir {
+ struct file *file;
+ int dfd;
+ umode_t mode;
+ struct filename *filename;
+ };
+
+ struct io_symlink {
+ struct file *file;
+ int new_dfd;
+ struct filename *oldpath;
+ struct filename *newpath;
+ };
+
+ struct io_hardlink {
+ struct file *file;
+ int old_dfd;
+ int new_dfd;
+ struct filename *oldpath;
+ struct filename *newpath;
+ int flags;
+ };
+
struct io_completion {
struct file *file;
u32 cflags;
struct io_poll_iocb *double_poll;
};
-typedef void (*io_req_tw_func_t)(struct io_kiocb *req);
+typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);
struct io_task_work {
union {
struct io_shutdown shutdown;
struct io_rename rename;
struct io_unlink unlink;
+ struct io_mkdir mkdir;
+ struct io_symlink symlink;
+ struct io_hardlink hardlink;
/* use only after cleaning per-op data, see io_clean_op() */
struct io_completion compl;
};
},
[IORING_OP_RENAMEAT] = {},
[IORING_OP_UNLINKAT] = {},
+ [IORING_OP_MKDIRAT] = {},
+ [IORING_OP_SYMLINKAT] = {},
+ [IORING_OP_LINKAT] = {},
};
/* requests with any of those set should undergo io_disarm_next() */
static void io_submit_flush_completions(struct io_ring_ctx *ctx);
static int io_req_prep_async(struct io_kiocb *req);
+static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+ unsigned int issue_flags, u32 slot_index);
+static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
+
static struct kmem_cache *req_cachep;
static const struct file_operations io_uring_fops;
}
EXPORT_SYMBOL(io_uring_get_socket);
+static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
+{
+ if (!*locked) {
+ mutex_lock(&ctx->uring_lock);
+ *locked = true;
+ }
+}
+
#define io_for_each_link(pos, head) \
for (pos = (head); pos; pos = pos->link)
req->flags |= REQ_F_FAIL;
}
+static inline void req_fail_link_node(struct io_kiocb *req, int res)
+{
+ req_set_fail(req);
+ req->result = res;
+}
+
static void io_ring_ctx_ref_free(struct percpu_ref *ref)
{
struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
fallback_work.work);
struct llist_node *node = llist_del_all(&ctx->fallback_llist);
struct io_kiocb *req, *tmp;
+ bool locked = false;
percpu_ref_get(&ctx->refs);
llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node)
- req->io_task_work.func(req);
+ req->io_task_work.func(req, &locked);
+
+ if (locked) {
+ if (ctx->submit_state.compl_nr)
+ io_submit_flush_completions(ctx);
+ mutex_unlock(&ctx->uring_lock);
+ }
percpu_ref_put(&ctx->refs);
+
}
static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->iopoll_list);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
+ INIT_LIST_HEAD(&ctx->ltimeout_list);
spin_lock_init(&ctx->rsrc_ref_lock);
INIT_LIST_HEAD(&ctx->rsrc_ref_list);
INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
}
}
-static void io_queue_async_work(struct io_kiocb *req)
+static void io_queue_async_work(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *link = io_prep_linked_timeout(req);
struct io_uring_task *tctx = req->task->io_uring;
+ /* must not take the lock, NULL it as a precaution */
+ locked = NULL;
+
BUG_ON(!tctx);
BUG_ON(!tctx->io_wq);
}
}
+static void io_task_refs_refill(struct io_uring_task *tctx)
+{
+ unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR;
+
+ percpu_counter_add(&tctx->inflight, refill);
+ refcount_add(refill, ¤t->usage);
+ tctx->cached_refs += refill;
+}
+
+static inline void io_get_task_refs(int nr)
+{
+ struct io_uring_task *tctx = current->io_uring;
+
+ tctx->cached_refs -= nr;
+ if (unlikely(tctx->cached_refs < 0))
+ io_task_refs_refill(tctx);
+}
+
static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
long res, unsigned int cflags)
{
io_remove_next_linked(req);
link->timeout.head = NULL;
if (hrtimer_try_to_cancel(&io->timer) != -1) {
+ list_del(&link->timeout.list);
io_cqring_fill_event(link->ctx, link->user_data,
-ECANCELED, 0);
io_put_req_deferred(link);
req->link = NULL;
while (link) {
+ long res = -ECANCELED;
+
+ if (link->flags & REQ_F_FAIL)
+ res = link->result;
+
nxt = link->link;
link->link = NULL;
trace_io_uring_fail_link(req, link);
- io_cqring_fill_event(link->ctx, link->user_data, -ECANCELED, 0);
+ io_cqring_fill_event(link->ctx, link->user_data, res, 0);
io_put_req_deferred(link);
link = nxt;
}
return __io_req_find_next(req);
}
-static void ctx_flush_and_put(struct io_ring_ctx *ctx)
+static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
{
if (!ctx)
return;
- if (ctx->submit_state.compl_nr) {
- mutex_lock(&ctx->uring_lock);
+ if (*locked) {
if (ctx->submit_state.compl_nr)
io_submit_flush_completions(ctx);
mutex_unlock(&ctx->uring_lock);
+ *locked = false;
}
percpu_ref_put(&ctx->refs);
}
static void tctx_task_work(struct callback_head *cb)
{
+ bool locked = false;
struct io_ring_ctx *ctx = NULL;
struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
task_work);
io_task_work.node);
if (req->ctx != ctx) {
- ctx_flush_and_put(ctx);
+ ctx_flush_and_put(ctx, &locked);
ctx = req->ctx;
+ /* if not contended, grab and improve batching */
+ locked = mutex_trylock(&ctx->uring_lock);
percpu_ref_get(&ctx->refs);
}
- req->io_task_work.func(req);
+ req->io_task_work.func(req, &locked);
node = next;
} while (node);
cond_resched();
}
- ctx_flush_and_put(ctx);
+ ctx_flush_and_put(ctx, &locked);
}
static void io_req_task_work_add(struct io_kiocb *req)
}
}
-static void io_req_task_cancel(struct io_kiocb *req)
+static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
- /* ctx is guaranteed to stay alive while we hold uring_lock */
- mutex_lock(&ctx->uring_lock);
+ /* not needed for normal modes, but SQPOLL depends on it */
+ io_tw_lock(ctx, locked);
io_req_complete_failed(req, req->result);
- mutex_unlock(&ctx->uring_lock);
}
-static void io_req_task_submit(struct io_kiocb *req)
+static void io_req_task_submit(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
- /* ctx stays valid until unlock, even if we drop all ours ctx->refs */
- mutex_lock(&ctx->uring_lock);
+ io_tw_lock(ctx, locked);
/* req->task == current here, checking PF_EXITING is safe */
if (likely(!(req->task->flags & PF_EXITING)))
__io_queue_sqe(req);
else
io_req_complete_failed(req, -EFAULT);
- mutex_unlock(&ctx->uring_lock);
}
static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
__io_free_req(req);
}
+static void io_free_req_work(struct io_kiocb *req, bool *locked)
+{
+ io_free_req(req);
+}
+
struct req_batch {
struct task_struct *task;
int task_refs;
static inline void io_put_req_deferred(struct io_kiocb *req)
{
if (req_ref_put_and_test(req)) {
- req->io_task_work.func = io_free_req;
+ req->io_task_work.func = io_free_req_work;
io_req_task_work_add(req);
}
}
return false;
}
-static void io_req_task_complete(struct io_kiocb *req)
+static void io_req_task_complete(struct io_kiocb *req, bool *locked)
{
- __io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
+ unsigned int cflags = io_put_rw_kbuf(req);
+ long res = req->result;
+
+ if (*locked) {
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_submit_state *state = &ctx->submit_state;
+
+ io_req_complete_state(req, res, cflags);
+ state->compl_reqs[state->compl_nr++] = req;
+ if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
+ io_submit_flush_completions(ctx);
+ } else {
+ io_req_complete_post(req, res, cflags);
+ }
}
static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
{
if (__io_complete_rw_common(req, res))
return;
- io_req_task_complete(req);
+ __io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
!kiocb->ki_filp->f_op->iopoll)
return -EOPNOTSUPP;
- kiocb->ki_flags |= IOCB_HIPRI;
+ kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
kiocb->ki_complete = io_complete_rw_iopoll;
req->iopoll_completed = 0;
} else {
return 0;
}
+ static int io_mkdirat_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+ {
+ struct io_mkdir *mkd = &req->mkdir;
+ const char __user *fname;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index ||
+ sqe->splice_fd_in)
+ return -EINVAL;
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
+ return -EBADF;
+
+ mkd->dfd = READ_ONCE(sqe->fd);
+ mkd->mode = READ_ONCE(sqe->len);
+
+ fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ mkd->filename = getname(fname);
+ if (IS_ERR(mkd->filename))
+ return PTR_ERR(mkd->filename);
+
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return 0;
+ }
+
+ static int io_mkdirat(struct io_kiocb *req, int issue_flags)
+ {
+ struct io_mkdir *mkd = &req->mkdir;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode);
+
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_complete(req, ret);
+ return 0;
+ }
+
+ static int io_symlinkat_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+ {
+ struct io_symlink *sl = &req->symlink;
+ const char __user *oldpath, *newpath;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index ||
+ sqe->splice_fd_in)
+ return -EINVAL;
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
+ return -EBADF;
+
+ sl->new_dfd = READ_ONCE(sqe->fd);
+ oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+
+ sl->oldpath = getname(oldpath);
+ if (IS_ERR(sl->oldpath))
+ return PTR_ERR(sl->oldpath);
+
+ sl->newpath = getname(newpath);
+ if (IS_ERR(sl->newpath)) {
+ putname(sl->oldpath);
+ return PTR_ERR(sl->newpath);
+ }
+
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return 0;
+ }
+
+ static int io_symlinkat(struct io_kiocb *req, int issue_flags)
+ {
+ struct io_symlink *sl = &req->symlink;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath);
+
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_complete(req, ret);
+ return 0;
+ }
+
+ static int io_linkat_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+ {
+ struct io_hardlink *lnk = &req->hardlink;
+ const char __user *oldf, *newf;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+ return -EINVAL;
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
+ return -EBADF;
+
+ lnk->old_dfd = READ_ONCE(sqe->fd);
+ lnk->new_dfd = READ_ONCE(sqe->len);
+ oldf = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ lnk->flags = READ_ONCE(sqe->hardlink_flags);
+
+ lnk->oldpath = getname(oldf);
+ if (IS_ERR(lnk->oldpath))
+ return PTR_ERR(lnk->oldpath);
+
+ lnk->newpath = getname(newf);
+ if (IS_ERR(lnk->newpath)) {
+ putname(lnk->oldpath);
+ return PTR_ERR(lnk->newpath);
+ }
+
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return 0;
+ }
+
+ static int io_linkat(struct io_kiocb *req, int issue_flags)
+ {
+ struct io_hardlink *lnk = &req->hardlink;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd,
+ lnk->newpath, lnk->flags);
+
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_complete(req, ret);
+ return 0;
+ }
+
static int io_shutdown_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (unlikely(sqe->ioprio || sqe->buf_index || sqe->splice_fd_in))
+ if (unlikely(sqe->ioprio || sqe->buf_index))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
req->open.filename = NULL;
return ret;
}
+
+ req->open.file_slot = READ_ONCE(sqe->file_index);
+ if (req->open.file_slot && (req->open.how.flags & O_CLOEXEC))
+ return -EINVAL;
+
req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
{
struct open_flags op;
struct file *file;
- bool nonblock_set;
- bool resolve_nonblock;
+ bool resolve_nonblock, nonblock_set;
+ bool fixed = !!req->open.file_slot;
int ret;
ret = build_open_flags(&req->open.how, &op);
op.open_flag |= O_NONBLOCK;
}
- ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
- if (ret < 0)
- goto err;
+ if (!fixed) {
+ ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
+ if (ret < 0)
+ goto err;
+ }
file = do_filp_open(req->open.dfd, req->open.filename, &op);
if (IS_ERR(file)) {
* marginal gain for something that is now known to be a slower
* path. So just put it, and we'll get a new one when we retry.
*/
- put_unused_fd(ret);
+ if (!fixed)
+ put_unused_fd(ret);
ret = PTR_ERR(file);
/* only retry if RESOLVE_CACHED wasn't already set by application */
if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
file->f_flags &= ~O_NONBLOCK;
fsnotify_open(file);
- fd_install(ret, file);
+
+ if (!fixed)
+ fd_install(ret, file);
+ else
+ ret = io_install_fixed_file(req, file, issue_flags,
+ req->open.file_slot - 1);
err:
putname(req->open.filename);
req->flags &= ~REQ_F_NEED_CLEANUP;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->splice_fd_in)
+ if (sqe->ioprio || sqe->len || sqe->buf_index)
return -EINVAL;
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
accept->nofile = rlimit(RLIMIT_NOFILE);
+
+ accept->file_slot = READ_ONCE(sqe->file_index);
+ if (accept->file_slot && ((req->open.how.flags & O_CLOEXEC) ||
+ (accept->flags & SOCK_CLOEXEC)))
+ return -EINVAL;
+ if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return -EINVAL;
+ if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
+ accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
return 0;
}
struct io_accept *accept = &req->accept;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
- int ret;
+ bool fixed = !!accept->file_slot;
+ struct file *file;
+ int ret, fd;
if (req->file->f_flags & O_NONBLOCK)
req->flags |= REQ_F_NOWAIT;
- ret = __sys_accept4_file(req->file, file_flags, accept->addr,
- accept->addr_len, accept->flags,
- accept->nofile);
- if (ret == -EAGAIN && force_nonblock)
- return -EAGAIN;
- if (ret < 0) {
+ if (!fixed) {
+ fd = __get_unused_fd_flags(accept->flags, accept->nofile);
+ if (unlikely(fd < 0))
+ return fd;
+ }
+ file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
+ accept->flags);
+ if (IS_ERR(file)) {
+ if (!fixed)
+ put_unused_fd(fd);
+ ret = PTR_ERR(file);
+ if (ret == -EAGAIN && force_nonblock)
+ return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
req_set_fail(req);
+ } else if (!fixed) {
+ fd_install(fd, file);
+ ret = fd;
+ } else {
+ ret = io_install_fixed_file(req, file, issue_flags,
+ accept->file_slot - 1);
}
__io_req_complete(req, issue_flags, ret, 0);
return 0;
return !(flags & IORING_CQE_F_MORE);
}
-static void io_poll_task_func(struct io_kiocb *req)
+static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *nxt;
if (done) {
nxt = io_put_req_find_next(req);
if (nxt)
- io_req_task_submit(nxt);
+ io_req_task_submit(nxt, locked);
}
}
}
__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}
-static void io_async_task_func(struct io_kiocb *req)
+static void io_async_task_func(struct io_kiocb *req, bool *locked)
{
struct async_poll *apoll = req->apoll;
struct io_ring_ctx *ctx = req->ctx;
spin_unlock(&ctx->completion_lock);
if (!READ_ONCE(apoll->poll.canceled))
- io_req_task_submit(req);
+ io_req_task_submit(req, locked);
else
io_req_complete_failed(req, -ECANCELED);
}
return 0;
}
-static void io_req_task_timeout(struct io_kiocb *req)
+static void io_req_task_timeout(struct io_kiocb *req, bool *locked)
{
req_set_fail(req);
io_req_complete_post(req, -ETIME, 0);
return 0;
}
+static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
+{
+ switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
+ case IORING_TIMEOUT_BOOTTIME:
+ return CLOCK_BOOTTIME;
+ case IORING_TIMEOUT_REALTIME:
+ return CLOCK_REALTIME;
+ default:
+ /* can't happen, vetted at prep time */
+ WARN_ON_ONCE(1);
+ fallthrough;
+ case 0:
+ return CLOCK_MONOTONIC;
+ }
+}
+
+static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
+ struct timespec64 *ts, enum hrtimer_mode mode)
+ __must_hold(&ctx->timeout_lock)
+{
+ struct io_timeout_data *io;
+ struct io_kiocb *req;
+ bool found = false;
+
+ list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) {
+ found = user_data == req->user_data;
+ if (found)
+ break;
+ }
+ if (!found)
+ return -ENOENT;
+
+ io = req->async_data;
+ if (hrtimer_try_to_cancel(&io->timer) == -1)
+ return -EALREADY;
+ hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
+ io->timer.function = io_link_timeout_fn;
+ hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
+ return 0;
+}
+
static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
struct timespec64 *ts, enum hrtimer_mode mode)
__must_hold(&ctx->timeout_lock)
req->timeout.off = 0; /* noseq */
data = req->async_data;
list_add_tail(&req->timeout.list, &ctx->timeout_list);
- hrtimer_init(&data->timer, CLOCK_MONOTONIC, mode);
+ hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
data->timer.function = io_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
return 0;
if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in)
return -EINVAL;
+ tr->ltimeout = false;
tr->addr = READ_ONCE(sqe->addr);
tr->flags = READ_ONCE(sqe->timeout_flags);
- if (tr->flags & IORING_TIMEOUT_UPDATE) {
- if (tr->flags & ~(IORING_TIMEOUT_UPDATE|IORING_TIMEOUT_ABS))
+ if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
+ if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
+ return -EINVAL;
+ if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
+ tr->ltimeout = true;
+ if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
return -EINVAL;
if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
return -EFAULT;
spin_unlock_irq(&ctx->timeout_lock);
spin_unlock(&ctx->completion_lock);
} else {
+ enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
+
spin_lock_irq(&ctx->timeout_lock);
- ret = io_timeout_update(ctx, tr->addr, &tr->ts,
- io_translate_timeout_mode(tr->flags));
+ if (tr->ltimeout)
+ ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
+ else
+ ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
spin_unlock_irq(&ctx->timeout_lock);
}
if (off && is_timeout_link)
return -EINVAL;
flags = READ_ONCE(sqe->timeout_flags);
- if (flags & ~IORING_TIMEOUT_ABS)
+ if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK))
+ return -EINVAL;
+ /* more than one clock specified is invalid, obviously */
+ if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
return -EINVAL;
+ INIT_LIST_HEAD(&req->timeout.list);
req->timeout.off = off;
if (unlikely(off && !req->ctx->off_timeout_used))
req->ctx->off_timeout_used = true;
data = req->async_data;
data->req = req;
+ data->flags = flags;
if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
return -EFAULT;
data->mode = io_translate_timeout_mode(flags);
- hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
+ hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
if (is_timeout_link) {
struct io_submit_link *link = &req->ctx->submit_state.link;
struct io_ring_ctx *ctx = req->ctx;
int ret;
- WARN_ON_ONCE(req->task != current);
+ WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
if (ret != -ENOENT)
return io_renameat_prep(req, sqe);
case IORING_OP_UNLINKAT:
return io_unlinkat_prep(req, sqe);
+ case IORING_OP_MKDIRAT:
+ return io_mkdirat_prep(req, sqe);
+ case IORING_OP_SYMLINKAT:
+ return io_symlinkat_prep(req, sqe);
+ case IORING_OP_LINKAT:
+ return io_linkat_prep(req, sqe);
}
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
spin_unlock(&ctx->completion_lock);
kfree(de);
- io_queue_async_work(req);
+ io_queue_async_work(req, NULL);
return true;
}
case IORING_OP_UNLINKAT:
putname(req->unlink.filename);
break;
+ case IORING_OP_MKDIRAT:
+ putname(req->mkdir.filename);
+ break;
+ case IORING_OP_SYMLINKAT:
+ putname(req->symlink.oldpath);
+ putname(req->symlink.newpath);
+ break;
+ case IORING_OP_LINKAT:
+ putname(req->hardlink.oldpath);
+ putname(req->hardlink.newpath);
+ break;
}
}
if ((req->flags & REQ_F_POLLED) && req->apoll) {
case IORING_OP_UNLINKAT:
ret = io_unlinkat(req, issue_flags);
break;
+ case IORING_OP_MKDIRAT:
+ ret = io_mkdirat(req, issue_flags);
+ break;
+ case IORING_OP_SYMLINKAT:
+ ret = io_symlinkat(req, issue_flags);
+ break;
+ case IORING_OP_LINKAT:
+ ret = io_linkat(req, issue_flags);
+ break;
default:
ret = -EINVAL;
break;
if (timeout)
io_queue_linked_timeout(timeout);
+ /* either cancelled or io-wq is dying, so don't touch tctx->iowq */
if (work->flags & IO_WQ_WORK_CANCEL)
ret = -ECANCELED;
return io_file_get_normal(ctx, req, fd);
}
-static void io_req_task_link_timeout(struct io_kiocb *req)
+static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
{
struct io_kiocb *prev = req->timeout.prev;
int ret;
if (!req_ref_inc_not_zero(prev))
prev = NULL;
}
+ list_del(&req->timeout.list);
req->timeout.prev = prev;
spin_unlock_irqrestore(&ctx->timeout_lock, flags);
data->timer.function = io_link_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
data->mode);
+ list_add_tail(&req->timeout.list, &ctx->ltimeout_list);
}
spin_unlock_irq(&ctx->timeout_lock);
/* drop submission reference */
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
*/
- io_queue_async_work(req);
+ io_queue_async_work(req, NULL);
break;
}
if (unlikely(req->ctx->drain_active) && io_drain_req(req))
return;
- if (likely(!(req->flags & REQ_F_FORCE_ASYNC))) {
+ if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
__io_queue_sqe(req);
+ } else if (req->flags & REQ_F_FAIL) {
+ io_req_complete_failed(req, req->result);
} else {
int ret = io_req_prep_async(req);
if (unlikely(ret))
io_req_complete_failed(req, ret);
else
- io_queue_async_work(req);
+ io_queue_async_work(req, NULL);
}
}
ret = io_init_req(ctx, req, sqe);
if (unlikely(ret)) {
fail_req:
+ /* fail even hard links since we don't submit */
if (link->head) {
- /* fail even hard links since we don't submit */
- req_set_fail(link->head);
- io_req_complete_failed(link->head, -ECANCELED);
- link->head = NULL;
+ /*
+ * we can judge a link req is failed or cancelled by if
+ * REQ_F_FAIL is set, but the head is an exception since
+ * it may be set REQ_F_FAIL because of other req's failure
+ * so let's leverage req->result to distinguish if a head
+ * is set REQ_F_FAIL because of its failure or other req's
+ * failure so that we can set the correct ret code for it.
+ * init result here to avoid affecting the normal path.
+ */
+ if (!(link->head->flags & REQ_F_FAIL))
+ req_fail_link_node(link->head, -ECANCELED);
+ } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
+ /*
+ * the current req is a normal req, we should return
+ * error and thus break the submittion loop.
+ */
+ io_req_complete_failed(req, ret);
+ return ret;
}
- io_req_complete_failed(req, ret);
- return ret;
+ req_fail_link_node(req, ret);
+ } else {
+ ret = io_req_prep(req, sqe);
+ if (unlikely(ret))
+ goto fail_req;
}
- ret = io_req_prep(req, sqe);
- if (unlikely(ret))
- goto fail_req;
-
/* don't need @sqe from now on */
trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
req->flags, true,
if (link->head) {
struct io_kiocb *head = link->head;
- ret = io_req_prep_async(req);
- if (unlikely(ret))
- goto fail_req;
+ if (!(req->flags & REQ_F_FAIL)) {
+ ret = io_req_prep_async(req);
+ if (unlikely(ret)) {
+ req_fail_link_node(req, ret);
+ if (!(head->flags & REQ_F_FAIL))
+ req_fail_link_node(head, -ECANCELED);
+ }
+ }
trace_io_uring_link(ctx, req, head);
link->last->link = req;
link->last = req;
static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
__must_hold(&ctx->uring_lock)
{
- struct io_uring_task *tctx;
int submitted = 0;
/* make sure SQ entry isn't read before tail */
nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx));
if (!percpu_ref_tryget_many(&ctx->refs, nr))
return -EAGAIN;
+ io_get_task_refs(nr);
- tctx = current->io_uring;
- tctx->cached_refs -= nr;
- if (unlikely(tctx->cached_refs < 0)) {
- unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR;
-
- percpu_counter_add(&tctx->inflight, refill);
- refcount_add(refill, ¤t->usage);
- tctx->cached_refs += refill;
- }
io_submit_state_start(&ctx->submit_state, nr);
-
while (submitted < nr) {
const struct io_uring_sqe *sqe;
struct io_kiocb *req;
}
sqe = io_get_sqe(ctx);
if (unlikely(!sqe)) {
- kmem_cache_free(req_cachep, req);
+ list_add(&req->inflight_entry, &ctx->submit_state.free_list);
break;
}
/* will complete beyond this point, count as submitted */
#endif
}
+static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+ unsigned int issue_flags, u32 slot_index)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ struct io_fixed_file *file_slot;
+ int ret = -EBADF;
+
+ io_ring_submit_lock(ctx, !force_nonblock);
+ if (file->f_op == &io_uring_fops)
+ goto err;
+ ret = -ENXIO;
+ if (!ctx->file_data)
+ goto err;
+ ret = -EINVAL;
+ if (slot_index >= ctx->nr_user_files)
+ goto err;
+
+ slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
+ file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
+ ret = -EBADF;
+ if (file_slot->file_ptr)
+ goto err;
+
+ *io_get_tag_slot(ctx->file_data, slot_index) = 0;
+ io_fixed_file_set(file_slot, file);
+ ret = io_sqe_file_register(ctx, file, slot_index);
+ if (ret) {
+ file_slot->file_ptr = 0;
+ goto err;
+ }
+
+ ret = 0;
+err:
+ io_ring_submit_unlock(ctx, !force_nonblock);
+ if (ret)
+ fput(file);
+ return ret;
+}
+
static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
struct io_rsrc_node *node, void *rsrc)
{
sock_release(ctx->ring_sock);
}
#endif
+ WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
io_mem_free(ctx->rings);
io_mem_free(ctx->sq_sqes);
* Must be after io_uring_del_task_file() (removes nodes under
* uring_lock) to avoid race with io_uring_try_cancel_iowq().
*/
- tctx->io_wq = NULL;
io_wq_put_and_exit(wq);
+ tctx->io_wq = NULL;
}
}
return io_wq_cpu_affinity(tctx->io_wq, NULL);
}
+static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
+ void __user *arg)
+{
+ struct io_uring_task *tctx = current->io_uring;
+ __u32 new_count[2];
+ int i, ret;
+
+ if (!tctx || !tctx->io_wq)
+ return -EINVAL;
+ if (copy_from_user(new_count, arg, sizeof(new_count)))
+ return -EFAULT;
+ for (i = 0; i < ARRAY_SIZE(new_count); i++)
+ if (new_count[i] > INT_MAX)
+ return -EINVAL;
+
+ ret = io_wq_max_workers(tctx->io_wq, new_count);
+ if (ret)
+ return ret;
+
+ if (copy_to_user(arg, new_count, sizeof(new_count)))
+ return -EFAULT;
+
+ return 0;
+}
+
static bool io_register_op_must_quiesce(int op)
{
switch (op) {
case IORING_REGISTER_BUFFERS_UPDATE:
case IORING_REGISTER_IOWQ_AFF:
case IORING_UNREGISTER_IOWQ_AFF:
+ case IORING_REGISTER_IOWQ_MAX_WORKERS:
return false;
default:
return true;
break;
ret = io_unregister_iowq_aff(ctx);
break;
+ case IORING_REGISTER_IOWQ_MAX_WORKERS:
+ ret = -EINVAL;
+ if (!arg || nr_args != 2)
+ break;
+ ret = io_register_iowq_max_workers(ctx, arg);
+ break;
default:
ret = -EINVAL;
break;
BUILD_BUG_SQE_ELEM(40, __u16, buf_group);
BUILD_BUG_SQE_ELEM(42, __u16, personality);
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
+ BUILD_BUG_SQE_ELEM(44, __u32, file_index);
BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
sizeof(struct io_uring_rsrc_update));
BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) >
sizeof(struct io_uring_rsrc_update2));
+
+ /* ->buf_index is u16 */
+ BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
+
/* should fit into one byte */
BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
return result;
}
+ struct filename *
+ getname_uflags(const char __user *filename, int uflags)
+ {
+ int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+
+ return getname_flags(filename, flags, NULL);
+ }
+
struct filename *
getname(const char __user * filename)
{
void putname(struct filename *name)
{
+ if (IS_ERR_OR_NULL(name))
+ return;
+
BUG_ON(name->refcnt <= 0);
if (--name->refcnt > 0)
return err;
}
- int filename_lookup(int dfd, struct filename *name, unsigned flags,
+ static int __filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root)
{
int retval;
audit_inode(name, path->dentry,
flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
restore_nameidata();
+ return retval;
+ }
+
+ int filename_lookup(int dfd, struct filename *name, unsigned flags,
+ struct path *path, struct path *root)
+ {
+ int retval = __filename_lookup(dfd, name, flags, path, root);
+
putname(name);
return retval;
}
return err;
}
- static struct filename *filename_parentat(int dfd, struct filename *name,
+ static int __filename_parentat(int dfd, struct filename *name,
unsigned int flags, struct path *parent,
struct qstr *last, int *type)
{
struct nameidata nd;
if (IS_ERR(name))
- return name;
+ return PTR_ERR(name);
set_nameidata(&nd, dfd, name, NULL);
retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
if (unlikely(retval == -ECHILD))
*last = nd.last;
*type = nd.last_type;
audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
- } else {
- putname(name);
- name = ERR_PTR(retval);
}
restore_nameidata();
- return name;
+ return retval;
+ }
+
+ static int filename_parentat(int dfd, struct filename *name,
+ unsigned int flags, struct path *parent,
+ struct qstr *last, int *type)
+ {
+ int retval = __filename_parentat(dfd, name, flags, parent, last, type);
+
+ putname(name);
+ return retval;
}
/* does lookup, returns the object with parent locked */
struct dentry *kern_path_locked(const char *name, struct path *path)
{
- struct filename *filename;
struct dentry *d;
struct qstr last;
- int type;
+ int type, error;
- filename = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
+ error = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
&last, &type);
- if (IS_ERR(filename))
- return ERR_CAST(filename);
+ if (error)
+ return ERR_PTR(error);
if (unlikely(type != LAST_NORM)) {
path_put(path);
- putname(filename);
return ERR_PTR(-EINVAL);
}
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
inode_unlock(path->dentry->d_inode);
path_put(path);
}
- putname(filename);
return d;
}
/*
* Refuse to truncate files with mandatory locks held on them.
*/
- error = locks_verify_locked(filp);
- if (!error)
- error = security_path_truncate(path);
+ error = security_path_truncate(path);
if (!error) {
error = do_truncate(mnt_userns, path->dentry, 0,
ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
return file;
}
- static struct dentry *filename_create(int dfd, struct filename *name,
+ static struct dentry *__filename_create(int dfd, struct filename *name,
struct path *path, unsigned int lookup_flags)
{
struct dentry *dentry = ERR_PTR(-EEXIST);
*/
lookup_flags &= LOOKUP_REVAL;
- name = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
- if (IS_ERR(name))
- return ERR_CAST(name);
+ error = __filename_parentat(dfd, name, lookup_flags, path, &last, &type);
+ if (error)
+ return ERR_PTR(error);
/*
* Yucky last component or no last component at all?
error = err2;
goto fail;
}
- putname(name);
return dentry;
fail:
dput(dentry);
mnt_drop_write(path->mnt);
out:
path_put(path);
- putname(name);
return dentry;
}
+ static inline struct dentry *filename_create(int dfd, struct filename *name,
+ struct path *path, unsigned int lookup_flags)
+ {
+ struct dentry *res = __filename_create(dfd, name, path, lookup_flags);
+
+ putname(name);
+ return res;
+ }
+
struct dentry *kern_path_create(int dfd, const char *pathname,
struct path *path, unsigned int lookup_flags)
{
}
}
- static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
+ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
unsigned int dev)
{
struct user_namespace *mnt_userns;
error = may_mknod(mode);
if (error)
- return error;
+ goto out1;
retry:
- dentry = user_path_create(dfd, filename, &path, lookup_flags);
+ dentry = __filename_create(dfd, name, &path, lookup_flags);
+ error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ goto out1;
if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
error = security_path_mknod(&path, dentry, mode, dev);
if (error)
- goto out;
+ goto out2;
mnt_userns = mnt_user_ns(path.mnt);
switch (mode & S_IFMT) {
dentry, mode, 0);
break;
}
- out:
+ out2:
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
+ out1:
+ putname(name);
return error;
}
SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
unsigned int, dev)
{
- return do_mknodat(dfd, filename, mode, dev);
+ return do_mknodat(dfd, getname(filename), mode, dev);
}
SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
{
- return do_mknodat(AT_FDCWD, filename, mode, dev);
+ return do_mknodat(AT_FDCWD, getname(filename), mode, dev);
}
/**
}
EXPORT_SYMBOL(vfs_mkdir);
- static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
+ int do_mkdirat(int dfd, struct filename *name, umode_t mode)
{
struct dentry *dentry;
struct path path;
unsigned int lookup_flags = LOOKUP_DIRECTORY;
retry:
- dentry = user_path_create(dfd, pathname, &path, lookup_flags);
+ dentry = __filename_create(dfd, name, &path, lookup_flags);
+ error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ goto out_putname;
if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
+ out_putname:
+ putname(name);
return error;
}
SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
{
- return do_mkdirat(dfd, pathname, mode);
+ return do_mkdirat(dfd, getname(pathname), mode);
}
SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
{
- return do_mkdirat(AT_FDCWD, pathname, mode);
+ return do_mkdirat(AT_FDCWD, getname(pathname), mode);
}
/**
}
EXPORT_SYMBOL(vfs_rmdir);
- long do_rmdir(int dfd, struct filename *name)
+ int do_rmdir(int dfd, struct filename *name)
{
struct user_namespace *mnt_userns;
- int error = 0;
+ int error;
struct dentry *dentry;
struct path path;
struct qstr last;
int type;
unsigned int lookup_flags = 0;
retry:
- name = filename_parentat(dfd, name, lookup_flags,
- &path, &last, &type);
- if (IS_ERR(name))
- return PTR_ERR(name);
+ error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+ if (error)
+ goto exit1;
switch (type) {
case LAST_DOTDOT:
error = -ENOTEMPTY;
- goto exit1;
+ goto exit2;
case LAST_DOT:
error = -EINVAL;
- goto exit1;
+ goto exit2;
case LAST_ROOT:
error = -EBUSY;
- goto exit1;
+ goto exit2;
}
error = mnt_want_write(path.mnt);
if (error)
- goto exit1;
+ goto exit2;
inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- goto exit2;
+ goto exit3;
if (!dentry->d_inode) {
error = -ENOENT;
- goto exit3;
+ goto exit4;
}
error = security_path_rmdir(&path, dentry);
if (error)
- goto exit3;
+ goto exit4;
mnt_userns = mnt_user_ns(path.mnt);
error = vfs_rmdir(mnt_userns, path.dentry->d_inode, dentry);
- exit3:
+ exit4:
dput(dentry);
- exit2:
+ exit3:
inode_unlock(path.dentry->d_inode);
mnt_drop_write(path.mnt);
- exit1:
+ exit2:
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
+ exit1:
putname(name);
return error;
}
* writeout happening, and we don't want to prevent access to the directory
* while waiting on the I/O.
*/
- long do_unlinkat(int dfd, struct filename *name)
+ int do_unlinkat(int dfd, struct filename *name)
{
int error;
struct dentry *dentry;
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
- name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
- if (IS_ERR(name))
- return PTR_ERR(name);
+ error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+ if (error)
+ goto exit1;
error = -EISDIR;
if (type != LAST_NORM)
- goto exit1;
+ goto exit2;
error = mnt_want_write(path.mnt);
if (error)
- goto exit1;
+ goto exit2;
retry_deleg:
inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
ihold(inode);
error = security_path_unlink(&path, dentry);
if (error)
- goto exit2;
+ goto exit3;
mnt_userns = mnt_user_ns(path.mnt);
error = vfs_unlink(mnt_userns, path.dentry->d_inode, dentry,
&delegated_inode);
- exit2:
+ exit3:
dput(dentry);
}
inode_unlock(path.dentry->d_inode);
goto retry_deleg;
}
mnt_drop_write(path.mnt);
- exit1:
+ exit2:
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
inode = NULL;
goto retry;
}
+ exit1:
putname(name);
return error;
error = -EISDIR;
else
error = -ENOTDIR;
- goto exit2;
+ goto exit3;
}
SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
}
EXPORT_SYMBOL(vfs_symlink);
- static long do_symlinkat(const char __user *oldname, int newdfd,
- const char __user *newname)
+ int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
{
int error;
- struct filename *from;
struct dentry *dentry;
struct path path;
unsigned int lookup_flags = 0;
- from = getname(oldname);
- if (IS_ERR(from))
- return PTR_ERR(from);
+ if (IS_ERR(from)) {
+ error = PTR_ERR(from);
+ goto out_putnames;
+ }
retry:
- dentry = user_path_create(newdfd, newname, &path, lookup_flags);
+ dentry = __filename_create(newdfd, to, &path, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- goto out_putname;
+ goto out_putnames;
error = security_path_symlink(&path, dentry, from->name);
if (!error) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
- out_putname:
+ out_putnames:
+ putname(to);
putname(from);
return error;
}
SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
int, newdfd, const char __user *, newname)
{
- return do_symlinkat(oldname, newdfd, newname);
+ return do_symlinkat(getname(oldname), newdfd, getname(newname));
}
SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
{
- return do_symlinkat(oldname, AT_FDCWD, newname);
+ return do_symlinkat(getname(oldname), AT_FDCWD, getname(newname));
}
/**
* with linux 2.0, and to avoid hard-linking to directories
* and other special files. --ADM
*/
- static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
- const char __user *newname, int flags)
+ int do_linkat(int olddfd, struct filename *old, int newdfd,
+ struct filename *new, int flags)
{
struct user_namespace *mnt_userns;
struct dentry *new_dentry;
int how = 0;
int error;
- if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
- return -EINVAL;
+ if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) {
+ error = -EINVAL;
+ goto out_putnames;
+ }
/*
* To use null names we require CAP_DAC_READ_SEARCH
* This ensures that not everyone will be able to create
* handlink using the passed filedescriptor.
*/
- if (flags & AT_EMPTY_PATH) {
- if (!capable(CAP_DAC_READ_SEARCH))
- return -ENOENT;
- how = LOOKUP_EMPTY;
+ if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) {
+ error = -ENOENT;
+ goto out_putnames;
}
if (flags & AT_SYMLINK_FOLLOW)
how |= LOOKUP_FOLLOW;
retry:
- error = user_path_at(olddfd, oldname, how, &old_path);
+ error = __filename_lookup(olddfd, old, how, &old_path, NULL);
if (error)
- return error;
+ goto out_putnames;
- new_dentry = user_path_create(newdfd, newname, &new_path,
+ new_dentry = __filename_create(newdfd, new, &new_path,
(how & LOOKUP_REVAL));
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
- goto out;
+ goto out_putpath;
error = -EXDEV;
if (old_path.mnt != new_path.mnt)
how |= LOOKUP_REVAL;
goto retry;
}
- out:
+ out_putpath:
path_put(&old_path);
+ out_putnames:
+ putname(old);
+ putname(new);
return error;
}
SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
int, newdfd, const char __user *, newname, int, flags)
{
- return do_linkat(olddfd, oldname, newdfd, newname, flags);
+ return do_linkat(olddfd, getname_uflags(oldname, flags),
+ newdfd, getname(newname), flags);
}
SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
{
- return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
+ return do_linkat(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0);
}
/**
int error = -EINVAL;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
- goto put_both;
+ goto put_names;
if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
(flags & RENAME_EXCHANGE))
- goto put_both;
+ goto put_names;
if (flags & RENAME_EXCHANGE)
target_flags = 0;
retry:
- from = filename_parentat(olddfd, from, lookup_flags, &old_path,
+ error = __filename_parentat(olddfd, from, lookup_flags, &old_path,
&old_last, &old_type);
- if (IS_ERR(from)) {
- error = PTR_ERR(from);
- goto put_new;
- }
+ if (error)
+ goto put_names;
- to = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
+ error = __filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
&new_type);
- if (IS_ERR(to)) {
- error = PTR_ERR(to);
+ if (error)
goto exit1;
- }
error = -EXDEV;
if (old_path.mnt != new_path.mnt)
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
- put_both:
- if (!IS_ERR(from))
- putname(from);
- put_new:
- if (!IS_ERR(to))
- putname(to);
+ put_names:
+ putname(from);
+ putname(to);
return error;
}