void (*update_time)(struct inode *, struct timespec *, int);
int (*atomic_open)(struct inode *, struct dentry *,
struct file *, unsigned open_flag,
- umode_t create_mode, int *opened);
+ umode_t create_mode);
int (*tmpfile) (struct inode *, struct dentry *, umode_t);
locking rules:
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
- struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
- __poll_t (*poll_mask) (struct file *, __poll_t);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
};
locking rules:
- All except for ->poll_mask may block.
+ All may block.
->llseek() locking has moved from llseek to the individual llseek
implementations. If your fs is not using generic_file_llseek, you
the lease within the individual filesystem to record the result of the
operation
-->poll_mask can be called with or without the waitqueue lock for the waitqueue
-returned from ->get_poll_head.
-
--------------------------- dquot_operations -------------------------------
prototypes:
int (*write_dquot) (struct dquot *);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
void (*update_time)(struct inode *, struct timespec *, int);
int (*atomic_open)(struct inode *, struct dentry *, struct file *,
- unsigned open_flag, umode_t create_mode, int *opened);
+ unsigned open_flag, umode_t create_mode);
int (*tmpfile) (struct inode *, struct dentry *, umode_t);
};
atomic_open: called on the last component of an open. Using this optional
method the filesystem can look up, possibly create and open the file in
- one atomic operation. If it cannot perform this (e.g. the file type
- turned out to be wrong) it may signal this by returning 1 instead of
- usual 0 or -ve . This method is only called if the last component is
- negative or needs lookup. Cached positive dentries are still handled by
- f_op->open(). If the file was created, the FILE_CREATED flag should be
- set in "opened". In case of O_EXCL the method must only succeed if the
- file didn't exist and hence FILE_CREATED shall always be set on success.
+ one atomic operation. If it wants to leave actual opening to the
+ caller (e.g. if the file turned out to be a symlink, device, or just
+ something filesystem won't do atomic open for), it may signal this by
+ returning finish_no_open(file, dentry). This method is only called if
+ the last component is negative or needs lookup. Cached positive dentries
+ are still handled by f_op->open(). If the file was created,
+ FMODE_CREATED flag should be set in file->f_mode. In case of O_EXCL
+ the method must only succeed if the file didn't exist and hence FMODE_CREATED
+ shall always be set on success.
tmpfile: called in the end of O_TMPFILE open(). Optional, equivalent to
atomically creating, opening and unlinking a file in given directory.
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
- struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
- __poll_t (*poll_mask) (struct file *, __poll_t);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
activity on this file and (optionally) go to sleep until there
is activity. Called by the select(2) and poll(2) system calls
- get_poll_head: Returns the struct wait_queue_head that callers can
- wait on. Callers need to check the returned events using ->poll_mask
- once woken. Can return NULL to indicate polling is not supported,
- or any error code using the ERR_PTR convention to indicate that a
- grave error occured and ->poll_mask shall not be called.
-
- poll_mask: return the mask of EPOLL* values describing the file descriptor
- state. Called either before going to sleep on the waitqueue returned by
- get_poll_head, or after it has been woken. If ->get_poll_head and
- ->poll_mask are implemented ->poll does not need to be implement.
-
unlocked_ioctl: called by the ioctl(2) system call.
compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
* Implements an efficient asynchronous io interface.
*
* Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved.
- * Copyright 2018 Christoph Hellwig.
*
* See ../COPYING for licensing terms.
*/
bool datasync;
};
-struct poll_iocb {
- struct file *file;
- __poll_t events;
- struct wait_queue_head *head;
-
- union {
- struct wait_queue_entry wait;
- struct work_struct work;
- };
-};
-
struct aio_kiocb {
union {
struct kiocb rw;
struct fsync_iocb fsync;
- struct poll_iocb poll;
};
struct kioctx *ki_ctx;
static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
{
- struct qstr this = QSTR_INIT("[aio]", 5);
struct file *file;
- struct path path;
struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
if (IS_ERR(inode))
return ERR_CAST(inode);
inode->i_mapping->private_data = ctx;
inode->i_size = PAGE_SIZE * nr_pages;
- path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
- if (!path.dentry) {
+ file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
+ O_RDWR, &aio_ring_fops);
+ if (IS_ERR(file))
iput(inode);
- return ERR_PTR(-ENOMEM);
- }
- path.mnt = mntget(aio_mnt);
-
- d_instantiate(path.dentry, inode);
- file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
- if (IS_ERR(file)) {
- path_put(&path);
- return file;
- }
-
- file->f_flags = O_RDWR;
return file;
}
static struct dentry *aio_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- static const struct dentry_operations ops = {
- .d_dname = simple_dname,
- };
- struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
+ struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL,
AIO_RING_MAGIC);
if (!IS_ERR(root))
if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
iocb->aio_rw_flags))
return -EINVAL;
+
req->file = fget(iocb->aio_fildes);
if (unlikely(!req->file))
return -EBADF;
return 0;
}
-/* need to use list_del_init so we can check if item was present */
-static inline bool __aio_poll_remove(struct poll_iocb *req)
-{
- if (list_empty(&req->wait.entry))
- return false;
- list_del_init(&req->wait.entry);
- return true;
-}
-
-static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
-{
- fput(iocb->poll.file);
- aio_complete(iocb, mangle_poll(mask), 0);
-}
-
-static void aio_poll_work(struct work_struct *work)
-{
- struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work);
-
- if (!list_empty_careful(&iocb->ki_list))
- aio_remove_iocb(iocb);
- __aio_poll_complete(iocb, iocb->poll.events);
-}
-
-static int aio_poll_cancel(struct kiocb *iocb)
-{
- struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
- struct poll_iocb *req = &aiocb->poll;
- struct wait_queue_head *head = req->head;
- bool found = false;
-
- spin_lock(&head->lock);
- found = __aio_poll_remove(req);
- spin_unlock(&head->lock);
-
- if (found) {
- req->events = 0;
- INIT_WORK(&req->work, aio_poll_work);
- schedule_work(&req->work);
- }
- return 0;
-}
-
-static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
- void *key)
-{
- struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
- struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
- struct file *file = req->file;
- __poll_t mask = key_to_poll(key);
-
- assert_spin_locked(&req->head->lock);
-
- /* for instances that support it check for an event match first: */
- if (mask && !(mask & req->events))
- return 0;
-
- mask = file->f_op->poll_mask(file, req->events) & req->events;
- if (!mask)
- return 0;
-
- __aio_poll_remove(req);
-
- /*
- * Try completing without a context switch if we can acquire ctx_lock
- * without spinning. Otherwise we need to defer to a workqueue to
- * avoid a deadlock due to the lock order.
- */
- if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
- list_del_init(&iocb->ki_list);
- spin_unlock(&iocb->ki_ctx->ctx_lock);
-
- __aio_poll_complete(iocb, mask);
- } else {
- req->events = mask;
- INIT_WORK(&req->work, aio_poll_work);
- schedule_work(&req->work);
- }
-
- return 1;
-}
-
-static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
-{
- struct kioctx *ctx = aiocb->ki_ctx;
- struct poll_iocb *req = &aiocb->poll;
- __poll_t mask;
-
- /* reject any unknown events outside the normal event mask. */
- if ((u16)iocb->aio_buf != iocb->aio_buf)
- return -EINVAL;
- /* reject fields that are not defined for poll */
- if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
- return -EINVAL;
-
- req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
- req->file = fget(iocb->aio_fildes);
- if (unlikely(!req->file))
- return -EBADF;
- if (!file_has_poll_mask(req->file))
- goto out_fail;
-
- req->head = req->file->f_op->get_poll_head(req->file, req->events);
- if (!req->head)
- goto out_fail;
- if (IS_ERR(req->head)) {
- mask = EPOLLERR;
- goto done;
- }
-
- init_waitqueue_func_entry(&req->wait, aio_poll_wake);
- aiocb->ki_cancel = aio_poll_cancel;
-
- spin_lock_irq(&ctx->ctx_lock);
- spin_lock(&req->head->lock);
- mask = req->file->f_op->poll_mask(req->file, req->events) & req->events;
- if (!mask) {
- __add_wait_queue(req->head, &req->wait);
- list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
- }
- spin_unlock(&req->head->lock);
- spin_unlock_irq(&ctx->ctx_lock);
-done:
- if (mask)
- __aio_poll_complete(aiocb, mask);
- return 0;
-out_fail:
- fput(req->file);
- return -EINVAL; /* same as no support for IOCB_CMD_POLL */
-}
-
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
bool compat)
{
case IOCB_CMD_FDSYNC:
ret = aio_fsync(&req->fsync, &iocb, true);
break;
- case IOCB_CMD_POLL:
- ret = aio_poll(req, &iocb);
- break;
default:
pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
ret = -EINVAL;
return ret;
}
+struct __aio_sigset {
+ const sigset_t __user *sigmask;
+ size_t sigsetsize;
+};
+
SYSCALL_DEFINE6(io_pgetevents,
aio_context_t, ctx_id,
long, min_nr,
bool truncate_op = (lend == LLONG_MAX);
memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
+ vma_init(&pseudo_vma, current->mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pagevec_init(&pvec);
next = start;
* as input to create an allocation policy.
*/
memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
+ vma_init(&pseudo_vma, mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pseudo_vma.vm_file = file;
return h - hstates;
}
- static const struct dentry_operations anon_ops = {
- .d_dname = simple_dname
- };
-
/*
* Note that size should be aligned to proper hugepage size in caller side,
* otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
vm_flags_t acctflag, struct user_struct **user,
int creat_flags, int page_size_log)
{
- struct file *file = ERR_PTR(-ENOMEM);
struct inode *inode;
- struct path path;
- struct super_block *sb;
- struct qstr quick_string;
+ struct vfsmount *mnt;
int hstate_idx;
+ struct file *file;
hstate_idx = get_hstate_idx(page_size_log);
if (hstate_idx < 0)
return ERR_PTR(-ENODEV);
*user = NULL;
- if (!hugetlbfs_vfsmount[hstate_idx])
+ mnt = hugetlbfs_vfsmount[hstate_idx];
+ if (!mnt)
return ERR_PTR(-ENOENT);
if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
}
}
- sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb;
- quick_string.name = name;
- quick_string.len = strlen(quick_string.name);
- quick_string.hash = 0;
- path.dentry = d_alloc_pseudo(sb, &quick_string);
- if (!path.dentry)
- goto out_shm_unlock;
-
- d_set_d_op(path.dentry, &anon_ops);
- path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]);
file = ERR_PTR(-ENOSPC);
- inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0);
+ inode = hugetlbfs_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0);
if (!inode)
- goto out_dentry;
+ goto out;
if (creat_flags == HUGETLB_SHMFS_INODE)
inode->i_flags |= S_PRIVATE;
- file = ERR_PTR(-ENOMEM);
- if (hugetlb_reserve_pages(inode, 0,
- size >> huge_page_shift(hstate_inode(inode)), NULL,
- acctflag))
- goto out_inode;
-
- d_instantiate(path.dentry, inode);
inode->i_size = size;
clear_nlink(inode);
- file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
- &hugetlbfs_file_operations);
- if (IS_ERR(file))
- goto out_dentry; /* inode is already attached */
-
- return file;
+ if (hugetlb_reserve_pages(inode, 0,
+ size >> huge_page_shift(hstate_inode(inode)), NULL,
+ acctflag))
+ file = ERR_PTR(-ENOMEM);
+ else
+ file = alloc_file_pseudo(inode, mnt, name, O_RDWR,
+ &hugetlbfs_file_operations);
+ if (!IS_ERR(file))
+ return file;
- out_inode:
iput(inode);
- out_dentry:
- path_put(&path);
- out_shm_unlock:
+ out:
if (*user) {
user_shm_unlock(size, *user);
*user = NULL;
}
}
if (opened && opendata->file_created)
- *opened |= FILE_CREATED;
+ *opened = 1;
if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) {
*ctx_th = opendata->f_attr.mdsthreshold;
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct inode *inode = calldata->inode;
+ struct pnfs_layout_hdr *lo;
bool is_rdonly, is_wronly, is_rdwr;
int call_close = 0;
goto out_wait;
}
+ lo = calldata->arg.lr_args ? calldata->arg.lr_args->layout : NULL;
+ if (lo && !pnfs_layout_is_valid(lo)) {
+ calldata->arg.lr_args = NULL;
+ calldata->res.lr_res = NULL;
+ }
+
if (calldata->arg.fmode == 0)
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
{
struct nfs4_delegreturndata *d_data;
+ struct pnfs_layout_hdr *lo;
d_data = (struct nfs4_delegreturndata *)data;
if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task))
return;
+ lo = d_data->args.lr_args ? d_data->args.lr_args->layout : NULL;
+ if (lo && !pnfs_layout_is_valid(lo)) {
+ d_data->args.lr_args = NULL;
+ d_data->res.lr_res = NULL;
+ }
+
nfs4_setup_sequence(d_data->res.server->nfs_client,
&d_data->args.seq_args,
&d_data->res.seq_res,
if (data->arg.new_lock && !data->cancelled) {
data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
- break;
+ goto out_restart;
}
-
if (data->arg.new_lock_owner != 0) {
nfs_confirm_seqid(&lsp->ls_seqid, 0);
nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
- goto out_done;
- } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid))
- goto out_done;
-
+ } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
+ goto out_restart;
break;
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
if (data->arg.new_lock_owner != 0) {
- if (nfs4_stateid_match(&data->arg.open_stateid,
+ if (!nfs4_stateid_match(&data->arg.open_stateid,
&lsp->ls_state->open_stateid))
- goto out_done;
- } else if (nfs4_stateid_match(&data->arg.lock_stateid,
+ goto out_restart;
+ } else if (!nfs4_stateid_match(&data->arg.lock_stateid,
&lsp->ls_stateid))
- goto out_done;
+ goto out_restart;
}
- if (!data->cancelled)
- rpc_restart_call_prepare(task);
out_done:
dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
+ return;
+out_restart:
+ if (!data->cancelled)
+ rpc_restart_call_prepare(task);
+ goto out_done;
}
static void nfs4_lock_release(void *calldata)
dprintk("%s: begin!\n", __func__);
nfs_free_seqid(data->arg.open_seqid);
- if (data->cancelled) {
+ if (data->cancelled && data->rpc_status == 0) {
struct rpc_task *task;
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
data->arg.lock_seqid);
dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
+ nfs4_sequence_free_slot(&lgp->res.seq_res);
+
switch (nfs4err) {
case 0:
goto out;
goto out;
}
- nfs4_sequence_free_slot(&lgp->res.seq_res);
err = nfs4_handle_exception(server, nfs4err, exception);
if (!status) {
if (exception->retry)
if (IS_ERR(task))
return ERR_CAST(task);
status = rpc_wait_for_completion_task(task);
- if (status == 0) {
+ if (status != 0)
+ goto out;
+
+ /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
+ if (task->tk_status < 0 || lgp->res.layoutp->len == 0) {
status = nfs4_layoutget_handle_exception(task, lgp, &exception);
*timeout = exception.timeout;
- }
-
+ } else
+ lseg = pnfs_layout_process(lgp);
+out:
trace_nfs4_layoutget(lgp->args.ctx,
&lgp->args.range,
&lgp->res.range,
&lgp->res.stateid,
status);
- /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
- if (status == 0 && lgp->res.layoutp->len)
- lseg = pnfs_layout_process(lgp);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
if (status)
&lrp->args.seq_args,
&lrp->res.seq_res,
task);
+ if (!pnfs_layout_is_valid(lrp->args.layout))
+ rpc_exit(task, 0);
}
static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
}
}
-static struct wait_queue_head *
-pipe_get_poll_head(struct file *filp, __poll_t events)
-{
- struct pipe_inode_info *pipe = filp->private_data;
-
- return &pipe->wait;
-}
-
/* No kernel lock held - fine */
-static __poll_t pipe_poll_mask(struct file *filp, __poll_t events)
+static __poll_t
+pipe_poll(struct file *filp, poll_table *wait)
{
+ __poll_t mask;
struct pipe_inode_info *pipe = filp->private_data;
- int nrbufs = pipe->nrbufs;
- __poll_t mask = 0;
+ int nrbufs;
+
+ poll_wait(filp, &pipe->wait, wait);
/* Reading only -- no need for acquiring the semaphore. */
+ nrbufs = pipe->nrbufs;
+ mask = 0;
if (filp->f_mode & FMODE_READ) {
mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0;
if (!pipe->writers && filp->f_version != pipe->w_counter)
int create_pipe_files(struct file **res, int flags)
{
- int err;
struct inode *inode = get_pipe_inode();
struct file *f;
- struct path path;
if (!inode)
return -ENFILE;
- err = -ENOMEM;
- path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &empty_name);
- if (!path.dentry)
- goto err_inode;
- path.mnt = mntget(pipe_mnt);
-
- d_instantiate(path.dentry, inode);
-
- f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
+ f = alloc_file_pseudo(inode, pipe_mnt, "",
+ O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
+ &pipefifo_fops);
if (IS_ERR(f)) {
- err = PTR_ERR(f);
- goto err_dentry;
+ free_pipe_info(inode->i_pipe);
+ iput(inode);
+ return PTR_ERR(f);
}
- f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
f->private_data = inode->i_pipe;
- res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
+ res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
+ &pipefifo_fops);
if (IS_ERR(res[0])) {
- err = PTR_ERR(res[0]);
- goto err_file;
+ put_pipe_info(inode, inode->i_pipe);
+ fput(f);
+ return PTR_ERR(res[0]);
}
-
- path_get(&path);
res[0]->private_data = inode->i_pipe;
- res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
res[1] = f;
return 0;
-
- err_file:
- put_filp(f);
- err_dentry:
- free_pipe_info(inode->i_pipe);
- path_put(&path);
- return err;
-
- err_inode:
- free_pipe_info(inode->i_pipe);
- iput(inode);
- return err;
}
static int __do_pipe_flags(int *fd, struct file **files, int flags)
.llseek = no_llseek,
.read_iter = pipe_read,
.write_iter = pipe_write,
- .get_poll_head = pipe_get_poll_head,
- .poll_mask = pipe_poll_mask,
+ .poll = pipe_poll,
.unlocked_ioctl = pipe_ioctl,
.release = pipe_release,
.fasync = pipe_fasync,
/* Has write method(s) */
#define FMODE_CAN_WRITE ((__force fmode_t)0x40000)
+ #define FMODE_OPENED ((__force fmode_t)0x80000)
+ #define FMODE_CREATED ((__force fmode_t)0x100000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
- struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
- __poll_t (*poll_mask) (struct file *, __poll_t);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
int (*update_time)(struct inode *, struct timespec64 *, int);
int (*atomic_open)(struct inode *, struct dentry *,
struct file *, unsigned open_flag,
- umode_t create_mode, int *opened);
+ umode_t create_mode);
int (*tmpfile) (struct inode *, struct dentry *, umode_t);
int (*set_acl)(struct inode *, struct posix_acl *, int);
} ____cacheline_aligned;
extern struct file *file_open_root(struct dentry *, struct vfsmount *,
const char *, int, umode_t);
extern struct file * dentry_open(const struct path *, int, const struct cred *);
- extern struct file *filp_clone_open(struct file *);
+ static inline struct file *file_clone_open(struct file *file)
+ {
+ return dentry_open(&file->f_path, file->f_flags, file->f_cred);
+ }
extern int filp_close(struct file *, fl_owner_t id);
extern struct filename *getname_flags(const char __user *, int, int *);
extern struct filename *getname_kernel(const char *);
extern void putname(struct filename *name);
- enum {
- FILE_CREATED = 1,
- FILE_OPENED = 2
- };
extern int finish_open(struct file *file, struct dentry *dentry,
- int (*open)(struct inode *, struct file *),
- int *opened);
+ int (*open)(struct inode *, struct file *));
extern int finish_no_open(struct file *file, struct dentry *dentry);
/* fs/ioctl.c */
return 0;
}
+static unsigned long shm_pagesize(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct shm_file_data *sfd = shm_file_data(file);
+
+ if (sfd->vm_ops->pagesize)
+ return sfd->vm_ops->pagesize(vma);
+
+ return PAGE_SIZE;
+}
+
#ifdef CONFIG_NUMA
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
.close = shm_close, /* callback for when the vm-area is released */
.fault = shm_fault,
.split = shm_split,
+ .pagesize = shm_pagesize,
#if defined(CONFIG_NUMA)
.set_policy = shm_set_policy,
.get_policy = shm_get_policy,
struct shmid_kernel *shp;
unsigned long addr = (unsigned long)shmaddr;
unsigned long size;
- struct file *file;
+ struct file *file, *base;
int err;
unsigned long flags = MAP_SHARED;
unsigned long prot;
int acc_mode;
struct ipc_namespace *ns;
struct shm_file_data *sfd;
- struct path path;
- fmode_t f_mode;
+ int f_flags;
unsigned long populate = 0;
err = -EINVAL;
if (shmflg & SHM_RDONLY) {
prot = PROT_READ;
acc_mode = S_IRUGO;
- f_mode = FMODE_READ;
+ f_flags = O_RDONLY;
} else {
prot = PROT_READ | PROT_WRITE;
acc_mode = S_IRUGO | S_IWUGO;
- f_mode = FMODE_READ | FMODE_WRITE;
+ f_flags = O_RDWR;
}
if (shmflg & SHM_EXEC) {
prot |= PROT_EXEC;
goto out_unlock;
}
- path = shp->shm_file->f_path;
- path_get(&path);
+ /*
+ * We need to take a reference to the real shm file to prevent the
+ * pointer from becoming stale in cases where the lifetime of the outer
+ * file extends beyond that of the shm segment. It's not usually
+ * possible, but it can happen during remap_file_pages() emulation as
+ * that unmaps the memory, then does ->mmap() via file reference only.
+ * We'll deny the ->mmap() if the shm segment was since removed, but to
+ * detect shm ID reuse we need to compare the file pointers.
+ */
+ base = get_file(shp->shm_file);
shp->shm_nattch++;
- size = i_size_read(d_inode(path.dentry));
+ size = i_size_read(file_inode(base));
ipc_unlock_object(&shp->shm_perm);
rcu_read_unlock();
err = -ENOMEM;
sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
if (!sfd) {
- path_put(&path);
+ fput(base);
goto out_nattch;
}
- file = alloc_file(&path, f_mode,
- is_file_hugepages(shp->shm_file) ?
+ file = alloc_file_clone(base, f_flags,
+ is_file_hugepages(base) ?
&shm_file_operations_huge :
&shm_file_operations);
err = PTR_ERR(file);
if (IS_ERR(file)) {
kfree(sfd);
- path_put(&path);
+ fput(base);
goto out_nattch;
}
- file->private_data = sfd;
- file->f_mapping = shp->shm_file->f_mapping;
sfd->id = shp->shm_perm.id;
sfd->ns = get_ipc_ns(ns);
- /*
- * We need to take a reference to the real shm file to prevent the
- * pointer from becoming stale in cases where the lifetime of the outer
- * file extends beyond that of the shm segment. It's not usually
- * possible, but it can happen during remap_file_pages() emulation as
- * that unmaps the memory, then does ->mmap() via file reference only.
- * We'll deny the ->mmap() if the shm segment was since removed, but to
- * detect shm ID reuse we need to compare the file pointers.
- */
- sfd->file = get_file(shp->shm_file);
+ sfd->file = base;
sfd->vm_ops = NULL;
+ file->private_data = sfd;
err = security_mmap_file(file, prot, flags);
if (err)
{
/* Create a pseudo vma that just contains the policy */
memset(vma, 0, sizeof(*vma));
+ vma_init(vma, NULL);
/* Bias interleave by inode number to distribute better across nodes */
vma->vm_pgoff = index + info->vfs_inode.i_ino;
vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
/* common code */
- static const struct dentry_operations anon_ops = {
- .d_dname = simple_dname
- };
-
static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size,
unsigned long flags, unsigned int i_flags)
{
- struct file *res;
struct inode *inode;
- struct path path;
- struct super_block *sb;
- struct qstr this;
+ struct file *res;
if (IS_ERR(mnt))
return ERR_CAST(mnt);
if (shmem_acct_size(flags, size))
return ERR_PTR(-ENOMEM);
- res = ERR_PTR(-ENOMEM);
- this.name = name;
- this.len = strlen(name);
- this.hash = 0; /* will go */
- sb = mnt->mnt_sb;
- path.mnt = mntget(mnt);
- path.dentry = d_alloc_pseudo(sb, &this);
- if (!path.dentry)
- goto put_memory;
- d_set_d_op(path.dentry, &anon_ops);
-
- res = ERR_PTR(-ENOSPC);
- inode = shmem_get_inode(sb, NULL, S_IFREG | 0777, 0, flags);
- if (!inode)
- goto put_memory;
-
+ inode = shmem_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0,
+ flags);
+ if (unlikely(!inode)) {
+ shmem_unacct_size(flags, size);
+ return ERR_PTR(-ENOSPC);
+ }
inode->i_flags |= i_flags;
- d_instantiate(path.dentry, inode);
inode->i_size = size;
clear_nlink(inode); /* It is unlinked */
res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
+ if (!IS_ERR(res))
+ res = alloc_file_pseudo(inode, mnt, name, O_RDWR,
+ &shmem_file_operations);
if (IS_ERR(res))
- goto put_path;
-
- res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
- &shmem_file_operations);
- if (IS_ERR(res))
- goto put_path;
-
- return res;
-
- put_memory:
- shmem_unacct_size(flags, size);
- put_path:
- path_put(&path);
+ iput(inode);
return res;
}
#include <linux/magic.h>
#include <linux/slab.h>
#include <linux/xattr.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
static int sock_mmap(struct file *file, struct vm_area_struct *vma);
static int sock_close(struct inode *inode, struct file *file);
-static struct wait_queue_head *sock_get_poll_head(struct file *file,
- __poll_t events);
-static __poll_t sock_poll_mask(struct file *file, __poll_t);
-static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait);
+static __poll_t sock_poll(struct file *file,
+ struct poll_table_struct *wait);
static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
static long compat_sock_ioctl(struct file *file,
.llseek = no_llseek,
.read_iter = sock_read_iter,
.write_iter = sock_write_iter,
- .get_poll_head = sock_get_poll_head,
- .poll_mask = sock_poll_mask,
.poll = sock_poll,
.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
- struct qstr name = { .name = "" };
- struct path path;
struct file *file;
- if (dname) {
- name.name = dname;
- name.len = strlen(name.name);
- } else if (sock->sk) {
- name.name = sock->sk->sk_prot_creator->name;
- name.len = strlen(name.name);
- }
- path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
- if (unlikely(!path.dentry)) {
- sock_release(sock);
- return ERR_PTR(-ENOMEM);
- }
- path.mnt = mntget(sock_mnt);
-
- d_instantiate(path.dentry, SOCK_INODE(sock));
+ if (!dname)
+ dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
- file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
- &socket_file_ops);
+ file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
+ O_RDWR | (flags & O_NONBLOCK),
+ &socket_file_ops);
if (IS_ERR(file)) {
- /* drop dentry, keep inode for a bit */
- ihold(d_inode(path.dentry));
- path_put(&path);
- /* ... and now kill it properly */
sock_release(sock);
return file;
}
sock->file = file;
- file->f_flags = O_RDWR | (flags & O_NONBLOCK);
file->private_data = sock;
return file;
}
}
EXPORT_SYMBOL(sock_create_lite);
-static struct wait_queue_head *sock_get_poll_head(struct file *file,
- __poll_t events)
-{
- struct socket *sock = file->private_data;
-
- if (!sock->ops->poll_mask)
- return NULL;
- sock_poll_busy_loop(sock, events);
- return sk_sleep(sock->sk);
-}
-
-static __poll_t sock_poll_mask(struct file *file, __poll_t events)
-{
- struct socket *sock = file->private_data;
-
- /*
- * We need to be sure we are in sync with the socket flags modification.
- *
- * This memory barrier is paired in the wq_has_sleeper.
- */
- smp_mb();
-
- /* this socket can poll_ll so tell the system call */
- return sock->ops->poll_mask(sock, events) |
- (sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0);
-}
-
/* No kernel lock held - perfect */
static __poll_t sock_poll(struct file *file, poll_table *wait)
{
struct socket *sock = file->private_data;
- __poll_t events = poll_requested_events(wait), mask = 0;
-
- if (sock->ops->poll) {
- sock_poll_busy_loop(sock, events);
- mask = sock->ops->poll(file, sock, wait);
- } else if (sock->ops->poll_mask) {
- sock_poll_wait(file, sock_get_poll_head(file, events), wait);
- mask = sock->ops->poll_mask(sock, events);
- }
+ __poll_t events = poll_requested_events(wait);
- return mask | sock_poll_busy_flag(sock);
+ sock_poll_busy_loop(sock, events);
+ if (!sock->ops->poll)
+ return 0;
+ return sock->ops->poll(file, sock, wait) | sock_poll_busy_flag(sock);
}
static int sock_mmap(struct file *file, struct vm_area_struct *vma)
if (call < 1 || call > SYS_SENDMMSG)
return -EINVAL;
+ call = array_index_nospec(call, SYS_SENDMMSG + 1);
len = nargs[call];
if (len > sizeof(a))
bool sock_is_registered(int family)
{
- return family < NPROTO && rcu_access_pointer(net_families[family]);
+ return family < NPROTO &&
+ rcu_access_pointer(net_families[array_index_nospec(family, NPROTO)]);
}
static int __init sock_init(void)
*
* Returns 0
*/
- static int smack_file_open(struct file *file, const struct cred *cred)
+ static int smack_file_open(struct file *file)
{
- struct task_smack *tsp = cred->security;
+ struct task_smack *tsp = file->f_cred->security;
struct inode *inode = file_inode(file);
struct smk_audit_info ad;
int rc;
smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
smk_ad_setfield_u_fs_path(&ad, file->f_path);
rc = smk_tskacc(tsp, smk_of_inode(inode), MAY_READ, &ad);
- rc = smk_bu_credfile(cred, file, MAY_READ, rc);
+ rc = smk_bu_credfile(file->f_cred, file, MAY_READ, rc);
return rc;
}
struct smack_known *skp = smk_of_task_struct(p);
isp->smk_inode = skp;
+ isp->smk_flags |= SMK_INODE_INSTANT;
}
/*