A while ago it was reported that posix file locking goes wrong when a
multi-threaded process calls exec. I looked into the history and this
is definitely a regression, that should be fixed if we can.
This set of changes cleanups of the code in exec so hopefully this code
will not regress again. Then it adds helpers and fixes the users of
files_struct so the reference count is only incremented if COPY_FILES is
passed to clone (or if io_uring takes a reference). Then it removes
helpers (get_files_struct, __install_fd, __alloc_fd, __close_fd) that
are no longer needed and if used would encourage code that increments
the count of files_struct somewhere besides in clone when COPY_FILES is
passed.
In addition to fixing the bug in exec and simplifing the code this set
of changes by virtue of getting files_struct.count correct it optimizes
fdget. With proc and other places not temporarily increasing the count
on files_struct __fget_light should succeed more often in being able to
return a struct file without touching it's reference count.
Fixing the count in files_struct was suggested by Oleg[1].
For those that are interested in the history of this issue I have
included as much of it as I could find in the first change.
Since v1:
- Renamed the functions
__fcheck_files -> files_lookup_fd_raw
fcheck_files -> files_lookup_fd_locked
fcheck_files -> files_lookup_fd_rcu
fcheck_files -> lookup_fd_rcu
fcheck_task -> task_lookup_fd_rcu
fnext_task -> task_lookup_next_fd_rcu
__close_fd_get_file -> close_fd_get_file
- Simplified get_file_raw_ptr
- Removed ksys_close
- Examined the penalty for taking task_lock. The helper
task_lookup_next_fd_rcu takes task_lock each iteration. Concern was
expressed that this might be a problem. The function tid_fd_mode
isn called from tid_fd_revalidate which is called when ever a file
descriptor file is stat'ed, opened, or otherwise accessed. The
function tid_fd_mode histrocally called get_files_struct which took
and dropped task_lock. So the volume of task_lock calls is already
proportional to the number of file descriptors. A micro benchmark
did not see the move to task_lookup_next_fd_rcu making a difference
in performance. Which suggests that the change to taking the task
lock for every file descriptor found in task_lookup_next_fd will not
be a problem.
- Reviewed the code for conflicts with io_uring (especially the
removal of get_files_struct). To my surprise no conflicts were
found as io_uring does not use standard helpers but instead rolls
it's own version of get_files_struct by hand.
Documentation/filesystems/files.rst | 8 +-
arch/powerpc/platforms/cell/spufs/coredump.c | 2 +-
drivers/android/binder.c | 2 +-
fs/autofs/dev-ioctl.c | 5 +-
fs/coredump.c | 5 +-
fs/exec.c | 29 +++----
fs/file.c | 124 +++++++++++++--------------
fs/io_uring.c | 2 +-
fs/locks.c | 14 +--
fs/notify/dnotify/dnotify.c | 2 +-
fs/open.c | 2 +-
fs/proc/fd.c | 48 ++++-------
include/linux/fdtable.h | 40 +++++----
include/linux/syscalls.h | 12 ---
kernel/bpf/syscall.c | 20 +----
kernel/bpf/task_iter.c | 44 +++-------
kernel/fork.c | 12 +--
kernel/kcmp.c | 29 ++-----
18 files changed, 153 insertions(+), 247 deletions(-)
Eric W. Biederman (25):
exec: Don't open code get_close_on_exec
exec: Move unshare_files to fix posix file locking during exec
exec: Simplify unshare_files
exec: Remove reset_files_struct
kcmp: In kcmp_epoll_target use fget_task
bpf: In bpf_task_fd_query use fget_task
proc/fd: In proc_fd_link use fget_task
file: Rename __fcheck_files to files_lookup_fd_raw
file: Factor files_lookup_fd_locked out of fcheck_files
file: Replace fcheck_files with files_lookup_fd_rcu
file: Rename fcheck lookup_fd_rcu
file: Implement task_lookup_fd_rcu
proc/fd: In tid_fd_mode use task_lookup_fd_rcu
kcmp: In get_file_raw_ptr use task_lookup_fd_rcu
file: Implement task_lookup_next_fd_rcu
proc/fd: In proc_readfd_common use task_lookup_next_fd_rcu
bpf/task_iter: In task_file_seq_get_next use task_lookup_next_fd_rcu
proc/fd: In fdinfo seq_show don't use get_files_struct
file: Merge __fd_install into fd_install
file: In f_dupfd read RLIMIT_NOFILE once.
file: Merge __alloc_fd into alloc_fd
file: Rename __close_fd to close_fd and remove the files parameter
file: Replace ksys_close with close_fd
file: Rename __close_fd_get_file close_fd_get_file
file: Remove get_files_struct
[1] https://lkml.kernel.org/r/
20180915160423.GA31461@redhat.com
v1: https://lkml.kernel.org/r/87ft8l6ic3.fsf@x220.int.ebiederm.org
Reported-by: Jeff Layton <jlayton@redhat.com>
Reported-by: Daniel P. Berrangé <berrange@redhat.com>
Suggested-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lkml.kernel.org/r/87r1on1v62.fsf@x220.int.ebiederm.org
Link: https://lists.openvz.org/pipermail/criu/2020-November/045123.html
Link: https://marc.info/?l=openvz-criu&m=160591423214257
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
be held.
4. To look up the file structure given an fd, a reader
- must use either fcheck() or fcheck_files() APIs. These
+ must use either lookup_fd_rcu() or files_lookup_fd_rcu() APIs. These
take care of barrier requirements due to lock-free lookup.
An example::
struct file *file;
rcu_read_lock();
- file = fcheck(fd);
+ file = lookup_fd_rcu(fd);
if (file) {
...
}
on ->f_count::
rcu_read_lock();
- file = fcheck_files(files, fd);
+ file = files_lookup_fd_rcu(files, fd);
if (file) {
if (atomic_long_inc_not_zero(&file->f_count))
*fput_needed = 1;
lock-free, they must be installed using rcu_assign_pointer()
API. If they are looked up lock-free, rcu_dereference()
must be used. However it is advisable to use files_fdtable()
- and fcheck()/fcheck_files() which take care of these issues.
+ and lookup_fd_rcu()/files_lookup_fd_rcu() which take care of these issues.
7. While updating, the fdtable pointer must be looked up while
holding files->file_lock. If ->file_lock is dropped, then
*fd = n - 1;
rcu_read_lock();
- file = fcheck(*fd);
+ file = lookup_fd_rcu(*fd);
ctx = SPUFS_I(file_inode(file))->i_ctx;
get_spu_context(ctx);
rcu_read_unlock();
if (!twcb)
return;
init_task_work(&twcb->twork, binder_do_fd_close);
- __close_fd_get_file(fd, &twcb->file);
+ close_fd_get_file(fd, &twcb->file);
if (twcb->file) {
filp_close(twcb->file, current->files);
task_work_add(current, &twcb->twork, TWA_RESUME);
* Copyright 2008 Ian Kent <raven@themaw.net>
*/
+#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/compat.h>
-#include <linux/syscalls.h>
+#include <linux/fdtable.h>
#include <linux/magic.h>
#include <linux/nospec.h>
struct autofs_sb_info *sbi,
struct autofs_dev_ioctl *param)
{
- return ksys_close(param->ioctlfd);
+ return close_fd(param->ioctlfd);
}
/*
int ispipe;
size_t *argv = NULL;
int argc = 0;
- struct files_struct *displaced;
/* require nonrelative corefile path and be extra careful */
bool need_suid_safe = false;
bool core_dumped = false;
}
/* get us an unshared descriptor table; almost always a no-op */
- retval = unshare_files(&displaced);
+ retval = unshare_files();
if (retval)
goto close_fail;
- if (displaced)
- put_files_struct(displaced);
if (!dump_interrupted()) {
/*
* umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
if (retval)
goto out;
+ /* Ensure the files table is not shared. */
+ retval = unshare_files();
+ if (retval)
+ goto out;
+
/*
* Must be called _before_ exec_mmap() as bprm->mm is
* not visibile until then. This also enables the update
int fd, struct filename *filename, int flags)
{
struct file *file;
- struct files_struct *displaced;
int retval;
/*
*/
io_uring_task_cancel();
- retval = unshare_files(&displaced);
- if (retval)
- return retval;
-
retval = prepare_bprm_creds(bprm);
if (retval)
- goto out_files;
+ return retval;
check_unsafe_exec(bprm);
current->in_execve = 1;
bprm->file = file;
/*
* Record that a name derived from an O_CLOEXEC fd will be
- * inaccessible after exec. Relies on having exclusive access to
- * current->files (due to unshare_files above).
+ * inaccessible after exec. This allows the code in exec to
+ * choose to fail when the executable is not mmaped into the
+ * interpreter and an open file descriptor is not passed to
+ * the interpreter. This makes for a better user experience
+ * than having the interpreter start and then immediately fail
+ * when it finds the executable is inaccessible.
*/
- if (bprm->fdpath &&
- close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
+ if (bprm->fdpath && get_close_on_exec(fd))
bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
/* Set the unchanging part of bprm->cred */
rseq_execve(current);
acct_update_integrals(current);
task_numa_free(current, false);
- if (displaced)
- put_files_struct(displaced);
return retval;
out:
current->fs->in_exec = 0;
current->in_execve = 0;
-out_files:
- if (displaced)
- reset_files_struct(displaced);
-
return retval;
}
spin_unlock(&files->file_lock);
new_fdt = alloc_fdtable(nr);
- /* make sure all __fd_install() have seen resize_in_progress
+ /* make sure all fd_install() have seen resize_in_progress
* or have finished their rcu_read_lock_sched() section.
*/
if (atomic_read(&files->count) > 1)
rcu_assign_pointer(files->fdt, new_fdt);
if (cur_fdt != &files->fdtab)
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
- /* coupled with smp_rmb() in __fd_install() */
+ /* coupled with smp_rmb() in fd_install() */
smp_wmb();
return 1;
}
return fdt;
}
-struct files_struct *get_files_struct(struct task_struct *task)
-{
- struct files_struct *files;
-
- task_lock(task);
- files = task->files;
- if (files)
- atomic_inc(&files->count);
- task_unlock(task);
-
- return files;
-}
-
void put_files_struct(struct files_struct *files)
{
if (atomic_dec_and_test(&files->count)) {
}
}
-void reset_files_struct(struct files_struct *files)
-{
- struct task_struct *tsk = current;
- struct files_struct *old;
-
- old = tsk->files;
- task_lock(tsk);
- tsk->files = files;
- task_unlock(tsk);
- put_files_struct(old);
-}
-
void exit_files(struct task_struct *tsk)
{
struct files_struct * files = tsk->files;
/*
* allocate a file descriptor, mark it busy.
*/
-int __alloc_fd(struct files_struct *files,
- unsigned start, unsigned end, unsigned flags)
+static int alloc_fd(unsigned start, unsigned end, unsigned flags)
{
+ struct files_struct *files = current->files;
unsigned int fd;
int error;
struct fdtable *fdt;
return error;
}
-static int alloc_fd(unsigned start, unsigned flags)
-{
- return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
-}
-
int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
{
- return __alloc_fd(current->files, 0, nofile, flags);
+ return alloc_fd(0, nofile, flags);
}
int get_unused_fd_flags(unsigned flags)
* It should never happen - if we allow dup2() do it, _really_ bad things
* will follow.
*
- * NOTE: __fd_install() variant is really, really low-level; don't
- * use it unless you are forced to by truly lousy API shoved down
- * your throat. 'files' *MUST* be either current->files or obtained
- * by get_files_struct(current) done by whoever had given it to you,
- * or really bad things will happen. Normally you want to use
- * fd_install() instead.
+ * This consumes the "file" refcount, so callers should treat it
+ * as if they had called fput(file).
*/
-void __fd_install(struct files_struct *files, unsigned int fd,
- struct file *file)
+void fd_install(unsigned int fd, struct file *file)
{
+ struct files_struct *files = current->files;
struct fdtable *fdt;
rcu_read_lock_sched();
rcu_read_unlock_sched();
}
-/*
- * This consumes the "file" refcount, so callers should treat it
- * as if they had called fput(file).
- */
-void fd_install(unsigned int fd, struct file *file)
-{
- __fd_install(current->files, fd, file);
-}
-
EXPORT_SYMBOL(fd_install);
static struct file *pick_file(struct files_struct *files, unsigned fd)
return file;
}
-/*
- * The same warnings as for __alloc_fd()/__fd_install() apply here...
- */
-int __close_fd(struct files_struct *files, unsigned fd)
+int close_fd(unsigned fd)
{
+ struct files_struct *files = current->files;
struct file *file;
file = pick_file(files, fd);
return filp_close(file, files);
}
-EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
+EXPORT_SYMBOL(close_fd); /* for ksys_close() */
/**
* __close_range() - Close all file descriptors in a given range.
}
/*
- * variant of __close_fd that gets a ref on the file for later fput.
+ * variant of close_fd that gets a ref on the file for later fput.
* The caller must ensure that filp_close() called on the file, and then
* an fput().
*/
-int __close_fd_get_file(unsigned int fd, struct file **res)
+int close_fd_get_file(unsigned int fd, struct file **res)
{
struct files_struct *files = current->files;
struct file *file;
rcu_read_lock();
loop:
- file = fcheck_files(files, fd);
+ file = files_lookup_fd_rcu(files, fd);
if (file) {
/* File object ref couldn't be taken.
* dup2() atomicity guarantee is the reason
return file;
}
+struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd)
+{
+ /* Must be called with rcu_read_lock held */
+ struct files_struct *files;
+ struct file *file = NULL;
+
+ task_lock(task);
+ files = task->files;
+ if (files)
+ file = files_lookup_fd_rcu(files, fd);
+ task_unlock(task);
+
+ return file;
+}
+
+struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret_fd)
+{
+ /* Must be called with rcu_read_lock held */
+ struct files_struct *files;
+ unsigned int fd = *ret_fd;
+ struct file *file = NULL;
+
+ task_lock(task);
+ files = task->files;
+ if (files) {
+ for (; fd < files_fdtable(files)->max_fds; fd++) {
+ file = files_lookup_fd_rcu(files, fd);
+ if (file)
+ break;
+ }
+ }
+ task_unlock(task);
+ *ret_fd = fd;
+ return file;
+}
+
/*
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
*
struct file *file;
if (atomic_read(&files->count) == 1) {
- file = __fcheck_files(files, fd);
+ file = files_lookup_fd_raw(files, fd);
if (!file || unlikely(file->f_mode & mask))
return 0;
return (unsigned long)file;
struct files_struct *files = current->files;
if (!file)
- return __close_fd(files, fd);
+ return close_fd(fd);
if (fd >= rlimit(RLIMIT_NOFILE))
return -EBADF;
spin_lock(&files->file_lock);
err = expand_files(files, newfd);
- file = fcheck(oldfd);
+ file = files_lookup_fd_locked(files, oldfd);
if (unlikely(!file))
goto Ebadf;
if (unlikely(err < 0)) {
int retval = oldfd;
rcu_read_lock();
- if (!fcheck_files(files, oldfd))
+ if (!files_lookup_fd_rcu(files, oldfd))
retval = -EBADF;
rcu_read_unlock();
return retval;
int f_dupfd(unsigned int from, struct file *file, unsigned flags)
{
+ unsigned long nofile = rlimit(RLIMIT_NOFILE);
int err;
- if (from >= rlimit(RLIMIT_NOFILE))
+ if (from >= nofile)
return -EINVAL;
- err = alloc_fd(from, flags);
+ err = alloc_fd(from, nofile, flags);
if (err >= 0) {
get_file(file);
fd_install(err, file);
/* might be already done during nonblock submission */
if (!close->put_file) {
- ret = __close_fd_get_file(close->fd, &close->put_file);
+ ret = close_fd_get_file(close->fd, &close->put_file);
if (ret < 0)
return (ret == -ENOENT) ? -EBADF : ret;
}
*/
if (!error && file_lock->fl_type != F_UNLCK &&
!(file_lock->fl_flags & FL_OFDLCK)) {
+ struct files_struct *files = current->files;
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
* close(). rcu_read_lock() wouldn't do.
*/
- spin_lock(¤t->files->file_lock);
- f = fcheck(fd);
- spin_unlock(¤t->files->file_lock);
+ spin_lock(&files->file_lock);
+ f = files_lookup_fd_locked(files, fd);
+ spin_unlock(&files->file_lock);
if (f != filp) {
file_lock->fl_type = F_UNLCK;
error = do_lock_file_wait(filp, cmd, file_lock);
*/
if (!error && file_lock->fl_type != F_UNLCK &&
!(file_lock->fl_flags & FL_OFDLCK)) {
+ struct files_struct *files = current->files;
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
* close(). rcu_read_lock() wouldn't do.
*/
- spin_lock(¤t->files->file_lock);
- f = fcheck(fd);
- spin_unlock(¤t->files->file_lock);
+ spin_lock(&files->file_lock);
+ f = files_lookup_fd_locked(files, fd);
+ spin_unlock(&files->file_lock);
if (f != filp) {
file_lock->fl_type = F_UNLCK;
error = do_lock_file_wait(filp, cmd, file_lock);
}
rcu_read_lock();
- f = fcheck(fd);
+ f = lookup_fd_rcu(fd);
rcu_read_unlock();
/* if (f != filp) means that we lost a race and another task/thread
*/
SYSCALL_DEFINE1(close, unsigned int, fd)
{
- int retval = __close_fd(current->files, fd);
+ int retval = close_fd(fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
if (!task)
return -ENOENT;
- files = get_files_struct(task);
- put_task_struct(task);
-
+ task_lock(task);
+ files = task->files;
if (files) {
unsigned int fd = proc_fd(m->private);
spin_lock(&files->file_lock);
- file = fcheck_files(files, fd);
+ file = files_lookup_fd_locked(files, fd);
if (file) {
struct fdtable *fdt = files_fdtable(files);
ret = 0;
}
spin_unlock(&files->file_lock);
- put_files_struct(files);
}
+ task_unlock(task);
+ put_task_struct(task);
if (ret)
return ret;
(long long)file->f_pos, f_flags,
real_mount(file->f_path.mnt)->mnt_id);
+ /* show_fd_locks() never deferences files so a stale value is safe */
show_fd_locks(m, file, files);
if (seq_has_overflowed(m))
goto out;
static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
{
- struct files_struct *files = get_files_struct(task);
struct file *file;
- if (!files)
- return false;
-
rcu_read_lock();
- file = fcheck_files(files, fd);
+ file = task_lookup_fd_rcu(task, fd);
if (file)
*mode = file->f_mode;
rcu_read_unlock();
- put_files_struct(files);
return !!file;
}
static int proc_fd_link(struct dentry *dentry, struct path *path)
{
- struct files_struct *files = NULL;
struct task_struct *task;
int ret = -ENOENT;
task = get_proc_task(d_inode(dentry));
if (task) {
- files = get_files_struct(task);
- put_task_struct(task);
- }
-
- if (files) {
unsigned int fd = proc_fd(d_inode(dentry));
struct file *fd_file;
- spin_lock(&files->file_lock);
- fd_file = fcheck_files(files, fd);
+ fd_file = fget_task(task, fd);
if (fd_file) {
*path = fd_file->f_path;
path_get(&fd_file->f_path);
ret = 0;
+ fput(fd_file);
}
- spin_unlock(&files->file_lock);
- put_files_struct(files);
+ put_task_struct(task);
}
return ret;
instantiate_t instantiate)
{
struct task_struct *p = get_proc_task(file_inode(file));
- struct files_struct *files;
unsigned int fd;
if (!p)
if (!dir_emit_dots(file, ctx))
goto out;
- files = get_files_struct(p);
- if (!files)
- goto out;
rcu_read_lock();
- for (fd = ctx->pos - 2;
- fd < files_fdtable(files)->max_fds;
- fd++, ctx->pos++) {
+ for (fd = ctx->pos - 2;; fd++) {
struct file *f;
struct fd_data data;
char name[10 + 1];
unsigned int len;
- f = fcheck_files(files, fd);
+ f = task_lookup_next_fd_rcu(p, &fd);
+ ctx->pos = fd + 2LL;
if (!f)
- continue;
+ break;
data.mode = f->f_mode;
rcu_read_unlock();
data.fd = fd;
if (!proc_fill_cache(file, ctx,
name, len, instantiate, p,
&data))
- goto out_fd_loop;
+ goto out;
cond_resched();
rcu_read_lock();
}
rcu_read_unlock();
-out_fd_loop:
- put_files_struct(files);
out:
put_task_struct(p);
return 0;
/*
* The caller must ensure that fd table isn't shared or hold rcu or file lock
*/
-static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd)
+static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
return NULL;
}
-static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd)
+static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd)
{
- RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
- !lockdep_is_held(&files->file_lock),
+ RCU_LOCKDEP_WARN(!lockdep_is_held(&files->file_lock),
"suspicious rcu_dereference_check() usage");
- return __fcheck_files(files, fd);
+ return files_lookup_fd_raw(files, fd);
}
-/*
- * Check whether the specified fd has an open file.
- */
-#define fcheck(fd) fcheck_files(current->files, fd)
+static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsigned int fd)
+{
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
+ "suspicious rcu_dereference_check() usage");
+ return files_lookup_fd_raw(files, fd);
+}
+
+static inline struct file *lookup_fd_rcu(unsigned int fd)
+{
+ return files_lookup_fd_rcu(current->files, fd);
+}
+
+struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd);
+struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd);
struct task_struct;
-struct files_struct *get_files_struct(struct task_struct *);
void put_files_struct(struct files_struct *fs);
-void reset_files_struct(struct files_struct *);
-int unshare_files(struct files_struct **);
+int unshare_files(void);
struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
void do_close_on_exec(struct files_struct *);
int iterate_fd(struct files_struct *, unsigned,
int (*)(const void *, struct file *, unsigned),
const void *);
-extern int __alloc_fd(struct files_struct *files,
- unsigned start, unsigned end, unsigned flags);
-extern void __fd_install(struct files_struct *files,
- unsigned int fd, struct file *file);
-extern int __close_fd(struct files_struct *files,
- unsigned int fd);
+extern int close_fd(unsigned int fd);
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
-extern int __close_fd_get_file(unsigned int fd, struct file **res);
+extern int close_fd_get_file(unsigned int fd, struct file **res);
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
struct files_struct **new_fdp);
return do_sys_ftruncate(fd, length, 1);
}
-extern int __close_fd(struct files_struct *files, unsigned int fd);
-
-/*
- * In contrast to sys_close(), this stub does not check whether the syscall
- * should or should not be restarted, but returns the raw error codes from
- * __close_fd().
- */
-static inline int ksys_close(unsigned int fd)
-{
- return __close_fd(current->files, fd);
-}
-
extern long do_sys_truncate(const char __user *pathname, loff_t length);
static inline long ksys_truncate(const char __user *pathname, loff_t length)
pid_t pid = attr->task_fd_query.pid;
u32 fd = attr->task_fd_query.fd;
const struct perf_event *event;
- struct files_struct *files;
struct task_struct *task;
struct file *file;
int err;
if (!task)
return -ENOENT;
- files = get_files_struct(task);
- put_task_struct(task);
- if (!files)
- return -ENOENT;
-
err = 0;
- spin_lock(&files->file_lock);
- file = fcheck_files(files, fd);
+ file = fget_task(task, fd);
+ put_task_struct(task);
if (!file)
- err = -EBADF;
- else
- get_file(file);
- spin_unlock(&files->file_lock);
- put_files_struct(files);
-
- if (err)
- goto out;
+ return -EBADF;
if (file->f_op == &bpf_link_fops) {
struct bpf_link *link = file->private_data;
err = -ENOTSUPP;
put_file:
fput(file);
-out:
return err;
}
*/
struct bpf_iter_seq_task_common common;
struct task_struct *task;
- struct files_struct *files;
u32 tid;
u32 fd;
};
static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
- struct task_struct **task, struct files_struct **fstruct)
+ struct task_struct **task)
{
struct pid_namespace *ns = info->common.ns;
- u32 curr_tid = info->tid, max_fds;
- struct files_struct *curr_files;
+ u32 curr_tid = info->tid;
struct task_struct *curr_task;
- int curr_fd = info->fd;
+ unsigned int curr_fd = info->fd;
/* If this function returns a non-NULL file object,
- * it held a reference to the task/files_struct/file.
+ * it held a reference to the task/file.
* Otherwise, it does not hold any reference.
*/
again:
if (*task) {
curr_task = *task;
- curr_files = *fstruct;
curr_fd = info->fd;
} else {
curr_task = task_seq_get_next(ns, &curr_tid, true);
if (!curr_task)
return NULL;
- curr_files = get_files_struct(curr_task);
- if (!curr_files) {
- put_task_struct(curr_task);
- curr_tid = ++(info->tid);
- info->fd = 0;
- goto again;
- }
-
- /* set *fstruct, *task and info->tid */
- *fstruct = curr_files;
+ /* set *task and info->tid */
*task = curr_task;
if (curr_tid == info->tid) {
curr_fd = info->fd;
}
rcu_read_lock();
- max_fds = files_fdtable(curr_files)->max_fds;
- for (; curr_fd < max_fds; curr_fd++) {
+ for (;; curr_fd++) {
struct file *f;
-
- f = fcheck_files(curr_files, curr_fd);
+ f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
if (!f)
- continue;
+ break;
if (!get_file_rcu(f))
continue;
/* the current task is done, go to the next task */
rcu_read_unlock();
- put_files_struct(curr_files);
put_task_struct(curr_task);
*task = NULL;
- *fstruct = NULL;
info->fd = 0;
curr_tid = ++(info->tid);
goto again;
static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
{
struct bpf_iter_seq_task_file_info *info = seq->private;
- struct files_struct *files = NULL;
struct task_struct *task = NULL;
struct file *file;
- file = task_file_seq_get_next(info, &task, &files);
+ file = task_file_seq_get_next(info, &task);
if (!file) {
- info->files = NULL;
info->task = NULL;
return NULL;
}
if (*pos == 0)
++*pos;
info->task = task;
- info->files = files;
return file;
}
static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct bpf_iter_seq_task_file_info *info = seq->private;
- struct files_struct *files = info->files;
struct task_struct *task = info->task;
struct file *file;
++*pos;
++info->fd;
fput((struct file *)v);
- file = task_file_seq_get_next(info, &task, &files);
+ file = task_file_seq_get_next(info, &task);
if (!file) {
- info->files = NULL;
info->task = NULL;
return NULL;
}
info->task = task;
- info->files = files;
return file;
}
(void)__task_file_seq_show(seq, v, true);
} else {
fput((struct file *)v);
- put_files_struct(info->files);
put_task_struct(info->task);
- info->files = NULL;
info->task = NULL;
}
}
* the exec layer of the kernel.
*/
-int unshare_files(struct files_struct **displaced)
+int unshare_files(void)
{
struct task_struct *task = current;
- struct files_struct *copy = NULL;
+ struct files_struct *old, *copy = NULL;
int error;
error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, ©);
- if (error || !copy) {
- *displaced = NULL;
+ if (error || !copy)
return error;
- }
- *displaced = task->files;
+
+ old = task->files;
task_lock(task);
task->files = copy;
task_unlock(task);
+ put_files_struct(old);
return 0;
}
static struct file *
get_file_raw_ptr(struct task_struct *task, unsigned int idx)
{
- struct file *file = NULL;
+ struct file *file;
- task_lock(task);
rcu_read_lock();
-
- if (task->files)
- file = fcheck_files(task->files, idx);
-
+ file = task_lookup_fd_rcu(task, idx);
rcu_read_unlock();
- task_unlock(task);
return file;
}
{
struct file *filp, *filp_epoll, *filp_tgt;
struct kcmp_epoll_slot slot;
- struct files_struct *files;
if (copy_from_user(&slot, uslot, sizeof(slot)))
return -EFAULT;
if (!filp)
return -EBADF;
- files = get_files_struct(task2);
- if (!files)
+ filp_epoll = fget_task(task2, slot.efd);
+ if (!filp_epoll)
return -EBADF;
- spin_lock(&files->file_lock);
- filp_epoll = fcheck_files(files, slot.efd);
- if (filp_epoll)
- get_file(filp_epoll);
- else
- filp_tgt = ERR_PTR(-EBADF);
- spin_unlock(&files->file_lock);
- put_files_struct(files);
-
- if (filp_epoll) {
- filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
- fput(filp_epoll);
- }
+ filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+ fput(filp_epoll);
if (IS_ERR(filp_tgt))
return PTR_ERR(filp_tgt);