* formats.
*/
+#include <linux/kernel_read_file.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/oom.h>
#include <linux/compat.h>
#include <linux/vmalloc.h>
+#include <linux/io_uring.h>
+#include <linux/syscall_user_dispatch.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#ifdef CONFIG_STACK_GROWSUP
/* Limit stack size */
stack_base = bprm->rlim_stack.rlim_max;
- if (stack_base > STACK_SIZE_MAX)
- stack_base = STACK_SIZE_MAX;
+
+ stack_base = calc_max_stack_size(stack_base);
/* Add space for stack randomization. */
stack_base += (STACK_RND_MASK << PAGE_SHIFT);
}
EXPORT_SYMBOL(open_exec);
-int kernel_read_file(struct file *file, void **buf, loff_t *size,
- loff_t max_size, enum kernel_read_file_id id)
-{
- loff_t i_size, pos;
- ssize_t bytes = 0;
- int ret;
-
- if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0)
- return -EINVAL;
-
- ret = deny_write_access(file);
- if (ret)
- return ret;
-
- ret = security_kernel_read_file(file, id);
- if (ret)
- goto out;
-
- i_size = i_size_read(file_inode(file));
- if (i_size <= 0) {
- ret = -EINVAL;
- goto out;
- }
- if (i_size > SIZE_MAX || (max_size > 0 && i_size > max_size)) {
- ret = -EFBIG;
- goto out;
- }
-
- if (id != READING_FIRMWARE_PREALLOC_BUFFER)
- *buf = vmalloc(i_size);
- if (!*buf) {
- ret = -ENOMEM;
- goto out;
- }
-
- pos = 0;
- while (pos < i_size) {
- bytes = kernel_read(file, *buf + pos, i_size - pos, &pos);
- if (bytes < 0) {
- ret = bytes;
- goto out_free;
- }
-
- if (bytes == 0)
- break;
- }
-
- if (pos != i_size) {
- ret = -EIO;
- goto out_free;
- }
-
- ret = security_kernel_post_read_file(file, *buf, i_size, id);
- if (!ret)
- *size = pos;
-
-out_free:
- if (ret < 0) {
- if (id != READING_FIRMWARE_PREALLOC_BUFFER) {
- vfree(*buf);
- *buf = NULL;
- }
- }
-
-out:
- allow_write_access(file);
- return ret;
-}
-EXPORT_SYMBOL_GPL(kernel_read_file);
-
-int kernel_read_file_from_path(const char *path, void **buf, loff_t *size,
- loff_t max_size, enum kernel_read_file_id id)
-{
- struct file *file;
- int ret;
-
- if (!path || !*path)
- return -EINVAL;
-
- file = filp_open(path, O_RDONLY, 0);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- ret = kernel_read_file(file, buf, size, max_size, id);
- fput(file);
- return ret;
-}
-EXPORT_SYMBOL_GPL(kernel_read_file_from_path);
-
-int kernel_read_file_from_path_initns(const char *path, void **buf,
- loff_t *size, loff_t max_size,
- enum kernel_read_file_id id)
-{
- struct file *file;
- struct path root;
- int ret;
-
- if (!path || !*path)
- return -EINVAL;
-
- task_lock(&init_task);
- get_fs_root(init_task.fs, &root);
- task_unlock(&init_task);
-
- file = file_open_root(root.dentry, root.mnt, path, O_RDONLY, 0);
- path_put(&root);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- ret = kernel_read_file(file, buf, size, max_size, id);
- fput(file);
- return ret;
-}
-EXPORT_SYMBOL_GPL(kernel_read_file_from_path_initns);
-
-int kernel_read_file_from_fd(int fd, void **buf, loff_t *size, loff_t max_size,
- enum kernel_read_file_id id)
-{
- struct fd f = fdget(fd);
- int ret = -EBADF;
-
- if (!f.file)
- goto out;
-
- ret = kernel_read_file(f.file, buf, size, max_size, id);
-out:
- fdput(f);
- return ret;
-}
-EXPORT_SYMBOL_GPL(kernel_read_file_from_fd);
-
#if defined(CONFIG_HAVE_AOUT) || defined(CONFIG_BINFMT_FLAT) || \
defined(CONFIG_BINFMT_ELF_FDPIC)
ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
/*
* Maps the mm_struct mm into the current task struct.
- * On success, this function returns with the mutex
- * exec_update_mutex locked.
+ * On success, this function returns with exec_update_lock
+ * held for writing.
*/
static int exec_mmap(struct mm_struct *mm)
{
if (old_mm)
sync_mm_rss(old_mm);
- ret = mutex_lock_killable(&tsk->signal->exec_update_mutex);
+ ret = down_write_killable(&tsk->signal->exec_update_lock);
if (ret)
return ret;
mmap_read_lock(old_mm);
if (unlikely(old_mm->core_state)) {
mmap_read_unlock(old_mm);
- mutex_unlock(&tsk->signal->exec_update_mutex);
+ up_write(&tsk->signal->exec_update_lock);
return -EINTR;
}
}
task_lock(tsk);
- active_mm = tsk->active_mm;
membarrier_exec_mmap(mm);
- tsk->mm = mm;
+
+ local_irq_disable();
+ active_mm = tsk->active_mm;
tsk->active_mm = mm;
+ tsk->mm = mm;
+ /*
+ * This prevents preemption while active_mm is being loaded and
+ * it and mm are being updated, which could cause problems for
+ * lazy tlb mm refcounting when these are updated by context
+ * switches. Not all architectures can handle irqs off over
+ * activate_mm yet.
+ */
+ if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
activate_mm(active_mm, mm);
+ if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
tsk->mm->vmacache_seqnum = 0;
vmacache_flush(tsk);
task_unlock(tsk);
if (retval)
goto out;
+ /*
+ * Cancel any io_uring activity across execve
+ */
+ io_uring_task_cancel();
+
+ /* Ensure the files table is not shared. */
+ retval = unshare_files();
+ if (retval)
+ goto out;
+
/*
* Must be called _before_ exec_mmap() as bprm->mm is
* not visibile until then. This also enables the update
flush_thread();
me->personality &= ~bprm->per_clear;
+ clear_syscall_work_syscall_user_dispatch(me);
+
/*
* We have to apply CLOEXEC before we change whether the process is
* dumpable (in setup_new_exec) to avoid a race with a process in userspace
return 0;
out_unlock:
- mutex_unlock(&me->signal->exec_update_mutex);
+ up_write(&me->signal->exec_update_lock);
out:
return retval;
}
* some architectures like powerpc
*/
me->mm->task_size = TASK_SIZE;
- mutex_unlock(&me->signal->exec_update_mutex);
+ up_write(&me->signal->exec_update_lock);
mutex_unlock(&me->signal->cred_guard_mutex);
}
EXPORT_SYMBOL(setup_new_exec);
int fd, struct filename *filename, int flags)
{
struct file *file;
- struct files_struct *displaced;
int retval;
- retval = unshare_files(&displaced);
- if (retval)
- return retval;
-
retval = prepare_bprm_creds(bprm);
if (retval)
- goto out_files;
+ return retval;
check_unsafe_exec(bprm);
current->in_execve = 1;
bprm->file = file;
/*
* Record that a name derived from an O_CLOEXEC fd will be
- * inaccessible after exec. Relies on having exclusive access to
- * current->files (due to unshare_files above).
+ * inaccessible after exec. This allows the code in exec to
+ * choose to fail when the executable is not mmaped into the
+ * interpreter and an open file descriptor is not passed to
+ * the interpreter. This makes for a better user experience
+ * than having the interpreter start and then immediately fail
+ * when it finds the executable is inaccessible.
*/
- if (bprm->fdpath &&
- close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
+ if (bprm->fdpath && get_close_on_exec(fd))
bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
/* Set the unchanging part of bprm->cred */
rseq_execve(current);
acct_update_integrals(current);
task_numa_free(current, false);
- if (displaced)
- put_files_struct(displaced);
return retval;
out:
current->fs->in_exec = 0;
current->in_execve = 0;
-out_files:
- if (displaced)
- reset_files_struct(displaced);
-
return retval;
}