kernel/fork: always deny write access to current MM exe_file
authorDavid Hildenbrand <david@redhat.com>
Fri, 23 Apr 2021 08:29:59 +0000 (10:29 +0200)
committerDavid Hildenbrand <david@redhat.com>
Fri, 3 Sep 2021 16:42:01 +0000 (18:42 +0200)
We want to remove VM_DENYWRITE only currently only used when mapping the
executable during exec. During exec, we already deny_write_access() the
executable, however, after exec completes the VMAs mapped
with VM_DENYWRITE effectively keeps write access denied via
deny_write_access().

Let's deny write access when setting or replacing the MM exe_file. With
this change, we can remove VM_DENYWRITE for mapping executables.

Make set_mm_exe_file() return an error in case deny_write_access()
fails; note that this should never happen, because exec code does a
deny_write_access() early and keeps write access denied when calling
set_mm_exe_file. However, it makes the code easier to read and makes
set_mm_exe_file() and replace_mm_exe_file() look more similar.

This represents a minor user space visible change:
sys_prctl(PR_SET_MM_MAP/EXE_FILE) can now fail if the file is already
opened writable. Also, after sys_prctl(PR_SET_MM_MAP/EXE_FILE) the file
cannot be opened writable. Note that we can already fail with -EACCES if
the file doesn't have execute permissions.

Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
fs/exec.c
include/linux/mm.h
kernel/fork.c

index 38f6345..9294049 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1270,7 +1270,9 @@ int begin_new_exec(struct linux_binprm * bprm)
         * not visibile until then. This also enables the update
         * to be lockless.
         */
-       set_mm_exe_file(bprm->mm, bprm->file);
+       retval = set_mm_exe_file(bprm->mm, bprm->file);
+       if (retval)
+               goto out;
 
        /* If the binary is not readable then enforce mm->dumpable=0 */
        would_dump(bprm, bprm->file);
index 48c6fa9..56b1cd4 100644 (file)
@@ -2580,7 +2580,7 @@ static inline int check_data_rlimit(unsigned long rlim,
 extern int mm_take_all_locks(struct mm_struct *mm);
 extern void mm_drop_all_locks(struct mm_struct *mm);
 
-extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
+extern int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
 extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
 extern struct file *get_task_exe_file(struct task_struct *task);
index f4ac883..7677f89 100644 (file)
@@ -470,6 +470,20 @@ void free_task(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(free_task);
 
+static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+       struct file *exe_file;
+
+       exe_file = get_mm_exe_file(oldmm);
+       RCU_INIT_POINTER(mm->exe_file, exe_file);
+       /*
+        * We depend on the oldmm having properly denied write access to the
+        * exe_file already.
+        */
+       if (exe_file && deny_write_access(exe_file))
+               pr_warn_once("deny_write_access() failed in %s\n", __func__);
+}
+
 #ifdef CONFIG_MMU
 static __latent_entropy int dup_mmap(struct mm_struct *mm,
                                        struct mm_struct *oldmm)
@@ -493,7 +507,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
        mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);
 
        /* No ordering required: file already has been exposed. */
-       RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+       dup_mm_exe_file(mm, oldmm);
 
        mm->total_vm = oldmm->total_vm;
        mm->data_vm = oldmm->data_vm;
@@ -639,7 +653,7 @@ static inline void mm_free_pgd(struct mm_struct *mm)
 static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 {
        mmap_write_lock(oldmm);
-       RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+       dup_mm_exe_file(mm, oldmm);
        mmap_write_unlock(oldmm);
        return 0;
 }
@@ -1149,8 +1163,10 @@ void mmput_async(struct mm_struct *mm)
  * Main users are mmput() and sys_execve(). Callers prevent concurrent
  * invocations: in mmput() nobody alive left, in execve task is single
  * threaded.
+ *
+ * Can only fail if new_exe_file != NULL.
  */
-void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 {
        struct file *old_exe_file;
 
@@ -1161,11 +1177,21 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
         */
        old_exe_file = rcu_dereference_raw(mm->exe_file);
 
-       if (new_exe_file)
+       if (new_exe_file) {
+               /*
+                * We expect the caller (i.e., sys_execve) to already denied
+                * write access, so this is unlikely to fail.
+                */
+               if (unlikely(deny_write_access(new_exe_file)))
+                       return -EACCES;
                get_file(new_exe_file);
+       }
        rcu_assign_pointer(mm->exe_file, new_exe_file);
-       if (old_exe_file)
+       if (old_exe_file) {
+               allow_write_access(old_exe_file);
                fput(old_exe_file);
+       }
+       return 0;
 }
 
 /**
@@ -1201,10 +1227,22 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
        }
 
        /* set the new file, lockless */
+       ret = deny_write_access(new_exe_file);
+       if (ret)
+               return -EACCES;
        get_file(new_exe_file);
+
        old_exe_file = xchg(&mm->exe_file, new_exe_file);
-       if (old_exe_file)
+       if (old_exe_file) {
+               /*
+                * Don't race with dup_mmap() getting the file and disallowing
+                * write access while someone might open the file writable.
+                */
+               mmap_read_lock(mm);
+               allow_write_access(old_exe_file);
                fput(old_exe_file);
+               mmap_read_unlock(mm);
+       }
        return 0;
 }