Merge branch 'akpm' (patches from Andrew)
[linux-2.6-microblaze.git] kernel/fork.c
index df72964..ff5be23 100644
@@ -446,6 +446,7 @@ void put_task_stack(struct task_struct *tsk)
 
 void free_task(struct task_struct *tsk)
 {
+       release_user_cpus_ptr(tsk);
        scs_release(tsk);
 
 #ifndef CONFIG_THREAD_INFO_IN_TASK
@@ -470,6 +471,20 @@ void free_task(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(free_task);
 
+static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+       struct file *exe_file;
+
+       exe_file = get_mm_exe_file(oldmm);
+       RCU_INIT_POINTER(mm->exe_file, exe_file);
+       /*
+        * We depend on the oldmm having properly denied write access to the
+        * exe_file already.
+        */
+       if (exe_file && deny_write_access(exe_file))
+               pr_warn_once("deny_write_access() failed in %s\n", __func__);
+}
+
 #ifdef CONFIG_MMU
 static __latent_entropy int dup_mmap(struct mm_struct *mm,
                                        struct mm_struct *oldmm)
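
The new dup_mm_exe_file() helper takes an extra write-access denial on the copied exe_file, relying on the oldmm already holding one. As a minimal sketch of the convention it leans on, assuming the usual i_writecount rules (positive means active writers, negative means "write denied" holders such as a mapped exe_file; the real deny_write_access()/allow_write_access() helpers live in include/linux/fs.h), the pair behaves roughly like this simplified userspace-style model:

/*
 * Simplified model of the i_writecount protocol, NOT the kernel
 * implementation: >0 counts writers, <0 counts write deniers, 0 is neither.
 */
#include <stdatomic.h>
#include <errno.h>

struct model_inode {
	atomic_int i_writecount;
};

/* Fails when the file already has writers open. */
static int model_deny_write_access(struct model_inode *inode)
{
	int count = atomic_load(&inode->i_writecount);

	while (count <= 0) {
		/* Move one step further negative: one more denier. */
		if (atomic_compare_exchange_weak(&inode->i_writecount,
						 &count, count - 1))
			return 0;
	}
	return -ETXTBSY;
}

static void model_allow_write_access(struct model_inode *inode)
{
	atomic_fetch_add(&inode->i_writecount, 1);
}

The pairing is visible in the rest of this diff: dup_mm_exe_file() and set_mm_exe_file() take a denial on the new exe_file, while set_mm_exe_file() and replace_mm_exe_file() drop the old file's denial via allow_write_access() before fput().
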
@@ -493,7 +508,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
        mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);
 
        /* No ordering required: file already has been exposed. */
-       RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+       dup_mm_exe_file(mm, oldmm);
 
        mm->total_vm = oldmm->total_vm;
        mm->data_vm = oldmm->data_vm;
@@ -556,12 +571,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
                file = tmp->vm_file;
                if (file) {
-                       struct inode *inode = file_inode(file);
                        struct address_space *mapping = file->f_mapping;
 
                        get_file(file);
-                       if (tmp->vm_flags & VM_DENYWRITE)
-                               put_write_access(inode);
                        i_mmap_lock_write(mapping);
                        if (tmp->vm_flags & VM_SHARED)
                                mapping_allow_writable(mapping);
@@ -639,7 +651,7 @@ static inline void mm_free_pgd(struct mm_struct *mm)
 static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 {
        mmap_write_lock(oldmm);
-       RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+       dup_mm_exe_file(mm, oldmm);
        mmap_write_unlock(oldmm);
        return 0;
 }
@@ -924,6 +936,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #endif
        if (orig->cpus_ptr == &orig->cpus_mask)
                tsk->cpus_ptr = &tsk->cpus_mask;
+       dup_user_cpus_ptr(tsk, orig, node);
 
        /*
         * One for the user space visible state that goes away when reaped.
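
dup_task_struct() now also copies the user-requested affinity mask via dup_user_cpus_ptr(), whose body is outside this diff (it is defined in the scheduler code). A hedged sketch of what such a helper is expected to do, with the field name and details assumed rather than taken from this patch:

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/cpumask.h>

/*
 * Hypothetical sketch only: copy the parent's user-requested affinity
 * mask, if any, into a fresh node-local allocation for the child.
 */
static int dup_user_cpus_ptr_sketch(struct task_struct *dst,
				    struct task_struct *src, int node)
{
	if (!src->user_cpus_ptr)
		return 0;

	dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node);
	if (!dst->user_cpus_ptr)
		return -ENOMEM;

	cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
	return 0;
}

Presumably release_user_cpus_ptr() in free_task() above then frees that per-task allocation on the teardown side.
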
@@ -1148,11 +1161,11 @@ void mmput_async(struct mm_struct *mm)
  *
  * Main users are mmput() and sys_execve(). Callers prevent concurrent
  * invocations: in mmput() nobody alive left, in execve task is single
- * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
- * mm->exe_file, but does so without using set_mm_exe_file() in order
- * to avoid the need for any locks.
+ * threaded.
+ *
+ * Can only fail if new_exe_file != NULL.
  */
-void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 {
        struct file *old_exe_file;
 
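
set_mm_exe_file() now returns an int because it takes its own deny_write_access() on the incoming file, and that can fail. A hypothetical caller sketch (the function name and arguments are illustrative, not the actual fs/exec.c code) showing how the error would be propagated:

#include <linux/fs.h>
#include <linux/mm.h>

/* Illustrative only: exec-time code installing the new exe_file. */
static int install_exe_file(struct mm_struct *mm, struct file *exe)
{
	int err;

	/* The caller is expected to already hold a write denial on @exe. */
	err = set_mm_exe_file(mm, exe);
	if (err)	/* -EACCES: someone still has the file open for write */
		return err;

	return 0;
}
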
@@ -1163,11 +1176,73 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
         */
        old_exe_file = rcu_dereference_raw(mm->exe_file);
 
-       if (new_exe_file)
+       if (new_exe_file) {
+               /*
+                * We expect the caller (i.e., sys_execve) to have already
+                * denied write access, so this is unlikely to fail.
+                */
+               if (unlikely(deny_write_access(new_exe_file)))
+                       return -EACCES;
                get_file(new_exe_file);
+       }
        rcu_assign_pointer(mm->exe_file, new_exe_file);
-       if (old_exe_file)
+       if (old_exe_file) {
+               allow_write_access(old_exe_file);
                fput(old_exe_file);
+       }
+       return 0;
+}
+
+/**
+ * replace_mm_exe_file - replace a reference to the mm's executable file
+ *
+ * This changes mm's executable file (shown as symlink /proc/[pid]/exe),
+ * dealing with concurrent invocation and without grabbing the mmap lock in
+ * write mode.
+ *
+ * Main user is sys_prctl(PR_SET_MM_MAP/EXE_FILE).
+ */
+int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+{
+       struct vm_area_struct *vma;
+       struct file *old_exe_file;
+       int ret = 0;
+
+       /* Forbid mm->exe_file change if old file still mapped. */
+       old_exe_file = get_mm_exe_file(mm);
+       if (old_exe_file) {
+               mmap_read_lock(mm);
+               for (vma = mm->mmap; vma && !ret; vma = vma->vm_next) {
+                       if (!vma->vm_file)
+                               continue;
+                       if (path_equal(&vma->vm_file->f_path,
+                                      &old_exe_file->f_path))
+                               ret = -EBUSY;
+               }
+               mmap_read_unlock(mm);
+               fput(old_exe_file);
+               if (ret)
+                       return ret;
+       }
+
+       /* set the new file, lockless */
+       ret = deny_write_access(new_exe_file);
+       if (ret)
+               return -EACCES;
+       get_file(new_exe_file);
+
+       old_exe_file = xchg(&mm->exe_file, new_exe_file);
+       if (old_exe_file) {
+               /*
+                * Don't race with dup_mmap() getting the file and disallowing
+                * write access while someone might open the file writable.
+                */
+               mmap_read_lock(mm);
+               allow_write_access(old_exe_file);
+               fput(old_exe_file);
+               mmap_read_unlock(mm);
+       }
+       return 0;
 }
 
 /**
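
For the main user named in the kernel-doc above, the userspace-visible entry point is prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, fd, ...), which ends up in replace_mm_exe_file(). A sketch, assuming a sufficiently privileged caller (historically CAP_SYS_RESOURCE) and that every mapping of the old exe file has already been unmapped, otherwise the vma loop above returns -EBUSY:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

/* Repoint /proc/self/exe at @path. */
int set_exe_link(const char *path)
{
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;

	if (prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, (unsigned long)fd, 0, 0)) {
		perror("PR_SET_MM_EXE_FILE");
		close(fd);
		return -1;
	}

	close(fd);
	return 0;
}

The fd can be closed right after the call; replace_mm_exe_file() takes its own reference with get_file().
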
@@ -2081,6 +2156,7 @@ static __latent_entropy struct task_struct *copy_process(
 #endif
 #ifdef CONFIG_BPF_SYSCALL
        RCU_INIT_POINTER(p->bpf_storage, NULL);
+       p->bpf_ctx = NULL;
 #endif
 
        /* Perform scheduler related setup. Assign this task to a CPU. */