mm: introduce VM_POPULATE flag to better deal with racy userspace programs
author Michel Lespinasse <walken@google.com>
Sat, 23 Feb 2013 00:32:46 +0000 (16:32 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 24 Feb 2013 01:50:11 +0000 (17:50 -0800)
The vm_populate() code populates user mappings without constantly
holding the mmap_sem.  This makes it susceptible to racy userspace
programs: the user mappings may change while vm_populate() is running,
and in this case vm_populate() may end up populating the new mapping
instead of the old one.

In order to reduce the possibility of userspace getting surprised by
this behavior, this change introduces the VM_POPULATE vma flag which
gets set on vmas we want vm_populate() to work on.  This way
vm_populate() may still end up populating the new mapping after such a
race, but only if the new mapping is also one that the user has
requested (using MAP_POPULATE, MAP_LOCKED or mlock) to be populated.

Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/mm.h
include/linux/mman.h
mm/fremap.c
mm/mlock.c
mm/mmap.c

index 0c34d34..9a5fcde 100644 (file)
@@ -87,6 +87,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_PFNMAP      0x00000400      /* Page-ranges managed without "struct page", just pure PFN */
 #define VM_DENYWRITE   0x00000800      /* ETXTBSY on write attempts.. */
 
+#define VM_POPULATE     0x00001000
 #define VM_LOCKED      0x00002000
 #define VM_IO           0x00004000     /* Memory mapped I/O or similar */
 
index 9aa863d..61c7a87 100644 (file)
@@ -79,6 +79,8 @@ calc_vm_flag_bits(unsigned long flags)
 {
        return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
               _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
-              _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    );
+              ((flags & MAP_LOCKED) ? (VM_LOCKED | VM_POPULATE) : 0) |
+              (((flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE) ?
+                                                       VM_POPULATE : 0);
 }
 #endif /* _LINUX_MMAN_H */
index 503a723..0cd4c11 100644 (file)
@@ -204,8 +204,10 @@ get_write_lock:
                        unsigned long addr;
                        struct file *file = get_file(vma->vm_file);
 
-                       addr = mmap_region(file, start, size,
-                                       vma->vm_flags, pgoff);
+                       vm_flags = vma->vm_flags;
+                       if (!(flags & MAP_NONBLOCK))
+                               vm_flags |= VM_POPULATE;
+                       addr = mmap_region(file, start, size, vm_flags, pgoff);
                        fput(file);
                        if (IS_ERR_VALUE(addr)) {
                                err = addr;
@@ -224,6 +226,12 @@ get_write_lock:
                mutex_unlock(&mapping->i_mmap_mutex);
        }
 
+       if (!(flags & MAP_NONBLOCK) && !(vma->vm_flags & VM_POPULATE)) {
+               if (!has_write_lock)
+                       goto get_write_lock;
+               vma->vm_flags |= VM_POPULATE;
+       }
+
        if (vma->vm_flags & VM_LOCKED) {
                /*
                 * drop PG_Mlocked flag for over-mapped range
index 569400a..d6378fe 100644 (file)
@@ -340,9 +340,9 @@ static int do_mlock(unsigned long start, size_t len, int on)
 
                /* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
 
-               newflags = vma->vm_flags | VM_LOCKED;
-               if (!on)
-                       newflags &= ~VM_LOCKED;
+               newflags = vma->vm_flags & ~VM_LOCKED;
+               if (on)
+                       newflags |= VM_LOCKED | VM_POPULATE;
 
                tmp = vma->vm_end;
                if (tmp > end)
@@ -402,7 +402,8 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
                 * range with the first VMA. Also, skip undesirable VMA types.
                 */
                nend = min(end, vma->vm_end);
-               if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+               if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_POPULATE)) !=
+                   VM_POPULATE)
                        continue;
                if (nstart < vma->vm_start)
                        nstart = vma->vm_start;
@@ -475,18 +476,18 @@ static int do_mlockall(int flags)
        struct vm_area_struct * vma, * prev = NULL;
 
        if (flags & MCL_FUTURE)
-               current->mm->def_flags |= VM_LOCKED;
+               current->mm->def_flags |= VM_LOCKED | VM_POPULATE;
        else
-               current->mm->def_flags &= ~VM_LOCKED;
+               current->mm->def_flags &= ~(VM_LOCKED | VM_POPULATE);
        if (flags == MCL_FUTURE)
                goto out;
 
        for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
                vm_flags_t newflags;
 
-               newflags = vma->vm_flags | VM_LOCKED;
-               if (!(flags & MCL_CURRENT))
-                       newflags &= ~VM_LOCKED;
+               newflags = vma->vm_flags & ~VM_LOCKED;
+               if (flags & MCL_CURRENT)
+                       newflags |= VM_LOCKED | VM_POPULATE;
 
                /* Ignore errors */
                mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
index 8826c77..39a3944 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1306,9 +1306,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
        }
 
        addr = mmap_region(file, addr, len, vm_flags, pgoff);
-       if (!IS_ERR_VALUE(addr) &&
-           ((vm_flags & VM_LOCKED) ||
-            (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+       if (!IS_ERR_VALUE(addr) && (vm_flags & VM_POPULATE))
                *populate = true;
        return addr;
 }