hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization
diff --git a/mm/rmap.c b/mm/rmap.c
index b3e3819..2df75a1 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
  *
  * inode->i_mutex      (while writing or truncating, not reading or faulting)
  *   mm->mmap_sem
- *     page->flags PG_locked (lock_page)
+ *     page->flags PG_locked (lock_page)   * (see hugetlbfs below)
  *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
  *         mapping->i_mmap_rwsem
+ *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
  *           anon_vma->rwsem
  *             mm->page_table_lock or pte_lock
  *               pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
  * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
  *   ->tasklist_lock
  *     pte map lock
+ *
+ * * hugetlbfs PageHuge() pages take locks in this order:
+ *         mapping->i_mmap_rwsem
+ *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
+ *             page->flags PG_locked (lock_page)
  */
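
To make the documented order concrete, here is a minimal sketch (illustrative only; lookups and error handling are elided, the read-vs-write lock choice depends on the path, and hugetlb_fault_mutex_hash()'s exact signature varies by kernel version) of a hugetlbfs fault path taking these locks in order:

/* Sketch: the hugetlbfs PageHuge() lock order documented above. */
i_mmap_lock_read(mapping);                     /* 1: mapping->i_mmap_rwsem */
hash = hugetlb_fault_mutex_hash(mapping, idx);
mutex_lock(&hugetlb_fault_mutex_table[hash]);  /* 2: hugetlb_fault_mutex */
lock_page(page);                               /* 3: page->flags PG_locked */
/* ... fault work ... */
unlock_page(page);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
i_mmap_unlock_read(mapping);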
 
 #include <linux/mm.h>
@@ -1178,6 +1184,9 @@ void page_add_new_anon_rmap(struct page *page,
                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                /* increment count (starts at -1) */
                atomic_set(compound_mapcount_ptr(page), 0);
+               if (hpage_pincount_available(page))
+                       atomic_set(compound_pincount_ptr(page), 0);
+
                __inc_node_page_state(page, NR_ANON_THPS);
        } else {
                /* Anon THP always mapped first with PMD */
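
For reference, a sketch of the two pincount helpers used above, approximating their include/linux/mm.h definitions in this kernel era (exact definitions vary by version): the pin count lives in the second tail page, so it is only available when the compound page has order > 1.

/* Approximate sketch of the helpers; see include/linux/mm.h. */
static inline bool hpage_pincount_available(struct page *page)
{
	page = compound_head(page);
	return PageCompound(page) && compound_order(page) > 1;
}

static inline atomic_t *compound_pincount_ptr(struct page *page)
{
	/* pin count is kept in the second tail page */
	return &page[2].hpage_pinned_refcount;
}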
@@ -1406,6 +1415,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                /*
                 * If sharing is possible, start and end will be adjusted
                 * accordingly.
+                *
+                * If called for a huge page, caller must hold i_mmap_rwsem
+                * in write mode as it is possible to call huge_pmd_unshare.
                 */
                adjust_range_if_pmd_sharing_possible(vma, &range.start,
                                                     &range.end);
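
The adjustment amounts to widening the invalidation range to shared-page-table granularity; a rough sketch of the idea (vma_could_share_pmd() is a hypothetical predicate, not the real mm/hugetlb.c code):

/*
 * Rough sketch only: if this vma might be sharing PMDs, widen
 * [start, end) to PUD_SIZE-aligned boundaries so the invalidation
 * range covers the whole page-table page huge_pmd_unshare() may drop.
 */
if (vma_could_share_pmd(vma)) {		/* hypothetical predicate */
	*start = ALIGN_DOWN(*start, PUD_SIZE);
	*end = ALIGN(*end, PUD_SIZE);
}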
@@ -1453,6 +1465,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                address = pvmw.address;
 
                if (PageHuge(page)) {
+                       /*
+                        * To call huge_pmd_unshare, i_mmap_rwsem must be
+                        * held in write mode.  Caller needs to explicitly
+                        * do this outside rmap routines.
+                        */
+                       VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
                        if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
                                /*
                                 * huge_pmd_unshare unmapped an entire PMD
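
The VM_BUG_ON() above encodes a caller contract. A hedged sketch of a conforming caller (illustrative; variable names assumed):

/*
 * Caller-side sketch: to unmap a hugetlbfs page, take i_mmap_rwsem in
 * write mode first and tell the rmap code the lock is already held.
 */
struct address_space *mapping = page_mapping(hpage);
bool unmap_success;

i_mmap_lock_write(mapping);
unmap_success = try_to_unmap(hpage, ttu_flags | TTU_RMAP_LOCKED);
i_mmap_unlock_write(mapping);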
@@ -1696,23 +1714,9 @@ discard:
        return ret;
 }
 
-bool is_vma_temporary_stack(struct vm_area_struct *vma)
-{
-       int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
-
-       if (!maybe_stack)
-               return false;
-
-       if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
-                                               VM_STACK_INCOMPLETE_SETUP)
-               return true;
-
-       return false;
-}
-
 static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
 {
-       return is_vma_temporary_stack(vma);
+       return vma_is_temporary_stack(vma);
 }
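
The deleted helper is not lost: it now lives under the vma_* naming convention as vma_is_temporary_stack() (a static inline in include/linux/mm.h in mainline), with the same logic as the lines removed above:

/* Same logic as the removed is_vma_temporary_stack(), renamed. */
static inline bool vma_is_temporary_stack(struct vm_area_struct *vma)
{
	int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);

	if (!maybe_stack)
		return false;

	return (vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
						VM_STACK_INCOMPLETE_SETUP;
}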
 
 static int page_mapcount_is_zero(struct page *page)
@@ -1974,6 +1978,9 @@ void hugepage_add_new_anon_rmap(struct page *page,
 {
        BUG_ON(address < vma->vm_start || address >= vma->vm_end);
        atomic_set(compound_mapcount_ptr(page), 0);
+       if (hpage_pincount_available(page))
+               atomic_set(compound_pincount_ptr(page), 0);
+
        __page_set_anon_rmap(page, vma, address, 1);
 }
 #endif /* CONFIG_HUGETLB_PAGE */
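
As in page_add_new_anon_rmap() above, the pin count is reset alongside the compound mapcount when a new anonymous huge page is first mapped. A hypothetical call-site sketch, loosely modeled on the hugetlb COW fault path (the real sequence lives in mm/hugetlb.c; details here are assumptions):

/* Hypothetical call site: installing a fresh anonymous huge page. */
ClearPagePrivate(new_page);
set_huge_pte_at(mm, haddr, ptep, make_huge_pte(vma, new_page, 1));
hugepage_add_new_anon_rmap(new_page, vma, haddr);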