mm/hugetlb: fix deadlock in hugetlb_cow error path
[linux-2.6-microblaze.git] / mm / hugetlb.c
index d029d93..a260296 100644 (file)
@@ -1944,13 +1944,14 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
  * Increase the hugetlb pool such that it can accommodate a reservation
  * of size 'delta'.
  */
-static int gather_surplus_pages(struct hstate *h, int delta)
+static int gather_surplus_pages(struct hstate *h, long delta)
        __must_hold(&hugetlb_lock)
 {
        struct list_head surplus_list;
        struct page *page, *tmp;
-       int ret, i;
-       int needed, allocated;
+       int ret;
+       long i;
+       long needed, allocated;
        bool alloc_ok = true;
 
        needed = (h->resv_huge_pages + delta) - h->free_huge_pages;
@@ -2014,8 +2015,7 @@ retry:
                 * This page is now managed by the hugetlb allocator and has
                 * no users -- drop the buddy allocator's reference.
                 */
-               put_page_testzero(page);
-               VM_BUG_ON_PAGE(page_count(page), page);
+               VM_BUG_ON_PAGE(!put_page_testzero(page), page);
                enqueue_huge_page(h, page);
        }
 free:
@@ -2760,7 +2760,7 @@ static ssize_t nr_hugepages_show_common(struct kobject *kobj,
        else
                nr_huge_pages = h->nr_huge_pages_node[nid];
 
-       return sprintf(buf, "%lu\n", nr_huge_pages);
+       return sysfs_emit(buf, "%lu\n", nr_huge_pages);
 }
 
 static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
@@ -2833,7 +2833,8 @@ HSTATE_ATTR(nr_hugepages);
  * huge page alloc/free.
  */
 static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
-                                      struct kobj_attribute *attr, char *buf)
+                                          struct kobj_attribute *attr,
+                                          char *buf)
 {
        return nr_hugepages_show_common(kobj, attr, buf);
 }
@@ -2851,7 +2852,7 @@ static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
                                        struct kobj_attribute *attr, char *buf)
 {
        struct hstate *h = kobj_to_hstate(kobj, NULL);
-       return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
+       return sysfs_emit(buf, "%lu\n", h->nr_overcommit_huge_pages);
 }
 
 static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
@@ -2889,7 +2890,7 @@ static ssize_t free_hugepages_show(struct kobject *kobj,
        else
                free_huge_pages = h->free_huge_pages_node[nid];
 
-       return sprintf(buf, "%lu\n", free_huge_pages);
+       return sysfs_emit(buf, "%lu\n", free_huge_pages);
 }
 HSTATE_ATTR_RO(free_hugepages);
 
@@ -2897,7 +2898,7 @@ static ssize_t resv_hugepages_show(struct kobject *kobj,
                                        struct kobj_attribute *attr, char *buf)
 {
        struct hstate *h = kobj_to_hstate(kobj, NULL);
-       return sprintf(buf, "%lu\n", h->resv_huge_pages);
+       return sysfs_emit(buf, "%lu\n", h->resv_huge_pages);
 }
 HSTATE_ATTR_RO(resv_hugepages);
 
@@ -2914,7 +2915,7 @@ static ssize_t surplus_hugepages_show(struct kobject *kobj,
        else
                surplus_huge_pages = h->surplus_huge_pages_node[nid];
 
-       return sprintf(buf, "%lu\n", surplus_huge_pages);
+       return sysfs_emit(buf, "%lu\n", surplus_huge_pages);
 }
 HSTATE_ATTR_RO(surplus_hugepages);
 
@@ -3198,8 +3199,6 @@ void __init hugetlb_add_hstate(unsigned int order)
        h = &hstates[hugetlb_max_hstate++];
        h->order = order;
        h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
-       h->nr_huge_pages = 0;
-       h->free_huge_pages = 0;
        for (i = 0; i < MAX_NUMNODES; ++i)
                INIT_LIST_HEAD(&h->hugepage_freelists[i]);
        INIT_LIST_HEAD(&h->hugepage_activelist);
@@ -3673,7 +3672,7 @@ const struct vm_operations_struct hugetlb_vm_ops = {
        .fault = hugetlb_vm_op_fault,
        .open = hugetlb_vm_op_open,
        .close = hugetlb_vm_op_close,
-       .split = hugetlb_vm_op_split,
+       .may_split = hugetlb_vm_op_split,
        .pagesize = hugetlb_vm_op_pagesize,
 };
 
@@ -4106,10 +4105,30 @@ retry_avoidcopy:
                 * may get SIGKILLed if it later faults.
                 */
                if (outside_reserve) {
+                       struct address_space *mapping = vma->vm_file->f_mapping;
+                       pgoff_t idx;
+                       u32 hash;
+
                        put_page(old_page);
                        BUG_ON(huge_pte_none(pte));
+                       /*
+                        * Drop hugetlb_fault_mutex and i_mmap_rwsem before
+                        * unmapping.  unmapping needs to hold i_mmap_rwsem
+                        * in write mode.  Dropping i_mmap_rwsem in read mode
+                        * here is OK as COW mappings do not interact with
+                        * PMD sharing.
+                        *
+                        * Reacquire both after unmap operation.
+                        */
+                       idx = vma_hugecache_offset(h, vma, haddr);
+                       hash = hugetlb_fault_mutex_hash(mapping, idx);
+                       mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+                       i_mmap_unlock_read(mapping);
+
                        unmap_ref_private(mm, vma, old_page, haddr);
-                       BUG_ON(huge_pte_none(pte));
+
+                       i_mmap_lock_read(mapping);
+                       mutex_lock(&hugetlb_fault_mutex_table[hash]);
                        spin_lock(ptl);
                        ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
                        if (likely(ptep &&
@@ -5115,6 +5134,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 
                if (unlikely(add < 0)) {
                        hugetlb_acct_memory(h, -gbl_reserve);
+                       ret = add;
                        goto out_put_pages;
                } else if (unlikely(chg > add)) {
                        /*