hugetlb_cgroup: support noreserve mappings
author Mina Almasry <almasrymina@google.com>
Thu, 2 Apr 2020 04:11:31 +0000 (21:11 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 2 Apr 2020 16:35:32 +0000 (09:35 -0700)
Support MAP_NORESERVE accounting as part of the new counter.

For each hugepage allocation, we check at allocation time whether there is a
reservation for it.  If there is, the allocation was already charged at
reservation time, and we don't re-account it.  If there is no reservation for
this allocation, we charge the appropriate hugetlb_cgroup.
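
The condition added below in alloc_huge_page() captures exactly this decision.
An annotated restatement of that one line from the diff (not additional code):

        /*
         * The allocation is not backed by a pre-existing reservation, and so
         * must be charged to the reservation counter now, when any of the
         * following hold:
         *   map_chg            - vma_needs_reservation() reported that a new
         *                        page is needed, i.e. no reservation covers
         *                        this offset;
         *   avoid_reserve      - the caller asked not to consume the VMA's
         *                        reservation (e.g. during a COW copy);
         *   !vma_resv_map(vma) - the VMA has no reserve map at all, the
         *                        private MAP_NORESERVE case this patch is
         *                        about.
         */
        deferred_reserve = map_chg || avoid_reserve || !vma_resv_map(vma);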

The hugetlb_cgroup to uncharge for this allocation is stored in
page[3].private.  We use new APIs added in an earlier patch to set this
pointer.
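
As a rough sketch of the idea behind those APIs (the helper names and the
order check here are illustrative assumptions, not the exact interface added
by the earlier patch): the pointer simply rides in the page_private field of
the hugepage's fourth subpage.

        /* Illustrative sketch only; the real helpers live in
         * include/linux/hugetlb_cgroup.h and may differ in detail.
         */
        static inline struct hugetlb_cgroup *
        sketch_hugetlb_cgroup_rsvd(struct page *page)
        {
                /* page[3] only exists for compound pages of order >= 2. */
                if (compound_order(page) < 2)
                        return NULL;
                return (struct hugetlb_cgroup *)page[3].private;
        }

        static inline void
        sketch_set_hugetlb_cgroup_rsvd(struct page *page,
                                       struct hugetlb_cgroup *h_cg)
        {
                if (compound_order(page) >= 2)
                        page[3].private = (unsigned long)h_cg;
        }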

Signed-off-by: Mina Almasry <almasrymina@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Sandipan Das <sandipan@linux.ibm.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Link: http://lkml.kernel.org/r/20200211213128.73302-6-almasrymina@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0accbff..79f4c0f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1345,6 +1345,8 @@ static void __free_huge_page(struct page *page)
        clear_page_huge_active(page);
        hugetlb_cgroup_uncharge_page(hstate_index(h),
                                     pages_per_huge_page(h), page);
+       hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
+                                         pages_per_huge_page(h), page);
        if (restore_reserve)
                h->resv_huge_pages++;
 
@@ -2281,6 +2283,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
        long gbl_chg;
        int ret, idx;
        struct hugetlb_cgroup *h_cg;
+       bool deferred_reserve;
 
        idx = hstate_index(h);
        /*
@@ -2318,9 +2321,19 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                        gbl_chg = 1;
        }
 
+       /* If this allocation is not consuming a reservation, charge it now.
+        */
+       deferred_reserve = map_chg || avoid_reserve || !vma_resv_map(vma);
+       if (deferred_reserve) {
+               ret = hugetlb_cgroup_charge_cgroup_rsvd(
+                       idx, pages_per_huge_page(h), &h_cg);
+               if (ret)
+                       goto out_subpool_put;
+       }
+
        ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
        if (ret)
-               goto out_subpool_put;
+               goto out_uncharge_cgroup_reservation;
 
        spin_lock(&hugetlb_lock);
        /*
@@ -2343,6 +2356,14 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                /* Fall through */
        }
        hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
+       /* If allocation is not consuming a reservation, also store the
+        * hugetlb_cgroup pointer on the page.
+        */
+       if (deferred_reserve) {
+               hugetlb_cgroup_commit_charge_rsvd(idx, pages_per_huge_page(h),
+                                                 h_cg, page);
+       }
+
        spin_unlock(&hugetlb_lock);
 
        set_page_private(page, (unsigned long)spool);
@@ -2367,6 +2388,10 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 
 out_uncharge_cgroup:
        hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg);
+out_uncharge_cgroup_reservation:
+       if (deferred_reserve)
+               hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
+                                                   h_cg);
 out_subpool_put:
        if (map_chg || avoid_reserve)
                hugepage_subpool_put_pages(spool, 1);
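
For completeness, the new path can be exercised from userspace with a mapping
like the one below (a sketch, assuming a populated 2MB hugepage pool and a
task placed in a cgroup with the hugetlb controller enabled):

        #define _GNU_SOURCE             /* MAP_HUGETLB on some libcs */
        #include <stdio.h>
        #include <string.h>
        #include <sys/mman.h>

        #define LEN (2UL * 1024 * 1024) /* one 2MB hugepage */

        int main(void)
        {
                /* MAP_NORESERVE: no hugepage reservation is made at mmap() time. */
                void *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
                               MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_NORESERVE,
                               -1, 0);

                if (p == MAP_FAILED) {
                        perror("mmap");
                        return 1;
                }
                /* The first touch faults the hugepage in; with no reservation to
                 * consume, alloc_huge_page() takes the deferred_reserve path and
                 * charges the reservation counter at this point.
                 */
                memset(p, 0, LEN);
                munmap(p, LEN);
                return 0;
        }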