blk-throttle: Fix that bps of child could exceed bps limited in parent

[linux-2.6-microblaze.git] / mm / hugetlb.c
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 9b26055..582ec75 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1014,15 +1014,23 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
         VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
         /*
          * Clear vm_private_data
+        * - For shared mappings this is a per-vma semaphore that may be
+        *   allocated in a subsequent call to hugetlb_vm_op_open.
+        *   Before clearing, make sure the pointer is not associated with
+        *   this vma, as clearing it would leak the structure.  This is the
+        *   case when called via clear_vma_resv_huge_pages() after
+        *   hugetlb_vm_op_open has already allocated a new structure.
          * - For MAP_PRIVATE mappings, this is the reserve map which does
          *   not apply to children.  Faults generated by the children are
          *   not guaranteed to succeed, even if read-only.
-        * - For shared mappings this is a per-vma semaphore that may be
-        *   allocated in a subsequent call to hugetlb_vm_op_open.
          */
-       vma->vm_private_data = (void *)0;
-       if (!(vma->vm_flags & VM_MAYSHARE))
-               return;
+       if (vma->vm_flags & VM_MAYSHARE) {
+               struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+               if (vma_lock && vma_lock->vma != vma)
+                       vma->vm_private_data = NULL;
+       } else
+               vma->vm_private_data = NULL;
  }
  
  /*
@@ -2924,11 +2932,11 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                 page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
                 if (!page)
                         goto out_uncharge_cgroup;
+               spin_lock_irq(&hugetlb_lock);
                 if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
                         SetHPageRestoreReserve(page);
                         h->resv_huge_pages--;
                 }
-               spin_lock_irq(&hugetlb_lock);
                 list_add(&page->lru, &h->hugepage_activelist);
                 set_page_refcounted(page);
                 /* Fall through */
@@ -4601,6 +4609,7 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
         struct resv_map *resv = vma_resv_map(vma);
  
         /*
+        * HPAGE_RESV_OWNER indicates a private mapping.
          * This new VMA should share its siblings reservation map if present.
          * The VMA will only ever have a valid reservation map pointer where
          * it is being copied for another still existing VMA.  As that VMA
@@ -4615,11 +4624,21 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
  
         /*
          * vma_lock structure for sharable mappings is vma specific.
-        * Clear old pointer (if copied via vm_area_dup) and create new.
+        * Clear old pointer (if copied via vm_area_dup) and allocate
+        * new structure.  Before clearing, make sure vma_lock is not
+        * for this vma.
          */
         if (vma->vm_flags & VM_MAYSHARE) {
-               vma->vm_private_data = NULL;
-               hugetlb_vma_lock_alloc(vma);
+               struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+               if (vma_lock) {
+                       if (vma_lock->vma != vma) {
+                               vma->vm_private_data = NULL;
+                               hugetlb_vma_lock_alloc(vma);
+                       } else
+                               pr_warn("HugeTLB: vma_lock already exists in %s.\n", __func__);
+               } else
+                       hugetlb_vma_lock_alloc(vma);
         }
  }
  
@@ -6342,8 +6361,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                          * tables. If the huge page is present, then the tail
                          * pages must also be present. The ptl prevents the
                          * head page and tail pages from being rearranged in
-                        * any way. So this page must be available at this
-                        * point, unless the page refcount overflowed:
+                        * any way. As this is hugetlb, the pages will never
+                        * be p2pdma or not longterm pinnable. So this page
+                        * must be available at this point, unless the page
+                        * refcount overflowed:
                          */
                         if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs,
                                                          flags))) {
@@ -7198,12 +7219,13 @@ follow_huge_pd(struct vm_area_struct *vma,
  }
  
  struct page * __weak
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-               pmd_t *pmd, int flags)
+follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags)
  {
+       struct hstate *h = hstate_vma(vma);
+       struct mm_struct *mm = vma->vm_mm;
         struct page *page = NULL;
         spinlock_t *ptl;
-       pte_t pte;
+       pte_t *ptep, pte;
  
         /*
          * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
@@ -7213,33 +7235,32 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                 return NULL;
  
  retry:
-       ptl = pmd_lockptr(mm, pmd);
-       spin_lock(ptl);
-       /*
-        * make sure that the address range covered by this pmd is not
-        * unmapped from other threads.
-        */
-       if (!pmd_huge(*pmd))
-               goto out;
-       pte = huge_ptep_get((pte_t *)pmd);
+       ptep = huge_pte_offset(mm, address, huge_page_size(h));
+       if (!ptep)
+               return NULL;
+
+       ptl = huge_pte_lock(h, mm, ptep);
+       pte = huge_ptep_get(ptep);
         if (pte_present(pte)) {
-               page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
+               page = pte_page(pte) +
+                       ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
                 /*
-                * try_grab_page() should always succeed here, because: a) we
-                * hold the pmd (ptl) lock, and b) we've just checked that the
-                * huge pmd (head) page is present in the page tables. The ptl
-                * prevents the head page and tail pages from being rearranged
-                * in any way. So this page must be available at this point,
-                * unless the page refcount overflowed:
+                * try_grab_page() should always be able to get the page here,
+                * because: a) we hold the pmd (ptl) lock, and b) we've just
+                * checked that the huge pmd (head) page is present in the
+                * page tables. The ptl prevents the head page and tail pages
+                * from being rearranged in any way. So this page must be
+                * available at this point, unless the page refcount
+                * overflowed:
                  */
-               if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
+               if (try_grab_page(page, flags)) {
                         page = NULL;
                         goto out;
                 }
         } else {
                 if (is_hugetlb_entry_migration(pte)) {
                         spin_unlock(ptl);
-                       __migration_entry_wait_huge((pte_t *)pmd, ptl);
+                       __migration_entry_wait_huge(ptep, ptl);
                         goto retry;
                 }
                 /*
@@ -7270,7 +7291,7 @@ retry:
         pte = huge_ptep_get((pte_t *)pud);
         if (pte_present(pte)) {
                 page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
-               if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
+               if (try_grab_page(page, flags)) {
                         page = NULL;
                         goto out;
                 }