Merge branch 'ptp-virtual-clocks-and-timestamping'
index 3ded6a5..8651309 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -44,6 +44,23 @@ static void hpage_pincount_sub(struct page *page, int refs)
        atomic_sub(refs, compound_pincount_ptr(page));
 }
 
+/* Equivalent to calling put_page() @refs times. */
+static void put_page_refs(struct page *page, int refs)
+{
+#ifdef CONFIG_DEBUG_VM
+       if (VM_WARN_ON_ONCE_PAGE(page_ref_count(page) < refs, page))
+               return;
+#endif
+
+       /*
+        * Calling put_page() for each ref is unnecessarily slow. Only the last
+        * ref needs a put_page().
+        */
+       if (refs > 1)
+               page_ref_sub(page, refs - 1);
+       put_page(page);
+}
+
 /*
  * Return the compound head page with ref appropriately incremented,
  * or NULL if that failed.
@@ -56,6 +73,21 @@ static inline struct page *try_get_compound_head(struct page *page, int refs)
                return NULL;
        if (unlikely(!page_cache_add_speculative(head, refs)))
                return NULL;
+
+       /*
+        * At this point we have a stable reference to the head page; but it
+        * could be that between the compound_head() lookup and the refcount
+        * increment, the compound page was split, in which case we'd end up
+        * holding a reference on a page that has nothing to do with the page
+        * we were given anymore.
+        * So now that the head page is stable, recheck that the pages still
+        * belong together.
+        */
+       if (unlikely(compound_head(page) != head)) {
+               put_page_refs(head, refs);
+               return NULL;
+       }
+
        return head;
 }
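The added recheck closes a race window: between the compound_head() lookup and the speculative refcount bump, the compound page may have been split, leaving the caller with references on an unrelated head page. A rough sketch of the "speculative ref, then recheck" pattern (illustrative names only; it glosses over the zero-refcount check and the barriers that page_cache_add_speculative() provides):

#include <stdatomic.h>
#include <stddef.h>

/* Illustrative model only; not kernel code. */
struct fake_page {
	atomic_int refcount;
	struct fake_page *head;	/* points to itself for a head page */
};

struct fake_page *try_get_head(struct fake_page *page, int refs)
{
	struct fake_page *head = page->head;	/* may already be stale */

	/* Speculative grab of @refs references on the presumed head. */
	atomic_fetch_add(&head->refcount, refs);

	/* Recheck: did a concurrent split redirect page->head meanwhile? */
	if (page->head != head) {
		atomic_fetch_sub(&head->refcount, refs);	/* hand them back */
		return NULL;	/* caller falls back / retries */
	}
	return head;
}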
 
@@ -95,6 +127,14 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
                             !is_pinnable_page(page)))
                        return NULL;
 
+               /*
+                * CAUTION: Don't use compound_head() on the page before this
+                * point, the result won't be stable.
+                */
+               page = try_get_compound_head(page, refs);
+               if (!page)
+                       return NULL;
+
                /*
                 * When pinning a compound page of order > 1 (which is what
                 * hpage_pincount_available() checks for), use an exact count to
@@ -103,15 +143,10 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
                 * However, be sure to *also* increment the normal page refcount
                 * field at least once, so that the page really is pinned.
                 */
-               if (!hpage_pincount_available(page))
-                       refs *= GUP_PIN_COUNTING_BIAS;
-
-               page = try_get_compound_head(page, refs);
-               if (!page)
-                       return NULL;
-
                if (hpage_pincount_available(page))
                        hpage_pincount_add(page, refs);
+               else
+                       page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1));
 
                mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED,
                                    orig_refs);
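Reordering try_get_compound_head() ahead of the bias multiplication does not change the totals: the head is grabbed with refs plain references first, and when no hpage pincount field is available another refs * (GUP_PIN_COUNTING_BIAS - 1) are added, giving the same refs * GUP_PIN_COUNTING_BIAS that put_compound_head() subtracts on unpin. A small check of that arithmetic (the bias value matches the kernel's 1 << 10; the rest is illustrative):

#include <assert.h>

#define GUP_PIN_COUNTING_BIAS	(1U << 10)	/* as defined by the kernel */

int main(void)
{
	unsigned int refs = 3;	/* arbitrary example */

	unsigned int grabbed   = refs;					/* try_get_compound_head() */
	unsigned int topped_up = refs * (GUP_PIN_COUNTING_BIAS - 1);	/* page_ref_add() */
	unsigned int released  = refs * GUP_PIN_COUNTING_BIAS;		/* put_compound_head() */

	assert(grabbed + topped_up == released);
	return 0;
}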
@@ -135,14 +170,7 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags)
                        refs *= GUP_PIN_COUNTING_BIAS;
        }
 
-       VM_BUG_ON_PAGE(page_ref_count(page) < refs, page);
-       /*
-        * Calling put_page() for each ref is unnecessarily slow. Only the last
-        * ref needs a put_page().
-        */
-       if (refs > 1)
-               page_ref_sub(page, refs - 1);
-       put_page(page);
+       put_page_refs(page, refs);
 }
 
 /**
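One behavioural detail of folding this check into put_page_refs(): the old VM_BUG_ON_PAGE() became a CONFIG_DEBUG_VM-guarded VM_WARN_ON_ONCE_PAGE() that returns early, so a refcount underflow is reported once instead of taking the machine down. A tiny model of that warn-and-bail pattern (names hypothetical):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for a one-shot debug check such as VM_WARN_ON_ONCE_PAGE(). */
static bool warn_on_once(bool cond, const char *msg)
{
	static bool warned;

	if (cond && !warned) {
		warned = true;
		fprintf(stderr, "WARNING: %s\n", msg);
	}
	return cond;
}

static void put_refs(int *refcount, int refs)
{
#ifdef DEBUG_MODEL	/* mirrors the CONFIG_DEBUG_VM guard */
	if (warn_on_once(*refcount < refs, "dropping more refs than held"))
		return;	/* warn and bail out rather than crash */
#endif
	*refcount -= refs;
}

int main(void)
{
	int refcount = 1;

	put_refs(&refcount, 2);	/* with DEBUG_MODEL defined, warns and bails */
	return 0;
}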
@@ -392,6 +420,17 @@ void unpin_user_pages(struct page **pages, unsigned long npages)
 }
 EXPORT_SYMBOL(unpin_user_pages);
 
+/*
+ * Set MMF_HAS_PINNED if it is not set yet; once set, the flag stays for the
+ * mm's lifetime.  Avoid setting the bit unless necessary, or it might cause
+ * write cache bouncing on large SMP machines for concurrent pinned gups.
+ */
+static inline void mm_set_has_pinned_flag(unsigned long *mm_flags)
+{
+       if (!test_bit(MMF_HAS_PINNED, mm_flags))
+               set_bit(MMF_HAS_PINNED, mm_flags);
+}
+
 #ifdef CONFIG_MMU
 static struct page *no_page_table(struct vm_area_struct *vma,
                unsigned int flags)
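mm_set_has_pinned_flag() replaces the old mm->has_pinned field with a sticky bit in mm->flags, and it reads the bit before setting it so that, once the first pin has happened, later pinners take a read-only path and do not keep dirtying the same cache line across CPUs. A sketch of that test-before-set pattern with C11 atomics (the flag word and bit name are illustrative, not the kernel API):

#include <stdatomic.h>

#define HAS_PINNED_BIT	(1UL << 0)	/* stand-in for MMF_HAS_PINNED */

void set_has_pinned_flag(atomic_ulong *flags)
{
	/* Fast path is a plain read: do not dirty the line if already set. */
	if (atomic_load_explicit(flags, memory_order_relaxed) & HAS_PINNED_BIT)
		return;

	/* First pinner sets the bit; it then stays for the mm's lifetime. */
	atomic_fetch_or(flags, HAS_PINNED_BIT);
}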
@@ -1293,7 +1332,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
        }
 
        if (flags & FOLL_PIN)
-               atomic_set(&mm->has_pinned, 1);
+               mm_set_has_pinned_flag(&mm->flags);
 
        /*
         * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior
@@ -2614,7 +2653,7 @@ static int internal_get_user_pages_fast(unsigned long start,
                return -EINVAL;
 
        if (gup_flags & FOLL_PIN)
-               atomic_set(&current->mm->has_pinned, 1);
+               mm_set_has_pinned_flag(&current->mm->flags);
 
        if (!(gup_flags & FOLL_FAST_ONLY))
                might_lock_read(&current->mm->mmap_lock);