mm,thp,rmap: handle the normal !PageCompound case first
diff --git a/mm/rmap.c b/mm/rmap.c
index 2ec925e..4833d28 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -315,8 +315,8 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 
  enomem_failure:
        /*
-        * dst->anon_vma is dropped here otherwise its degree can be incorrectly
-        * decremented in unlink_anon_vmas().
+        * dst->anon_vma is dropped here otherwise its num_active_vmas can
+        * be incorrectly decremented in unlink_anon_vmas().
         * We can safely do this because callers of anon_vma_clone() don't care
         * about dst->anon_vma if anon_vma_clone() failed.
         */
@@ -1085,6 +1085,113 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
        return page_vma_mkclean_one(&pvmw);
 }
 
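+/*
+ * Snapshot of a compound page's mapcounts, copied from and written back to
+ * the compound head under lock_ and unlock_compound_mapcounts() below.
+ */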
+struct compound_mapcounts {
+       unsigned int compound_mapcount;
+       unsigned int subpages_mapcount;
+};
+
+/*
+ * lock_compound_mapcounts() first locks, then copies compound_mapcount and
+ * subpages_mapcount from the head[1] fields into the local struct,
+ * converting from struct page's internal representation to the logical
+ * count (that is, adding 1 to compound_mapcount to undo its offset by -1).
+ */
+static void lock_compound_mapcounts(struct page *head,
+               struct compound_mapcounts *local)
+{
+       bit_spin_lock(PG_locked, &head[1].flags);
+       local->compound_mapcount = atomic_read(compound_mapcount_ptr(head)) + 1;
+       local->subpages_mapcount = atomic_read(subpages_mapcount_ptr(head));
+}
+
+/*
+ * After caller has updated subpage._mapcount, local subpages_mapcount and
+ * local compound_mapcount, as necessary, unlock_compound_mapcounts() converts
+ * and copies them back to the compound head[1] fields, and then unlocks.
+ */
+static void unlock_compound_mapcounts(struct page *head,
+               struct compound_mapcounts *local)
+{
+       atomic_set(compound_mapcount_ptr(head), local->compound_mapcount - 1);
+       atomic_set(subpages_mapcount_ptr(head), local->subpages_mapcount);
+       bit_spin_unlock(PG_locked, &head[1].flags);
+}
+
+/*
+ * When acting on a compound page under lock_compound_mapcounts(), avoid the
+ * unnecessary overhead of an actual atomic operation on its subpage mapcount.
+ * Return true if this is the first increment or the last decrement
+ * (remembering that page->_mapcount -1 represents logical mapcount 0).
+ */
+static bool subpage_mapcount_inc(struct page *page)
+{
+       int orig_mapcount = atomic_read(&page->_mapcount);
+
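+       /* Non-atomic update is safe: called under lock_compound_mapcounts() */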
+       atomic_set(&page->_mapcount, orig_mapcount + 1);
+       return orig_mapcount < 0;
+}
+
+static bool subpage_mapcount_dec(struct page *page)
+{
+       int orig_mapcount = atomic_read(&page->_mapcount);
+
+       atomic_set(&page->_mapcount, orig_mapcount - 1);
+       return orig_mapcount == 0;
+}
+
+/*
+ * When mapping a THP's first pmd, or unmapping its last pmd, if that THP
+ * also has pte mappings, then those must be discounted, both to maintain
+ * NR_ANON_MAPPED and NR_FILE_MAPPED statistics exactly, without any drift,
+ * and to decide when an anon THP should be put on the deferred split queue.
+ * This function must be called between lock_ and unlock_compound_mapcounts().
+ */
+static int nr_subpages_unmapped(struct page *head, int nr_subpages)
+{
+       int nr = nr_subpages;
+       int i;
+
+       /* Discount those subpages mapped by pte */
+       for (i = 0; i < nr_subpages; i++)
+               if (atomic_read(&head[i]._mapcount) >= 0)
+                       nr--;
+       return nr;
+}
+
+/*
+ * page_dup_compound_rmap(), used when copying mm, or when splitting pmd,
+ * provides a simple example of using lock_ and unlock_compound_mapcounts().
+ */
+void page_dup_compound_rmap(struct page *page, bool compound)
+{
+       struct compound_mapcounts mapcounts;
+       struct page *head;
+
+       /*
+        * Hugetlb pages could use lock_compound_mapcounts(), like THPs do;
+        * but at present they are still being managed by atomic operations,
+        * which are likely to be somewhat faster; so don't rush to convert
+        * them over without evaluating the effect.
+        *
+        * Note that hugetlb does not call page_add_file_rmap():
+        * here is where hugetlb shared page mapcount is raised.
+        */
+       if (PageHuge(page)) {
+               atomic_inc(compound_mapcount_ptr(page));
+               return;
+       }
+
+       head = compound_head(page);
+       lock_compound_mapcounts(head, &mapcounts);
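+       /*
+        * Duplicating a pmd mapping raises compound_mapcount; duplicating a
+        * pte mapping raises the subpage's _mapcount and the head's
+        * subpages_mapcount.
+        */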
+       if (compound) {
+               mapcounts.compound_mapcount++;
+       } else {
+               mapcounts.subpages_mapcount++;
+               subpage_mapcount_inc(page);
+       }
+       unlock_compound_mapcounts(head, &mapcounts);
+}
+
 /**
  * page_move_anon_rmap - move a page to our anon_vma
  * @page:      the page to move to our anon_vma
@@ -1194,6 +1301,8 @@ static void __page_check_anon_rmap(struct page *page,
 void page_add_anon_rmap(struct page *page,
        struct vm_area_struct *vma, unsigned long address, rmap_t flags)
 {
+       struct compound_mapcounts mapcounts;
+       int nr = 0, nr_pmdmapped = 0;
        bool compound = flags & RMAP_COMPOUND;
        bool first;
 
@@ -1202,30 +1311,37 @@ void page_add_anon_rmap(struct page *page,
        else
                VM_BUG_ON_PAGE(!PageLocked(page), page);
 
-       if (compound) {
-               atomic_t *mapcount;
-               VM_BUG_ON_PAGE(!PageLocked(page), page);
-               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-               mapcount = compound_mapcount_ptr(page);
-               first = atomic_inc_and_test(mapcount);
-       } else {
+       if (likely(!PageCompound(page))) {
                first = atomic_inc_and_test(&page->_mapcount);
+               nr = first;
+
+       } else if (compound && PageTransHuge(page)) {
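+               /*
+                * Mapping by pmd: if this is the first pmd mapping, charge
+                * all subpages except those already mapped by pte.
+                */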
+               lock_compound_mapcounts(page, &mapcounts);
+               first = !mapcounts.compound_mapcount;
+               mapcounts.compound_mapcount++;
+               if (first) {
+                       nr = nr_pmdmapped = thp_nr_pages(page);
+                       if (mapcounts.subpages_mapcount)
+                               nr = nr_subpages_unmapped(page, nr_pmdmapped);
+               }
+               unlock_compound_mapcounts(page, &mapcounts);
+       } else {
+               struct page *head = compound_head(page);
+
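+               /*
+                * First pte mapping of this subpage counts toward the stats
+                * only while the THP has no pmd mapping (which would already
+                * have charged it).
+                */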
+               lock_compound_mapcounts(head, &mapcounts);
+               mapcounts.subpages_mapcount++;
+               first = subpage_mapcount_inc(page);
+               nr = first && !mapcounts.compound_mapcount;
+               unlock_compound_mapcounts(head, &mapcounts);
        }
+
        VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
        VM_BUG_ON_PAGE(!first && PageAnonExclusive(page), page);
 
-       if (first) {
-               int nr = compound ? thp_nr_pages(page) : 1;
-               /*
-                * We use the irq-unsafe __{inc|mod}_zone_page_stat because
-                * these counters are not modified in interrupt context, and
-                * pte lock(a spinlock) is held, which implies preemption
-                * disabled.
-                */
-               if (compound)
-                       __mod_lruvec_page_state(page, NR_ANON_THPS, nr);
+       if (nr_pmdmapped)
+               __mod_lruvec_page_state(page, NR_ANON_THPS, nr_pmdmapped);
+       if (nr)
                __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
-       }
 
        if (unlikely(PageKsm(page)))
                unlock_page_memcg(page);
@@ -1256,22 +1372,23 @@ void page_add_anon_rmap(struct page *page,
 void page_add_new_anon_rmap(struct page *page,
        struct vm_area_struct *vma, unsigned long address)
 {
-       const bool compound = PageCompound(page);
-       int nr = compound ? thp_nr_pages(page) : 1;
+       int nr;
 
        VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
        __SetPageSwapBacked(page);
-       if (compound) {
+
+       if (likely(!PageCompound(page))) {
+               /* increment count (starts at -1) */
+               atomic_set(&page->_mapcount, 0);
+               nr = 1;
+       } else {
                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                /* increment count (starts at -1) */
                atomic_set(compound_mapcount_ptr(page), 0);
-               atomic_set(compound_pincount_ptr(page), 0);
-
+               nr = thp_nr_pages(page);
                __mod_lruvec_page_state(page, NR_ANON_THPS, nr);
-       } else {
-               /* increment count (starts at -1) */
-               atomic_set(&page->_mapcount, 0);
        }
+
        __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
        __page_set_anon_rmap(page, vma, address, 1);
 }
@@ -1287,45 +1404,40 @@ void page_add_new_anon_rmap(struct page *page,
 void page_add_file_rmap(struct page *page,
        struct vm_area_struct *vma, bool compound)
 {
-       int i, nr = 0;
+       struct compound_mapcounts mapcounts;
+       int nr = 0, nr_pmdmapped = 0;
+       bool first;
 
        VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
        lock_page_memcg(page);
-       if (compound && PageTransHuge(page)) {
-               int nr_pages = thp_nr_pages(page);
 
-               for (i = 0; i < nr_pages; i++) {
-                       if (atomic_inc_and_test(&page[i]._mapcount))
-                               nr++;
+       if (likely(!PageCompound(page))) {
+               first = atomic_inc_and_test(&page->_mapcount);
+               nr = first;
+
+       } else if (compound && PageTransHuge(page)) {
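+               /*
+                * As in page_add_anon_rmap(): the first pmd mapping charges
+                * the subpages not already mapped by pte.
+                */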
+               lock_compound_mapcounts(page, &mapcounts);
+               first = !mapcounts.compound_mapcount;
+               mapcounts.compound_mapcount++;
+               if (first) {
+                       nr = nr_pmdmapped = thp_nr_pages(page);
+                       if (mapcounts.subpages_mapcount)
+                               nr = nr_subpages_unmapped(page, nr_pmdmapped);
                }
-               if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
-                       goto out;
-
-               /*
-                * It is racy to ClearPageDoubleMap in page_remove_file_rmap();
-                * but page lock is held by all page_add_file_rmap() compound
-                * callers, and SetPageDoubleMap below warns if !PageLocked:
-                * so here is a place that DoubleMap can be safely cleared.
-                */
-               VM_WARN_ON_ONCE(!PageLocked(page));
-               if (nr == nr_pages && PageDoubleMap(page))
-                       ClearPageDoubleMap(page);
-
-               if (PageSwapBacked(page))
-                       __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
-                                               nr_pages);
-               else
-                       __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
-                                               nr_pages);
+               unlock_compound_mapcounts(page, &mapcounts);
        } else {
-               if (PageTransCompound(page) && page_mapping(page)) {
-                       VM_WARN_ON_ONCE(!PageLocked(page));
-                       SetPageDoubleMap(compound_head(page));
-               }
-               if (atomic_inc_and_test(&page->_mapcount))
-                       nr++;
+               struct page *head = compound_head(page);
+
+               lock_compound_mapcounts(head, &mapcounts);
+               mapcounts.subpages_mapcount++;
+               first = subpage_mapcount_inc(page);
+               nr = first && !mapcounts.compound_mapcount;
+               unlock_compound_mapcounts(head, &mapcounts);
        }
-out:
+
+       if (nr_pmdmapped)
+               __mod_lruvec_page_state(page, PageSwapBacked(page) ?
+                       NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
        if (nr)
                __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
        unlock_page_memcg(page);
@@ -1333,132 +1445,83 @@ out:
        mlock_vma_page(page, vma, compound);
 }
 
-static void page_remove_file_rmap(struct page *page, bool compound)
+/**
+ * page_remove_rmap - take down pte mapping from a page
+ * @page:      page to remove mapping from
+ * @vma:       the vm area from which the mapping is removed
+ * @compound:  uncharge the page as compound or small page
+ *
+ * The caller needs to hold the pte lock.
+ */
+void page_remove_rmap(struct page *page,
+       struct vm_area_struct *vma, bool compound)
 {
-       int i, nr = 0;
+       struct compound_mapcounts mapcounts;
+       int nr = 0, nr_pmdmapped = 0;
+       bool last;
 
        VM_BUG_ON_PAGE(compound && !PageHead(page), page);
 
-       /* Hugepages are not counted in NR_FILE_MAPPED for now. */
+       /* Hugetlb pages are not counted in NR_*MAPPED */
        if (unlikely(PageHuge(page))) {
                /* hugetlb pages are always mapped with pmds */
                atomic_dec(compound_mapcount_ptr(page));
                return;
        }
 
-       /* page still mapped by someone else? */
-       if (compound && PageTransHuge(page)) {
-               int nr_pages = thp_nr_pages(page);
+       lock_page_memcg(page);
 
-               for (i = 0; i < nr_pages; i++) {
-                       if (atomic_add_negative(-1, &page[i]._mapcount))
-                               nr++;
+       /* page still mapped by someone else? */
+       if (likely(!PageCompound(page))) {
+               last = atomic_add_negative(-1, &page->_mapcount);
+               nr = last;
+
+       } else if (compound && PageTransHuge(page)) {
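+               /*
+                * Unmapping by pmd: if this was the last pmd mapping,
+                * uncharge all subpages except those still mapped by pte.
+                */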
+               lock_compound_mapcounts(page, &mapcounts);
+               mapcounts.compound_mapcount--;
+               last = !mapcounts.compound_mapcount;
+               if (last) {
+                       nr = nr_pmdmapped = thp_nr_pages(page);
+                       if (mapcounts.subpages_mapcount)
+                               nr = nr_subpages_unmapped(page, nr_pmdmapped);
                }
-               if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
-                       goto out;
-               if (PageSwapBacked(page))
-                       __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
-                                               -nr_pages);
-               else
-                       __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
-                                               -nr_pages);
+               unlock_compound_mapcounts(page, &mapcounts);
        } else {
-               if (atomic_add_negative(-1, &page->_mapcount))
-                       nr++;
-       }
-out:
-       if (nr)
-               __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
-}
-
-static void page_remove_anon_compound_rmap(struct page *page)
-{
-       int i, nr;
-
-       if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
-               return;
-
-       /* Hugepages are not counted in NR_ANON_PAGES for now. */
-       if (unlikely(PageHuge(page)))
-               return;
-
-       if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
-               return;
+               struct page *head = compound_head(page);
 
-       __mod_lruvec_page_state(page, NR_ANON_THPS, -thp_nr_pages(page));
-
-       if (TestClearPageDoubleMap(page)) {
-               /*
-                * Subpages can be mapped with PTEs too. Check how many of
-                * them are still mapped.
-                */
-               for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
-                       if (atomic_add_negative(-1, &page[i]._mapcount))
-                               nr++;
-               }
+               lock_compound_mapcounts(head, &mapcounts);
+               mapcounts.subpages_mapcount--;
+               last = subpage_mapcount_dec(page);
+               nr = last && !mapcounts.compound_mapcount;
+               unlock_compound_mapcounts(head, &mapcounts);
+       }
 
+       if (nr_pmdmapped) {
+               __mod_lruvec_page_state(page, PageAnon(page) ? NR_ANON_THPS :
+                               (PageSwapBacked(page) ? NR_SHMEM_PMDMAPPED :
+                               NR_FILE_PMDMAPPED), -nr_pmdmapped);
+       }
+       if (nr) {
+               __mod_lruvec_page_state(page, PageAnon(page) ? NR_ANON_MAPPED :
+                               NR_FILE_MAPPED, -nr);
                /*
-                * Queue the page for deferred split if at least one small
+                * Queue anon THP for deferred split if at least one small
                 * page of the compound page is unmapped, but at least one
                 * small page is still mapped.
                 */
-               if (nr && nr < thp_nr_pages(page))
-                       deferred_split_huge_page(page);
-       } else {
-               nr = thp_nr_pages(page);
+               if (PageTransCompound(page) && PageAnon(page))
+                       if (!compound || nr < nr_pmdmapped)
+                               deferred_split_huge_page(compound_head(page));
        }
 
-       if (nr)
-               __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
-}
-
-/**
- * page_remove_rmap - take down pte mapping from a page
- * @page:      page to remove mapping from
- * @vma:       the vm area from which the mapping is removed
- * @compound:  uncharge the page as compound or small page
- *
- * The caller needs to hold the pte lock.
- */
-void page_remove_rmap(struct page *page,
-       struct vm_area_struct *vma, bool compound)
-{
-       lock_page_memcg(page);
-
-       if (!PageAnon(page)) {
-               page_remove_file_rmap(page, compound);
-               goto out;
-       }
-
-       if (compound) {
-               page_remove_anon_compound_rmap(page);
-               goto out;
-       }
-
-       /* page still mapped by someone else? */
-       if (!atomic_add_negative(-1, &page->_mapcount))
-               goto out;
-
-       /*
-        * We use the irq-unsafe __{inc|mod}_zone_page_stat because
-        * these counters are not modified in interrupt context, and
-        * pte lock(a spinlock) is held, which implies preemption disabled.
-        */
-       __dec_lruvec_page_state(page, NR_ANON_MAPPED);
-
-       if (PageTransCompound(page))
-               deferred_split_huge_page(compound_head(page));
-
        /*
-        * It would be tidy to reset the PageAnon mapping here,
+        * It would be tidy to reset PageAnon mapping when fully unmapped,
         * but that might overwrite a racing page_add_anon_rmap
         * which increments mapcount after us but sets mapping
-        * before us: so leave the reset to free_unref_page,
+        * before us: so leave the reset to free_pages_prepare,
         * and remember that it's only reliable while mapped.
-        * Leaving it set also helps swapoff to reinstate ptes
-        * faster for those pages still in swapcache.
         */
-out:
+
        unlock_page_memcg(page);
 
        munlock_vma_page(page, vma, compound);
@@ -1801,7 +1864,7 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
        return vma_is_temporary_stack(vma);
 }
 
-static int page_not_mapped(struct folio *folio)
+static int folio_not_mapped(struct folio *folio)
 {
        return !folio_mapped(folio);
 }
@@ -1822,7 +1885,7 @@ void try_to_unmap(struct folio *folio, enum ttu_flags flags)
        struct rmap_walk_control rwc = {
                .rmap_one = try_to_unmap_one,
                .arg = (void *)flags,
-               .done = page_not_mapped,
+               .done = folio_not_mapped,
                .anon_lock = folio_lock_anon_vma_read,
        };
 
@@ -2150,7 +2213,7 @@ void try_to_migrate(struct folio *folio, enum ttu_flags flags)
        struct rmap_walk_control rwc = {
                .rmap_one = try_to_migrate_one,
                .arg = (void *)flags,
-               .done = page_not_mapped,
+               .done = folio_not_mapped,
                .anon_lock = folio_lock_anon_vma_read,
        };
 
@@ -2297,7 +2360,7 @@ static bool folio_make_device_exclusive(struct folio *folio,
        };
        struct rmap_walk_control rwc = {
                .rmap_one = page_make_device_exclusive_one,
-               .done = page_not_mapped,
+               .done = folio_not_mapped,
                .anon_lock = folio_lock_anon_vma_read,
                .arg = &args,
        };
@@ -2569,9 +2632,9 @@ void hugepage_add_new_anon_rmap(struct page *page,
                        struct vm_area_struct *vma, unsigned long address)
 {
        BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+       /* increment count (starts at -1) */
        atomic_set(compound_mapcount_ptr(page), 0);
-       atomic_set(compound_pincount_ptr(page), 0);
-
+       ClearHPageRestoreReserve(page);
        __page_set_anon_rmap(page, vma, address, 1);
 }
 #endif /* CONFIG_HUGETLB_PAGE */