Merge tag 'devprop-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael...
diff --git a/mm/memory.c b/mm/memory.c
index 6666bc2..be44d0b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3287,19 +3287,35 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
        if (PageAnon(vmf->page)) {
                struct page *page = vmf->page;
 
-               /* PageKsm() doesn't necessarily raise the page refcount */
-               if (PageKsm(page) || page_count(page) != 1)
+               /*
+                * We have to verify under page lock: these early checks are
+                * just an optimization to avoid locking the page and freeing
+                * the swapcache if there is little hope that we can reuse.
+                *
+                * PageKsm() doesn't necessarily raise the page refcount.
+                */
+               if (PageKsm(page) || page_count(page) > 3)
+                       goto copy;
+               if (!PageLRU(page))
+                       /*
+                        * Note: We cannot easily detect+handle references from
+                        * remote LRU pagevecs or references to PageLRU() pages.
+                        */
+                       lru_add_drain();
+               if (page_count(page) > 1 + PageSwapCache(page))
                        goto copy;
                if (!trylock_page(page))
                        goto copy;
-               if (PageKsm(page) || page_mapcount(page) != 1 || page_count(page) != 1) {
+               if (PageSwapCache(page))
+                       try_to_free_swap(page);
+               if (PageKsm(page) || page_count(page) != 1) {
                        unlock_page(page);
                        goto copy;
                }
                /*
-                * Ok, we've got the only map reference, and the only
-                * page count reference, and the page is locked,
-                * it's dark out, and we're wearing sunglasses. Hit it.
+                * Ok, we've got the only page reference from our mapping
+                * and the page is locked, it's dark out, and we're wearing
+                * sunglasses. Hit it.
                 */
                unlock_page(page);
                wp_page_reuse(vmf);
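
The rewritten reuse path above is driven by simple reference-count arithmetic: an anonymous page mapped only by us is expected to hold one reference from its page-table mapping plus at most one more if it still sits in the swapcache, and a reference parked in a CPU-local LRU pagevec would also inflate the count, which is why lru_add_drain() runs first. The cheap checks are done without the page lock; the decision is then repeated under trylock_page() once the swapcache reference has been dropped. A minimal userspace sketch of that arithmetic, with invented names rather than kernel APIs:

#include <stdbool.h>

/*
 * Illustrative only: an exclusively mapped anon page is expected to hold
 * one reference from its page-table mapping plus one more if the
 * swapcache still references it.
 */
static bool reuse_looks_possible(int page_count, bool in_swapcache)
{
        return page_count <= 1 + (in_swapcache ? 1 : 0);
}

/*
 * After the swapcache reference has been dropped (try_to_free_swap() in
 * the hunk above), only our own mapping may remain.
 */
static bool reuse_confirmed_under_lock(int page_count)
{
        return page_count == 1;
}

int main(void)
{
        /* Exclusively mapped page still in the swapcache: two references. */
        return reuse_looks_possible(2, true) &&
               reuse_confirmed_under_lock(1) ? 0 : 1;
}
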
@@ -3372,11 +3388,11 @@ void unmap_mapping_folio(struct folio *folio)
        details.even_cows = false;
        details.single_folio = folio;
 
-       i_mmap_lock_write(mapping);
+       i_mmap_lock_read(mapping);
        if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
                unmap_mapping_range_tree(&mapping->i_mmap, first_index,
                                         last_index, &details);
-       i_mmap_unlock_write(mapping);
+       i_mmap_unlock_read(mapping);
 }
 
 /**
@@ -3402,11 +3418,11 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
        if (last_index < first_index)
                last_index = ULONG_MAX;
 
-       i_mmap_lock_write(mapping);
+       i_mmap_lock_read(mapping);
        if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
                unmap_mapping_range_tree(&mapping->i_mmap, first_index,
                                         last_index, &details);
-       i_mmap_unlock_write(mapping);
+       i_mmap_unlock_read(mapping);
 }
 EXPORT_SYMBOL_GPL(unmap_mapping_pages);
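
Both hunks above switch the unmap paths from i_mmap_lock_write() to i_mmap_lock_read(): the range walk only traverses the i_mmap interval tree and never modifies it, so concurrent unmappers can now share the lock instead of serializing on the write side. The idea is ordinary reader/writer locking; a small userspace analogue using POSIX rwlocks (illustrative only, not the kernel's rwsem interface):

#include <pthread.h>

static pthread_rwlock_t tree_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Read-only traversals (like the unmap walk above) may run concurrently. */
void walk_tree(void (*visit)(void *), void *root)
{
        pthread_rwlock_rdlock(&tree_lock);
        visit(root);
        pthread_rwlock_unlock(&tree_lock);
}

/* Inserting or removing nodes still requires exclusive access. */
void modify_tree(void (*update)(void *), void *root)
{
        pthread_rwlock_wrlock(&tree_lock);
        update(root);
        pthread_rwlock_unlock(&tree_lock);
}
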
 
@@ -3473,6 +3489,25 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
        return 0;
 }
 
+static inline bool should_try_to_free_swap(struct page *page,
+                                          struct vm_area_struct *vma,
+                                          unsigned int fault_flags)
+{
+       if (!PageSwapCache(page))
+               return false;
+       if (mem_cgroup_swap_full(page) || (vma->vm_flags & VM_LOCKED) ||
+           PageMlocked(page))
+               return true;
+       /*
+        * If we want to map a page that's in the swapcache writable, we
+        * have to detect via the refcount if we're really the exclusive
+        * user. Try freeing the swapcache to get rid of the swapcache
+        * reference only in case it's likely that we'll be the exclusive user.
+        */
+       return (fault_flags & FAULT_FLAG_WRITE) && !PageKsm(page) &&
+               page_count(page) == 2;
+}
+
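
The page_count(page) == 2 test above encodes the references expected when the faulting task is the only user of a swapcache page at this point in the fault: one pin taken by the fault handler when the page was looked up or read in, plus the swapcache's own reference; the page is not mapped into any page table yet. A hypothetical breakdown, with invented names (not kernel code):

/* One pin held by the fault handler plus the swapcache's reference. */
enum { REF_FAULT_PIN = 1, REF_SWAPCACHE = 1 };

int expected_exclusive_refs(void)
{
        return REF_FAULT_PIN + REF_SWAPCACHE;   /* == 2 */
}

The other half of the helper is unconditional: when the memcg's swap is nearly full, or the VMA/page is mlocked, freeing the swap entry is wanted regardless of the write-fault optimization.
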
 /*
  * We enter with non-exclusive mmap_lock (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
@@ -3591,21 +3626,39 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                goto out_release;
        }
 
-       /*
-        * Make sure try_to_free_swap or reuse_swap_page or swapoff did not
-        * release the swapcache from under us.  The page pin, and pte_same
-        * test below, are not enough to exclude that.  Even if it is still
-        * swapcache, we need to check that the page's swap has not changed.
-        */
-       if (unlikely((!PageSwapCache(page) ||
-                       page_private(page) != entry.val)) && swapcache)
-               goto out_page;
-
-       page = ksm_might_need_to_copy(page, vma, vmf->address);
-       if (unlikely(!page)) {
-               ret = VM_FAULT_OOM;
-               page = swapcache;
-               goto out_page;
+       if (swapcache) {
+               /*
+                * Make sure try_to_free_swap or swapoff did not release the
+                * swapcache from under us.  The page pin, and pte_same test
+                * below, are not enough to exclude that.  Even if it is still
+                * swapcache, we need to check that the page's swap has not
+                * changed.
+                */
+               if (unlikely(!PageSwapCache(page) ||
+                            page_private(page) != entry.val))
+                       goto out_page;
+
+               /*
+                * KSM sometimes has to copy on read faults, for example, if
+                * page->index of !PageKSM() pages would be nonlinear inside the
+                * anon VMA -- PageKSM() is lost on actual swapout.
+                */
+               page = ksm_might_need_to_copy(page, vma, vmf->address);
+               if (unlikely(!page)) {
+                       ret = VM_FAULT_OOM;
+                       page = swapcache;
+                       goto out_page;
+               }
+
+               /*
+                * If we want to map a page that's in the swapcache writable, we
+                * have to detect via the refcount if we're really the exclusive
+                * owner. Try removing the extra reference from the local LRU
+                * pagevecs if required.
+                */
+               if ((vmf->flags & FAULT_FLAG_WRITE) && page == swapcache &&
+                   !PageKsm(page) && !PageLRU(page))
+                       lru_add_drain();
        }
 
        cgroup_throttle_swaprate(page, GFP_KERNEL);
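
The restructuring above preserves the long-standing pattern of optimistic lookup followed by revalidation: the page was found (or read in) before it was locked, so once locked we must confirm it is still a swapcache page backing exactly the swap entry being faulted on (page_private(page) == entry.val) before trusting it. The shape of that pattern in an illustrative userspace form, with invented types:

#include <pthread.h>
#include <stdbool.h>

struct cached_obj {
        pthread_mutex_t lock;
        unsigned long identity;         /* analogue of page_private()  */
        bool still_cached;              /* analogue of PageSwapCache() */
};

/*
 * Lock the object that was looked up optimistically, then verify it still
 * represents what we asked for; the caller bails out (or retries) if not.
 */
bool lock_and_revalidate(struct cached_obj *obj, unsigned long expected)
{
        pthread_mutex_lock(&obj->lock);
        if (!obj->still_cached || obj->identity != expected) {
                pthread_mutex_unlock(&obj->lock);
                return false;           /* it changed under us */
        }
        return true;                    /* locked and still valid */
}
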
@@ -3624,19 +3677,25 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
        }
 
        /*
-        * The page isn't present yet, go ahead with the fault.
-        *
-        * Be careful about the sequence of operations here.
-        * To get its accounting right, reuse_swap_page() must be called
-        * while the page is counted on swap but not yet in mapcount i.e.
-        * before page_add_anon_rmap() and swap_free(); try_to_free_swap()
-        * must be called after the swap_free(), or it will never succeed.
+        * Remove the swap entry and conditionally try to free up the swapcache.
+        * We're already holding a reference on the page but haven't mapped it
+        * yet.
         */
+       swap_free(entry);
+       if (should_try_to_free_swap(page, vma, vmf->flags))
+               try_to_free_swap(page);
 
        inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
        dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
        pte = mk_pte(page, vma->vm_page_prot);
-       if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+
+       /*
+        * Same logic as in do_wp_page(); however, optimize for fresh pages
+        * that are certainly not shared because we just allocated them without
+        * exposing them to the swapcache.
+        */
+       if ((vmf->flags & FAULT_FLAG_WRITE) && !PageKsm(page) &&
+           (page != swapcache || page_count(page) == 1)) {
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                vmf->flags &= ~FAULT_FLAG_WRITE;
                ret |= VM_FAULT_WRITE;
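
At this point swap_free() has already run and should_try_to_free_swap() may have dropped the swapcache's reference, so a genuinely exclusive swapcache page is left with exactly the one reference held by the fault handler; a page freshly allocated for this fault (page != swapcache) was never visible to anyone else and can be mapped writable without looking at the count. A hedged sketch of that decision, with stand-in parameters rather than the kernel's types:

#include <stdbool.h>

/* Illustrative decision only; parameter names are invented. */
bool map_writable_on_swapin(bool write_fault, bool is_ksm,
                            bool fresh_copy /* page != swapcache */,
                            int page_count)
{
        if (!write_fault || is_ksm)
                return false;
        if (fresh_copy)
                return true;            /* never exposed to others */
        return page_count == 1;         /* only our reference remains */
}
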
@@ -3662,10 +3721,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
        set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
        arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
 
-       swap_free(entry);
-       if (mem_cgroup_swap_full(page) ||
-           (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
-               try_to_free_swap(page);
        unlock_page(page);
        if (page != swapcache && swapcache) {
                /*