#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/node.h>
-#include <linux/userfaultfd_k.h>
#include <linux/page_owner.h>
#include "internal.h"
resv->region_cache_count;
/* At this point, we should have enough entries in the cache
- * for all the existings adds_in_progress. We should only be
+ * for all the existing adds_in_progress. We should only be
* needing to allocate for regions_needed.
*/
VM_BUG_ON(resv->region_cache_count < resv->adds_in_progress);
static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
{
struct page *page;
- bool nocma = !!(current->flags & PF_MEMALLOC_NOCMA);
+ bool pin = !!(current->flags & PF_MEMALLOC_PIN);
lockdep_assert_held(&hugetlb_lock);
list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
- if (nocma && is_migrate_cma_page(page))
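+ /* Callers that are pinning (PF_MEMALLOC_PIN) must not get pages that cannot be long-term pinned, e.g. CMA pages. */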
+ if (pin && !is_pinnable_page(page))
continue;
if (PageHWPoison(page))
continue;
}
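+/*
+ * Drop hugetlb_fault_mutex and i_mmap_rwsem, hand the fault to userspace via
+ * handle_userfault(), then reacquire both locks.  Shared by the
+ * VM_UFFD_MISSING and VM_UFFD_MINOR paths in hugetlb_no_page().
+ */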
+static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
+ struct address_space *mapping,
+ pgoff_t idx,
+ unsigned int flags,
+ unsigned long haddr,
+ unsigned long reason)
+{
+ vm_fault_t ret;
+ u32 hash;
+ struct vm_fault vmf = {
+ .vma = vma,
+ .address = haddr,
+ .flags = flags,
+
+ /*
+ * Hard to debug if it ends up being
+ * used by a callee that assumes
+ * something about the other
+ * uninitialized fields... same as in
+ * memory.c
+ */
+ };
+
+ /*
+ * hugetlb_fault_mutex and i_mmap_rwsem must be
+ * dropped before handling userfault. Reacquire
+ * after handling fault to make calling code simpler.
+ */
+ hash = hugetlb_fault_mutex_hash(mapping, idx);
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ i_mmap_unlock_read(mapping);
+ ret = handle_userfault(&vmf, reason);
+ i_mmap_lock_read(mapping);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
+ return ret;
+}
+
static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
struct vm_area_struct *vma,
struct address_space *mapping, pgoff_t idx,
retry:
page = find_lock_page(mapping, idx);
if (!page) {
- /*
- * Check for page in userfault range
- */
+ /* Check for page in userfault range */
if (userfaultfd_missing(vma)) {
- u32 hash;
- struct vm_fault vmf = {
- .vma = vma,
- .address = haddr,
- .flags = flags,
- /*
- * Hard to debug if it ends up being
- * used by a callee that assumes
- * something about the other
- * uninitialized fields... same as in
- * memory.c
- */
- };
-
- /*
- * hugetlb_fault_mutex and i_mmap_rwsem must be
- * dropped before handling userfault. Reacquire
- * after handling fault to make calling code simpler.
- */
- hash = hugetlb_fault_mutex_hash(mapping, idx);
- mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- i_mmap_unlock_read(mapping);
- ret = handle_userfault(&vmf, VM_UFFD_MISSING);
- i_mmap_lock_read(mapping);
- mutex_lock(&hugetlb_fault_mutex_table[hash]);
+ ret = hugetlb_handle_userfault(vma, mapping, idx,
+ flags, haddr,
+ VM_UFFD_MISSING);
goto out;
}
VM_FAULT_SET_HINDEX(hstate_index(h));
goto backout_unlocked;
}
+
+ /* Page is present: check for a minor fault in the userfault range. */
+ if (userfaultfd_minor(vma)) {
+ unlock_page(page);
+ put_page(page);
+ ret = hugetlb_handle_userfault(vma, mapping, idx,
+ flags, haddr,
+ VM_UFFD_MINOR);
+ goto out;
+ }
}
/*
return ret;
}
+#ifdef CONFIG_USERFAULTFD
/*
* Used by userfaultfd UFFDIO_COPY. Based on mcopy_atomic_pte with
* modifications for huge pages.
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
+ enum mcopy_atomic_mode mode,
struct page **pagep)
{
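+ /* MCOPY_ATOMIC_CONTINUE maps the page already in the page cache; no data is copied. */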
+ bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
struct address_space *mapping;
pgoff_t idx;
unsigned long size;
spinlock_t *ptl;
int ret;
struct page *page;
+ int writable;
+
+ mapping = dst_vma->vm_file->f_mapping;
+ idx = vma_hugecache_offset(h, dst_vma, dst_addr);
- if (!*pagep) {
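+ /* For CONTINUE, look up the existing page cache page instead of allocating a new one. */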
+ if (is_continue) {
+ ret = -EFAULT;
+ page = find_lock_page(mapping, idx);
+ if (!page)
+ goto out;
+ } else if (!*pagep) {
ret = -ENOMEM;
page = alloc_huge_page(dst_vma, dst_addr, 0);
if (IS_ERR(page))
*/
__SetPageUptodate(page);
- mapping = dst_vma->vm_file->f_mapping;
- idx = vma_hugecache_offset(h, dst_vma, dst_addr);
-
- /*
- * If shared, add to page cache
- */
- if (vm_shared) {
+ /* Add shared, newly allocated pages to the page cache. */
+ if (vm_shared && !is_continue) {
size = i_size_read(mapping->host) >> huge_page_shift(h);
ret = -EFAULT;
if (idx >= size)
hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
}
- _dst_pte = make_huge_pte(dst_vma, page, dst_vma->vm_flags & VM_WRITE);
- if (dst_vma->vm_flags & VM_WRITE)
+ /* For CONTINUE on a non-shared VMA, don't make the PTE writable; a later write will CoW. */
+ if (is_continue && !vm_shared)
+ writable = 0;
+ else
+ writable = dst_vma->vm_flags & VM_WRITE;
+
+ _dst_pte = make_huge_pte(dst_vma, page, writable);
+ if (writable)
_dst_pte = huge_pte_mkdirty(_dst_pte);
_dst_pte = pte_mkyoung(_dst_pte);
update_mmu_cache(dst_vma, dst_addr, dst_pte);
spin_unlock(ptl);
- SetHPageMigratable(page);
- if (vm_shared)
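+ /* A CONTINUE page came from the page cache, not a fresh allocation, so it is not marked migratable here. */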
+ if (!is_continue)
+ SetHPageMigratable(page);
+ if (vm_shared || is_continue)
unlock_page(page);
ret = 0;
out:
return ret;
out_release_unlock:
spin_unlock(ptl);
- if (vm_shared)
+ if (vm_shared || is_continue)
unlock_page(page);
out_release_nounlock:
put_page(page);
goto out;
}
+#endif /* CONFIG_USERFAULTFD */
static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma,
int refs, struct page **pages,
v_end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
/*
- * vma need span at least one aligned PUD size and the start,end range
- * must at least partialy within it.
+ * vma needs to span at least one aligned PUD size, and the range
+ * must be at least partially within it.
*/
if (!(vma->vm_flags & VM_MAYSHARE) || !(v_end > v_start) ||
(*end <= v_start) || (*start >= v_end))