// SPDX-License-Identifier: GPL-2.0-only
/*
 *  mm/userfaultfd.c
 *
 *  Copyright (C) 2015  Red Hat, Inc.
 */

#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/userfaultfd_k.h>
#include <linux/mmu_notifier.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <asm/tlbflush.h>
#include "internal.h"

static __always_inline
struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
                                    unsigned long dst_start,
                                    unsigned long len)
{
        /*
         * Make sure that the dst range is both valid and fully within a
         * single existing vma.
         */
        struct vm_area_struct *dst_vma;

        dst_vma = find_vma(dst_mm, dst_start);
        if (!dst_vma)
                return NULL;

        if (dst_start < dst_vma->vm_start ||
            dst_start + len > dst_vma->vm_end)
                return NULL;

        /*
         * Check that the vma is registered in uffd; this is required to
         * enforce the VM_MAYWRITE check done at uffd registration time.
         */
        if (!dst_vma->vm_userfaultfd_ctx.ctx)
                return NULL;

        return dst_vma;
}
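
/*
 * NB: find_vma() requires mmap_lock to be held; every caller of
 * find_dst_vma() in this file takes mmap_read_lock() first.
 */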

/*
 * Install PTEs, to map dst_addr (within dst_vma) to page.
 *
 * This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem
 * and anon, and for both shared and private VMAs.
 */
int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
                             struct vm_area_struct *dst_vma,
                             unsigned long dst_addr, struct page *page,
                             bool newly_allocated, bool wp_copy)
{
        int ret;
        pte_t _dst_pte, *dst_pte;
        bool writable = dst_vma->vm_flags & VM_WRITE;
        bool vm_shared = dst_vma->vm_flags & VM_SHARED;
        bool page_in_cache = page->mapping;
        spinlock_t *ptl;
        struct inode *inode;
        pgoff_t offset, max_off;

        _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
        _dst_pte = pte_mkdirty(_dst_pte);
        if (page_in_cache && !vm_shared)
                writable = false;

        /*
         * Always mark a PTE as write-protected when needed, regardless of
         * VM_WRITE, which the user might change.
         */
        if (wp_copy)
                _dst_pte = pte_mkuffd_wp(_dst_pte);
        else if (writable)
                _dst_pte = pte_mkwrite(_dst_pte);

        dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);

        if (vma_is_shmem(dst_vma)) {
                /* serialize against truncate with the page table lock */
                inode = dst_vma->vm_file->f_inode;
                offset = linear_page_index(dst_vma, dst_addr);
                max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
                ret = -EFAULT;
                if (unlikely(offset >= max_off))
                        goto out_unlock;
        }

        ret = -EEXIST;
        if (!pte_none(*dst_pte))
                goto out_unlock;

        if (page_in_cache) {
                /* Usually, cache pages are already added to LRU */
                if (newly_allocated)
                        lru_cache_add(page);
                page_add_file_rmap(page, dst_vma, false);
        } else {
                page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
                lru_cache_add_inactive_or_unevictable(page, dst_vma);
        }

        /*
         * Must happen after rmap, as mm_counter() checks mapping (via
         * PageAnon()), which is set by __page_set_anon_rmap().
         */
        inc_mm_counter(dst_mm, mm_counter(page));

        set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

        /* No need to invalidate - it was non-present before */
        update_mmu_cache(dst_vma, dst_addr, dst_pte);
        ret = 0;
out_unlock:
        pte_unmap_unlock(dst_pte, ptl);
        return ret;
}
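
/*
 * NB: besides mcopy_atomic_pte() and mcontinue_atomic_pte() below, this
 * helper is also used by shmem_mfill_atomic_pte() in mm/shmem.c, which is
 * why it must handle the full shmem/anon, shared/private matrix.
 */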

static int mcopy_atomic_pte(struct mm_struct *dst_mm,
                            pmd_t *dst_pmd,
                            struct vm_area_struct *dst_vma,
                            unsigned long dst_addr,
                            unsigned long src_addr,
                            struct page **pagep,
                            bool wp_copy)
{
        void *page_kaddr;
        int ret;
        struct page *page;

        if (!*pagep) {
                ret = -ENOMEM;
                page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
                if (!page)
                        goto out;

                page_kaddr = kmap_atomic(page);
                ret = copy_from_user(page_kaddr,
                                     (const void __user *) src_addr,
                                     PAGE_SIZE);
                kunmap_atomic(page_kaddr);

                /* fallback to copy_from_user outside mmap_lock */
                if (unlikely(ret)) {
                        ret = -ENOENT;
                        *pagep = page;
                        /* don't free the page */
                        goto out;
                }

                flush_dcache_page(page);
        } else {
                page = *pagep;
                *pagep = NULL;
        }

        /*
         * The memory barrier inside __SetPageUptodate makes sure that
         * preceding stores to the page contents become visible before
         * the set_pte_at() write.
         */
        __SetPageUptodate(page);

        ret = -ENOMEM;
        if (mem_cgroup_charge(page_folio(page), dst_mm, GFP_KERNEL))
                goto out_release;

        ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                       page, true, wp_copy);
        if (ret)
                goto out_release;
out:
        return ret;
out_release:
        put_page(page);
        goto out;
}
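
/*
 * NB: the -ENOENT above is a retry protocol, not a real error: page faults
 * are disabled under kmap_atomic(), so copy_from_user() fails whenever the
 * source page is not resident.  The freshly allocated page is handed back
 * through *pagep, and __mcopy_atomic() redoes the copy with kmap() after
 * dropping mmap_lock, then calls back in with *pagep still set.
 */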

static int mfill_zeropage_pte(struct mm_struct *dst_mm,
                              pmd_t *dst_pmd,
                              struct vm_area_struct *dst_vma,
                              unsigned long dst_addr)
{
        pte_t _dst_pte, *dst_pte;
        spinlock_t *ptl;
        int ret;
        pgoff_t offset, max_off;
        struct inode *inode;

        _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
                                         dst_vma->vm_page_prot));
        dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
        if (dst_vma->vm_file) {
                /* the shmem MAP_PRIVATE case requires checking the i_size */
                inode = dst_vma->vm_file->f_inode;
                offset = linear_page_index(dst_vma, dst_addr);
                max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
                ret = -EFAULT;
                if (unlikely(offset >= max_off))
                        goto out_unlock;
        }
        ret = -EEXIST;
        if (!pte_none(*dst_pte))
                goto out_unlock;
        set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
        /* No need to invalidate - it was non-present before */
        update_mmu_cache(dst_vma, dst_addr, dst_pte);
        ret = 0;
out_unlock:
        pte_unmap_unlock(dst_pte, ptl);
        return ret;
}

/* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
                                pmd_t *dst_pmd,
                                struct vm_area_struct *dst_vma,
                                unsigned long dst_addr,
                                bool wp_copy)
{
        struct inode *inode = file_inode(dst_vma->vm_file);
        pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
        struct page *page;
        int ret;

        ret = shmem_getpage(inode, pgoff, &page, SGP_READ);
        if (ret)
                goto out;
        if (!page) {
                ret = -EFAULT;
                goto out;
        }

        if (PageHWPoison(page)) {
                ret = -EIO;
                goto out_release;
        }

        ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                       page, false, wp_copy);
        if (ret)
                goto out_release;

        unlock_page(page);
        ret = 0;
out:
        return ret;
out_release:
        unlock_page(page);
        put_page(page);
        goto out;
}

static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;

        pgd = pgd_offset(mm, address);
        p4d = p4d_alloc(mm, pgd, address);
        if (!p4d)
                return NULL;
        pud = pud_alloc(mm, p4d, address);
        if (!pud)
                return NULL;
        /*
         * Note that reaching this point does not mean the pmd was
         * missing: *pmd may already be established, and it may even
         * be a trans_huge pmd.
         */
        return pmd_alloc(mm, pud, address);
}
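
/*
 * NB: mm_alloc_pmd() only walks/allocates down to the pmd level; the pte
 * page itself is allocated later by the caller (see the __pte_alloc()
 * call in __mcopy_atomic()).
 */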

#ifdef CONFIG_HUGETLB_PAGE
/*
 * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
 * called with mmap_lock held; it will release mmap_lock before returning.
 */
static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
                                              struct vm_area_struct *dst_vma,
                                              unsigned long dst_start,
                                              unsigned long src_start,
                                              unsigned long len,
                                              enum mcopy_atomic_mode mode)
{
        int vm_shared = dst_vma->vm_flags & VM_SHARED;
        ssize_t err;
        pte_t *dst_pte;
        unsigned long src_addr, dst_addr;
        long copied;
        struct page *page;
        unsigned long vma_hpagesize;
        pgoff_t idx;
        u32 hash;
        struct address_space *mapping;

        /*
         * There is no default zero huge page for all huge page sizes as
         * supported by hugetlb.  A PMD_SIZE huge page may exist as used
         * by THP.  Since we cannot reliably insert a zero page, this
         * feature is not supported.
         */
        if (mode == MCOPY_ATOMIC_ZEROPAGE) {
                mmap_read_unlock(dst_mm);
                return -EINVAL;
        }

        src_addr = src_start;
        dst_addr = dst_start;
        copied = 0;
        page = NULL;
        vma_hpagesize = vma_kernel_pagesize(dst_vma);

        /*
         * Validate alignment based on huge page size
         */
        err = -EINVAL;
        if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
                goto out_unlock;

retry:
        /*
         * On routine entry dst_vma is set.  If we had to drop mmap_lock and
         * retry, dst_vma will be set to NULL and we must look it up again.
         */
        if (!dst_vma) {
                err = -ENOENT;
                dst_vma = find_dst_vma(dst_mm, dst_start, len);
                if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
                        goto out_unlock;

                err = -EINVAL;
                if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
                        goto out_unlock;

                vm_shared = dst_vma->vm_flags & VM_SHARED;
        }

        /*
         * If not shared, ensure the dst_vma has an anon_vma.
         */
        err = -ENOMEM;
        if (!vm_shared) {
                if (unlikely(anon_vma_prepare(dst_vma)))
                        goto out_unlock;
        }

        while (src_addr < src_start + len) {
                BUG_ON(dst_addr >= dst_start + len);

                /*
                 * Serialize via i_mmap_rwsem and hugetlb_fault_mutex.
                 * i_mmap_rwsem ensures the dst_pte remains valid even
                 * in the case of shared pmds.  fault mutex prevents
                 * races with other faulting threads.
                 */
                mapping = dst_vma->vm_file->f_mapping;
                i_mmap_lock_read(mapping);
                idx = linear_page_index(dst_vma, dst_addr);
                hash = hugetlb_fault_mutex_hash(mapping, idx);
                mutex_lock(&hugetlb_fault_mutex_table[hash]);

                err = -ENOMEM;
                dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
                if (!dst_pte) {
                        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                        i_mmap_unlock_read(mapping);
                        goto out_unlock;
                }

                if (mode != MCOPY_ATOMIC_CONTINUE &&
                    !huge_pte_none(huge_ptep_get(dst_pte))) {
                        err = -EEXIST;
                        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                        i_mmap_unlock_read(mapping);
                        goto out_unlock;
                }

                err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
                                               dst_addr, src_addr, mode, &page);

                mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                i_mmap_unlock_read(mapping);

                cond_resched();

                if (unlikely(err == -ENOENT)) {
                        mmap_read_unlock(dst_mm);
                        BUG_ON(!page);

                        err = copy_huge_page_from_user(page,
                                                (const void __user *)src_addr,
                                                vma_hpagesize / PAGE_SIZE,
                                                true);
                        if (unlikely(err)) {
                                err = -EFAULT;
                                goto out;
                        }
                        mmap_read_lock(dst_mm);

                        dst_vma = NULL;
                        goto retry;
                } else
                        BUG_ON(page);

                if (!err) {
                        dst_addr += vma_hpagesize;
                        src_addr += vma_hpagesize;
                        copied += vma_hpagesize;

                        if (fatal_signal_pending(current))
                                err = -EINTR;
                }
                if (err)
                        break;
        }

out_unlock:
        mmap_read_unlock(dst_mm);
out:
        if (page)
                put_page(page);
        BUG_ON(copied < 0);
        BUG_ON(err > 0);
        BUG_ON(!copied && !err);
        return copied ? copied : err;
}
#else /* !CONFIG_HUGETLB_PAGE */
/* fail at build time if gcc attempts to use this */
extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
                                      struct vm_area_struct *dst_vma,
                                      unsigned long dst_start,
                                      unsigned long src_start,
                                      unsigned long len,
                                      enum mcopy_atomic_mode mode);
#endif /* CONFIG_HUGETLB_PAGE */

static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
                                                pmd_t *dst_pmd,
                                                struct vm_area_struct *dst_vma,
                                                unsigned long dst_addr,
                                                unsigned long src_addr,
                                                struct page **page,
                                                enum mcopy_atomic_mode mode,
                                                bool wp_copy)
{
        ssize_t err;

        if (mode == MCOPY_ATOMIC_CONTINUE) {
                return mcontinue_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                            wp_copy);
        }

        /*
         * The normal page fault path for a shmem mapping will invoke
         * the fault, fill the hole in the file and COW it right away.
         * The result generates plain anonymous memory. So when we are
         * asked to fill a hole in a MAP_PRIVATE shmem mapping, we'll
         * generate anonymous memory directly without actually filling
         * the hole. For the MAP_PRIVATE case the robustness check
         * only happens in the pagetable (to verify it's still none)
         * and not in the radix tree.
         */
        if (!(dst_vma->vm_flags & VM_SHARED)) {
                if (mode == MCOPY_ATOMIC_NORMAL)
                        err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
                                               dst_addr, src_addr, page,
                                               wp_copy);
                else
                        err = mfill_zeropage_pte(dst_mm, dst_pmd,
                                                 dst_vma, dst_addr);
        } else {
                VM_WARN_ON_ONCE(wp_copy);
                err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
                                             dst_addr, src_addr,
                                             mode != MCOPY_ATOMIC_NORMAL,
                                             page);
        }

        return err;
}

static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
                                              unsigned long dst_start,
                                              unsigned long src_start,
                                              unsigned long len,
                                              enum mcopy_atomic_mode mcopy_mode,
                                              atomic_t *mmap_changing,
                                              __u64 mode)
{
        struct vm_area_struct *dst_vma;
        ssize_t err;
        pmd_t *dst_pmd;
        unsigned long src_addr, dst_addr;
        long copied;
        struct page *page;
        bool wp_copy;

        /*
         * Sanitize the command parameters:
         */
        BUG_ON(dst_start & ~PAGE_MASK);
        BUG_ON(len & ~PAGE_MASK);

        /* Does the address range wrap, or is the span zero-sized? */
        BUG_ON(src_start + len <= src_start);
        BUG_ON(dst_start + len <= dst_start);

        src_addr = src_start;
        dst_addr = dst_start;
        copied = 0;
        page = NULL;
retry:
        mmap_read_lock(dst_mm);

        /*
         * If memory mappings are changing because of a non-cooperative
         * operation (e.g. mremap) running in parallel, bail out and
         * request the user to retry later.
         */
        err = -EAGAIN;
        if (mmap_changing && atomic_read(mmap_changing))
                goto out_unlock;

        /*
         * Make sure the dst range is both valid and fully within a
         * single existing vma.
         */
        err = -ENOENT;
        dst_vma = find_dst_vma(dst_mm, dst_start, len);
        if (!dst_vma)
                goto out_unlock;

        err = -EINVAL;
        /*
         * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
         * it will overwrite vm_ops, so vma_is_anonymous must return false.
         */
        if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
            dst_vma->vm_flags & VM_SHARED))
                goto out_unlock;

        /*
         * validate 'mode' now that we know the dst_vma: don't allow
         * a wrprotect copy if the userfaultfd didn't register as WP.
         */
        wp_copy = mode & UFFDIO_COPY_MODE_WP;
        if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
                goto out_unlock;

        /*
         * If this is a HUGETLB vma, pass off to the appropriate routine
         */
        if (is_vm_hugetlb_page(dst_vma))
                return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
                                              src_start, len, mcopy_mode);

        if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
                goto out_unlock;
        if (!vma_is_shmem(dst_vma) && mcopy_mode == MCOPY_ATOMIC_CONTINUE)
                goto out_unlock;

        /*
         * Ensure the dst_vma has an anon_vma, or this page would get
         * a NULL anon_vma when installed into the dst_vma.
         */
        err = -ENOMEM;
        if (!(dst_vma->vm_flags & VM_SHARED) &&
            unlikely(anon_vma_prepare(dst_vma)))
                goto out_unlock;

        while (src_addr < src_start + len) {
                pmd_t dst_pmdval;

                BUG_ON(dst_addr >= dst_start + len);

                dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
                if (unlikely(!dst_pmd)) {
                        err = -ENOMEM;
                        break;
                }

                dst_pmdval = pmd_read_atomic(dst_pmd);
                /*
                 * If the dst_pmd is mapped as THP don't
                 * override it and just be strict.
                 */
                if (unlikely(pmd_trans_huge(dst_pmdval))) {
                        err = -EEXIST;
                        break;
                }
                if (unlikely(pmd_none(dst_pmdval)) &&
                    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
                        err = -ENOMEM;
                        break;
                }
                /* If a huge pmd materialized from under us, fail */
                if (unlikely(pmd_trans_huge(*dst_pmd))) {
                        err = -EFAULT;
                        break;
                }

                BUG_ON(pmd_none(*dst_pmd));
                BUG_ON(pmd_trans_huge(*dst_pmd));

                err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                       src_addr, &page, mcopy_mode, wp_copy);
                cond_resched();

                if (unlikely(err == -ENOENT)) {
                        void *page_kaddr;

                        mmap_read_unlock(dst_mm);
                        BUG_ON(!page);

                        page_kaddr = kmap(page);
                        err = copy_from_user(page_kaddr,
                                             (const void __user *) src_addr,
                                             PAGE_SIZE);
                        kunmap(page);
                        if (unlikely(err)) {
                                err = -EFAULT;
                                goto out;
                        }
                        flush_dcache_page(page);
                        goto retry;
                } else
                        BUG_ON(page);

                if (!err) {
                        dst_addr += PAGE_SIZE;
                        src_addr += PAGE_SIZE;
                        copied += PAGE_SIZE;

                        if (fatal_signal_pending(current))
                                err = -EINTR;
                }
                if (err)
                        break;
        }

out_unlock:
        mmap_read_unlock(dst_mm);
out:
        if (page)
                put_page(page);
        BUG_ON(copied < 0);
        BUG_ON(err > 0);
        BUG_ON(!copied && !err);
        return copied ? copied : err;
}

ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
                     unsigned long src_start, unsigned long len,
                     atomic_t *mmap_changing, __u64 mode)
{
        return __mcopy_atomic(dst_mm, dst_start, src_start, len,
                              MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
}

ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
                       unsigned long len, atomic_t *mmap_changing)
{
        return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
                              mmap_changing, 0);
}

ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
                       unsigned long len, atomic_t *mmap_changing)
{
        return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
                              mmap_changing, 0);
}
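
/*
 * The three wrappers above back the UFFDIO_COPY, UFFDIO_ZEROPAGE and
 * UFFDIO_CONTINUE ioctls (handled in fs/userfaultfd.c).  A minimal,
 * illustrative userspace sketch of the UFFDIO_COPY path, assuming a
 * userfaultfd "uffd" whose range was already registered with
 * UFFDIO_REGISTER ("uffd", "fault_addr", "src_buf" and "page_size" are
 * hypothetical names, not part of this file):
 *
 *      #include <linux/userfaultfd.h>
 *      #include <sys/ioctl.h>
 *
 *      struct uffdio_copy copy = {
 *              .dst  = fault_addr & ~(page_size - 1),
 *              .src  = (unsigned long)src_buf,
 *              .len  = page_size,
 *              .mode = 0,      // or UFFDIO_COPY_MODE_WP for a wp copy
 *      };
 *      if (ioctl(uffd, UFFDIO_COPY, &copy))
 *              // copy.copy holds the bytes copied or a negative error;
 *              // -EAGAIN means the mappings changed (mmap_changing) or
 *              // the copy was partial, and the call should be retried.
 *              handle_partial_copy(copy.copy);
 *
 * UFFDIO_ZEROPAGE and UFFDIO_CONTINUE take struct uffdio_zeropage and
 * struct uffdio_continue instead, each carrying a struct uffdio_range
 * rather than a src/dst pair.
 */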

int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
                        unsigned long len, bool enable_wp,
                        atomic_t *mmap_changing)
{
        struct vm_area_struct *dst_vma;
        pgprot_t newprot;
        int err;

        /*
         * Sanitize the command parameters:
         */
        BUG_ON(start & ~PAGE_MASK);
        BUG_ON(len & ~PAGE_MASK);

        /* Does the address range wrap, or is the span zero-sized? */
        BUG_ON(start + len <= start);

        mmap_read_lock(dst_mm);

        /*
         * If memory mappings are changing because of a non-cooperative
         * operation (e.g. mremap) running in parallel, bail out and
         * request the user to retry later.
         */
        err = -EAGAIN;
        if (mmap_changing && atomic_read(mmap_changing))
                goto out_unlock;

        err = -ENOENT;
        dst_vma = find_dst_vma(dst_mm, start, len);
        /*
         * Make sure the vma is not shared, and that the dst range is
         * both valid and fully within a single existing vma.
         */
        if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
                goto out_unlock;
        if (!userfaultfd_wp(dst_vma))
                goto out_unlock;
        if (!vma_is_anonymous(dst_vma))
                goto out_unlock;

        if (enable_wp)
                newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
        else
                newprot = vm_get_page_prot(dst_vma->vm_flags);

        change_protection(dst_vma, start, start + len, newprot,
                          enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);

        err = 0;
out_unlock:
        mmap_read_unlock(dst_mm);
        return err;
}
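
/*
 * mwriteprotect_range() backs the UFFDIO_WRITEPROTECT ioctl.  A minimal
 * userspace sketch, assuming a uffd registered with
 * UFFDIO_REGISTER_MODE_WP ("uffd", "addr" and "len" are hypothetical
 * names, not part of this file):
 *
 *      struct uffdio_writeprotect wp = {
 *              .range = { .start = addr, .len = len },
 *              .mode  = UFFDIO_WRITEPROTECT_MODE_WP,   // 0 to resolve
 *      };
 *      if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp))
 *              handle_error(errno);
 *
 * Write faults on the protected range are then reported with
 * UFFD_PAGEFAULT_FLAG_WP set; userspace resolves them by repeating the
 * ioctl with UFFDIO_WRITEPROTECT_MODE_WP cleared.
 */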