index e05c300..b9eb5c1 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1405,24 +1405,14 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
        /*
         * When racing against e.g. zap_pte_range() on another cpu,
         * in between its ptep_get_and_clear_full() and page_remove_rmap(),
-        * try_to_unmap() may return false when it is about to become true,
+        * try_to_unmap() may return before page_mapped() has become false,
         * if page table locking is skipped: use TTU_SYNC to wait for that.
         */
        if (flags & TTU_SYNC)
                pvmw.flags = PVMW_SYNC;
 
-       /* munlock has nothing to gain from examining un-locked vmas */
-       if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
-               return true;
-
-       if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
-           is_zone_device_page(page) && !is_device_private_page(page))
-               return true;
-
-       if (flags & TTU_SPLIT_HUGE_PMD) {
-               split_huge_pmd_address(vma, address,
-                               flags & TTU_SPLIT_FREEZE, page);
-       }
+       if (flags & TTU_SPLIT_HUGE_PMD)
+               split_huge_pmd_address(vma, address, false, page);
 
        /*
         * For THP, we have to assume the worse case ie pmd for invalidation.
@@ -1447,37 +1437,23 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
        mmu_notifier_invalidate_range_start(&range);
 
        while (page_vma_mapped_walk(&pvmw)) {
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
-               /* PMD-mapped THP migration entry */
-               if (!pvmw.pte && (flags & TTU_MIGRATION)) {
-                       VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
-
-                       set_pmd_migration_entry(&pvmw, page);
-                       continue;
-               }
-#endif
-
                /*
                 * If the page is mlock()d, we cannot swap it out.
-                * If it's recently referenced (perhaps page_referenced
-                * skipped over this mm) then we should reactivate it.
                 */
-               if (!(flags & TTU_IGNORE_MLOCK)) {
-                       if (vma->vm_flags & VM_LOCKED) {
-                               /* PTE-mapped THP are never mlocked */
-                               if (!PageTransCompound(page)) {
-                                       /*
-                                        * Holding pte lock, we do *not* need
-                                        * mmap_lock here
-                                        */
-                                       mlock_vma_page(page);
-                               }
-                               ret = false;
-                               page_vma_mapped_walk_done(&pvmw);
-                               break;
-                       }
-                       if (flags & TTU_MUNLOCK)
-                               continue;
+               if (!(flags & TTU_IGNORE_MLOCK) &&
+                   (vma->vm_flags & VM_LOCKED)) {
+                       /*
+                        * PTE-mapped THP are never marked as mlocked: so do
+                        * not set it on a DoubleMap THP, nor on an Anon THP
+                        * (which may still be PTE-mapped after DoubleMap was
+                        * cleared).  But stop unmapping even in those cases.
+                        */
+                       if (!PageTransCompound(page) || (PageHead(page) &&
+                            !PageDoubleMap(page) && !PageAnon(page)))
+                               mlock_vma_page(page);
+                       page_vma_mapped_walk_done(&pvmw);
+                       ret = false;
+                       break;
                }
 
                /* Unexpected PMD-mapped THP? */
@@ -1520,46 +1496,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                        }
                }
 
-               if (IS_ENABLED(CONFIG_MIGRATION) &&
-                   (flags & TTU_MIGRATION) &&
-                   is_zone_device_page(page)) {
-                       swp_entry_t entry;
-                       pte_t swp_pte;
-
-                       pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
-
-                       /*
-                        * Store the pfn of the page in a special migration
-                        * pte. do_swap_page() will wait until the migration
-                        * pte is removed and then restart fault handling.
-                        */
-                       entry = make_migration_entry(page, 0);
-                       swp_pte = swp_entry_to_pte(entry);
-
-                       /*
-                        * pteval maps a zone device page and is therefore
-                        * a swap pte.
-                        */
-                       if (pte_swp_soft_dirty(pteval))
-                               swp_pte = pte_swp_mksoft_dirty(swp_pte);
-                       if (pte_swp_uffd_wp(pteval))
-                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
-                       set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
-                       /*
-                        * No need to invalidate here it will synchronize on
-                        * against the special swap migration pte.
-                        *
-                        * The assignment to subpage above was computed from a
-                        * swap PTE which results in an invalid pointer.
-                        * Since only PAGE_SIZE pages can currently be
-                        * migrated, just set it to page. This will need to be
-                        * changed when hugepage migrations to device private
-                        * memory are supported.
-                        */
-                       subpage = page;
-                       goto discard;
-               }
-
                /* Nuke the page table entry. */
                flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
                if (should_defer_flush(mm, flags)) {
@@ -1612,35 +1548,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                        /* We have to invalidate as we cleared the pte */
                        mmu_notifier_invalidate_range(mm, address,
                                                      address + PAGE_SIZE);
-               } else if (IS_ENABLED(CONFIG_MIGRATION) &&
-                               (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
-                       swp_entry_t entry;
-                       pte_t swp_pte;
-
-                       if (arch_unmap_one(mm, vma, address, pteval) < 0) {
-                               set_pte_at(mm, address, pvmw.pte, pteval);
-                               ret = false;
-                               page_vma_mapped_walk_done(&pvmw);
-                               break;
-                       }
-
-                       /*
-                        * Store the pfn of the page in a special migration
-                        * pte. do_swap_page() will wait until the migration
-                        * pte is removed and then restart fault handling.
-                        */
-                       entry = make_migration_entry(subpage,
-                                       pte_write(pteval));
-                       swp_pte = swp_entry_to_pte(entry);
-                       if (pte_soft_dirty(pteval))
-                               swp_pte = pte_swp_mksoft_dirty(swp_pte);
-                       if (pte_uffd_wp(pteval))
-                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
-                       set_pte_at(mm, address, pvmw.pte, swp_pte);
-                       /*
-                        * No need to invalidate here it will synchronize on
-                        * against the special swap migration pte.
-                        */
                } else if (PageAnon(page)) {
                        swp_entry_t entry = { .val = page_private(subpage) };
                        pte_t swp_pte;
@@ -1756,9 +1663,10 @@ static int page_not_mapped(struct page *page)
  * Tries to remove all the page table entries which are mapping this
  * page, used in the pageout path.  Caller must hold the page lock.
  *
- * If unmap is successful, return true. Otherwise, false.
+ * It is the caller's responsibility to check if the page is still
+ * mapped when needed (use TTU_SYNC to prevent accounting races).
  */
-bool try_to_unmap(struct page *page, enum ttu_flags flags)
+void try_to_unmap(struct page *page, enum ttu_flags flags)
 {
        struct rmap_walk_control rwc = {
                .rmap_one = try_to_unmap_one,
@@ -1767,6 +1675,274 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
                .anon_lock = page_lock_anon_vma_read,
        };
 
+       if (flags & TTU_RMAP_LOCKED)
+               rmap_walk_locked(page, &rwc);
+       else
+               rmap_walk(page, &rwc);
+}
+
+/*
+ * @arg: enum ttu_flags will be passed to this argument.
+ *
+ * If TTU_SPLIT_HUGE_PMD is specified any PMD mappings will be split into PTEs
+ * containing migration entries.
+ */
+static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
+                    unsigned long address, void *arg)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct page_vma_mapped_walk pvmw = {
+               .page = page,
+               .vma = vma,
+               .address = address,
+       };
+       pte_t pteval;
+       struct page *subpage;
+       bool ret = true;
+       struct mmu_notifier_range range;
+       enum ttu_flags flags = (enum ttu_flags)(long)arg;
+
+       /*
+        * When racing against e.g. zap_pte_range() on another cpu,
+        * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+        * try_to_migrate() may return before page_mapped() has become false,
+        * if page table locking is skipped: use TTU_SYNC to wait for that.
+        */
+       if (flags & TTU_SYNC)
+               pvmw.flags = PVMW_SYNC;
+
+       /*
+        * unmap_page() in mm/huge_memory.c is the only user of migration with
+        * TTU_SPLIT_HUGE_PMD and it wants to freeze.
+        */
+       if (flags & TTU_SPLIT_HUGE_PMD)
+               split_huge_pmd_address(vma, address, true, page);
+
+       /*
+        * For THP, we have to assume the worst case, i.e. pmd, for invalidation.
+        * For hugetlb, it could be much worse if we need to do pud
+        * invalidation in the case of pmd sharing.
+        *
+        * Note that the page cannot be freed in this function as the caller
+        * of try_to_migrate() must hold a reference on the page.
+        */
+       range.end = PageKsm(page) ?
+                       address + PAGE_SIZE : vma_address_end(page, vma);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+                               address, range.end);
+       if (PageHuge(page)) {
+               /*
+                * If sharing is possible, start and end will be adjusted
+                * accordingly.
+                */
+               adjust_range_if_pmd_sharing_possible(vma, &range.start,
+                                                    &range.end);
+       }
+       mmu_notifier_invalidate_range_start(&range);
+
+       while (page_vma_mapped_walk(&pvmw)) {
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+               /* PMD-mapped THP migration entry */
+               if (!pvmw.pte) {
+                       VM_BUG_ON_PAGE(PageHuge(page) ||
+                                      !PageTransCompound(page), page);
+
+                       set_pmd_migration_entry(&pvmw, page);
+                       continue;
+               }
+#endif
+
+               /* Unexpected PMD-mapped THP? */
+               VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+               subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+               address = pvmw.address;
+
+               if (PageHuge(page) && !PageAnon(page)) {
+                       /*
+                        * To call huge_pmd_unshare, i_mmap_rwsem must be
+                        * held in write mode.  Caller needs to explicitly
+                        * do this outside rmap routines.
+                        */
+                       VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
+                       if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
+                               /*
+                                * huge_pmd_unshare unmapped an entire PMD
+                                * page.  There is no way of knowing exactly
+                                * which PMDs may be cached for this mm, so
+                                * we must flush them all.  start/end were
+                                * already adjusted above to cover this range.
+                                */
+                               flush_cache_range(vma, range.start, range.end);
+                               flush_tlb_range(vma, range.start, range.end);
+                               mmu_notifier_invalidate_range(mm, range.start,
+                                                             range.end);
+
+                               /*
+                                * The ref count of the PMD page was dropped
+                                * which is part of the way map counting
+                                * is done for shared PMDs.  Return 'true'
+                                * here.  When there is no other sharing,
+                                * huge_pmd_unshare returns false and we will
+                                * unmap the actual page and drop map count
+                                * to zero.
+                                */
+                               page_vma_mapped_walk_done(&pvmw);
+                               break;
+                       }
+               }
+
+               /* Nuke the page table entry. */
+               flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+               pteval = ptep_clear_flush(vma, address, pvmw.pte);
+
+               /* Move the dirty bit to the page. Now the pte is gone. */
+               if (pte_dirty(pteval))
+                       set_page_dirty(page);
+
+               /* Update high watermark before we lower rss */
+               update_hiwater_rss(mm);
+
+               if (is_zone_device_page(page)) {
+                       swp_entry_t entry;
+                       pte_t swp_pte;
+
+                       /*
+                        * Store the pfn of the page in a special migration
+                        * pte. do_swap_page() will wait until the migration
+                        * pte is removed and then restart fault handling.
+                        */
+                       entry = make_readable_migration_entry(
+                                                       page_to_pfn(page));
+                       swp_pte = swp_entry_to_pte(entry);
+
+                       /*
+                        * pteval maps a zone device page and is therefore
+                        * a swap pte.
+                        */
+                       if (pte_swp_soft_dirty(pteval))
+                               swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       if (pte_swp_uffd_wp(pteval))
+                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
+                       set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
+                       /*
+                        * No need to invalidate here, it will synchronize
+                        * against the special swap migration pte.
+                        *
+                        * The assignment to subpage above was computed from a
+                        * swap PTE which results in an invalid pointer.
+                        * Since only PAGE_SIZE pages can currently be
+                        * migrated, just set it to page. This will need to be
+                        * changed when hugepage migrations to device private
+                        * memory are supported.
+                        */
+                       subpage = page;
+               } else if (PageHWPoison(page)) {
+                       pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+                       if (PageHuge(page)) {
+                               hugetlb_count_sub(compound_nr(page), mm);
+                               set_huge_swap_pte_at(mm, address,
+                                                    pvmw.pte, pteval,
+                                                    vma_mmu_pagesize(vma));
+                       } else {
+                               dec_mm_counter(mm, mm_counter(page));
+                               set_pte_at(mm, address, pvmw.pte, pteval);
+                       }
+
+               } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
+                       /*
+                        * The guest indicated that the page content is of no
+                        * interest anymore. Simply discard the pte, vmscan
+                        * will take care of the rest.
+                        * A future reference will then fault in a new zero
+                        * page. When userfaultfd is active, we must not drop
+                        * this page though, as its main user (postcopy
+                        * migration) will not expect userfaults on already
+                        * copied pages.
+                        */
+                       dec_mm_counter(mm, mm_counter(page));
+                       /* We have to invalidate as we cleared the pte */
+                       mmu_notifier_invalidate_range(mm, address,
+                                                     address + PAGE_SIZE);
+               } else {
+                       swp_entry_t entry;
+                       pte_t swp_pte;
+
+                       if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+                               set_pte_at(mm, address, pvmw.pte, pteval);
+                               ret = false;
+                               page_vma_mapped_walk_done(&pvmw);
+                               break;
+                       }
+
+                       /*
+                        * Store the pfn of the page in a special migration
+                        * pte. do_swap_page() will wait until the migration
+                        * pte is removed and then restart fault handling.
+                        */
+                       if (pte_write(pteval))
+                               entry = make_writable_migration_entry(
+                                                       page_to_pfn(subpage));
+                       else
+                               entry = make_readable_migration_entry(
+                                                       page_to_pfn(subpage));
+
+                       swp_pte = swp_entry_to_pte(entry);
+                       if (pte_soft_dirty(pteval))
+                               swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       if (pte_uffd_wp(pteval))
+                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
+                       set_pte_at(mm, address, pvmw.pte, swp_pte);
+                       /*
+                        * No need to invalidate here, it will synchronize
+                        * against the special swap migration pte.
+                        */
+               }
+
+               /*
+                * No need to call mmu_notifier_invalidate_range(); it has been
+                * done above for all cases requiring it to happen under the
+                * page table lock, before mmu_notifier_invalidate_range_end().
+                *
+                * See Documentation/vm/mmu_notifier.rst
+                */
+               page_remove_rmap(subpage, PageHuge(page));
+               put_page(page);
+       }
+
+       mmu_notifier_invalidate_range_end(&range);
+
+       return ret;
+}
+
+/**
+ * try_to_migrate - try to replace all page table mappings with swap entries
+ * @page: the page to replace page table entries for
+ * @flags: action and flags
+ *
+ * Tries to remove all the page table entries which are mapping this page and
+ * replace them with special swap entries. Caller must hold the page lock.
+ */
+void try_to_migrate(struct page *page, enum ttu_flags flags)
+{
+       struct rmap_walk_control rwc = {
+               .rmap_one = try_to_migrate_one,
+               .arg = (void *)flags,
+               .done = page_not_mapped,
+               .anon_lock = page_lock_anon_vma_read,
+       };
+
+       /*
+        * Migration always ignores mlock and only supports the
+        * TTU_RMAP_LOCKED, TTU_SPLIT_HUGE_PMD and TTU_SYNC flags.
+        */
+       if (WARN_ON_ONCE(flags & ~(TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD |
+                                       TTU_SYNC)))
+               return;
+
+       if (is_zone_device_page(page) && !is_device_private_page(page))
+               return;
+
        /*
         * During exec, a temporary VMA is setup and later moved.
         * The VMA is moved under the anon_vma lock but not the
@@ -1775,38 +1951,70 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
         * locking requirements of exec(), migration skips
         * temporary VMAs until after exec() completes.
         */
-       if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
-           && !PageKsm(page) && PageAnon(page))
+       if (!PageKsm(page) && PageAnon(page))
                rwc.invalid_vma = invalid_migration_vma;
 
        if (flags & TTU_RMAP_LOCKED)
                rmap_walk_locked(page, &rwc);
        else
                rmap_walk(page, &rwc);
+}
 
-       /*
-        * When racing against e.g. zap_pte_range() on another cpu,
-        * in between its ptep_get_and_clear_full() and page_remove_rmap(),
-        * try_to_unmap() may return false when it is about to become true,
-        * if page table locking is skipped: use TTU_SYNC to wait for that.
-        */
-       return !page_mapcount(page);
+/*
+ * Walks the vmas mapping a page and mlocks the page if any locked vmas are
+ * found. Once one is found the page is mlocked and the scan can be terminated.
+ */
+static bool page_mlock_one(struct page *page, struct vm_area_struct *vma,
+                                unsigned long address, void *unused)
+{
+       struct page_vma_mapped_walk pvmw = {
+               .page = page,
+               .vma = vma,
+               .address = address,
+       };
+
+       /* An un-locked vma doesn't have any pages to lock, continue the scan */
+       if (!(vma->vm_flags & VM_LOCKED))
+               return true;
+
+       while (page_vma_mapped_walk(&pvmw)) {
+               /*
+                * Need to recheck under the ptl to serialise with
+                * __munlock_pagevec_fill() after VM_LOCKED is cleared in
+                * munlock_vma_pages_range().
+                */
+               if (vma->vm_flags & VM_LOCKED) {
+                       /*
+                        * PTE-mapped THP are never marked as mlocked; but
+                        * this function is never called on a DoubleMap THP,
+                        * nor on an Anon THP (which may still be PTE-mapped
+                        * after DoubleMap was cleared).
+                        */
+                       mlock_vma_page(page);
+                       /*
+                        * No need to scan further once the page is marked
+                        * as mlocked.
+                        */
+                       page_vma_mapped_walk_done(&pvmw);
+                       return false;
+               }
+       }
+
+       return true;
 }
 
 /**
- * try_to_munlock - try to munlock a page
- * @page: the page to be munlocked
+ * page_mlock - try to mlock a page
+ * @page: the page to be mlocked
  *
- * Called from munlock code.  Checks all of the VMAs mapping the page
- * to make sure nobody else has this page mlocked. The page will be
- * returned with PG_mlocked cleared if no other vmas have it mlocked.
+ * Called from munlock code. Checks all of the VMAs mapping the page and mlocks
+ * the page if any are found. The page will be returned with PG_mlocked cleared
+ * if it is not mapped by any locked vmas.
  */
-
-void try_to_munlock(struct page *page)
+void page_mlock(struct page *page)
 {
        struct rmap_walk_control rwc = {
-               .rmap_one = try_to_unmap_one,
-               .arg = (void *)TTU_MUNLOCK,
+               .rmap_one = page_mlock_one,
                .done = page_not_mapped,
                .anon_lock = page_lock_anon_vma_read,
 
@@ -1815,9 +2023,199 @@ void try_to_munlock(struct page *page)
        VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
        VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
 
+       /* Anon THP are only marked as mlocked when singly mapped */
+       if (PageTransCompound(page) && PageAnon(page))
+               return;
+
        rmap_walk(page, &rwc);
 }
 
+#ifdef CONFIG_DEVICE_PRIVATE
+struct make_exclusive_args {
+       struct mm_struct *mm;
+       unsigned long address;
+       void *owner;
+       bool valid;
+};
+
+static bool page_make_device_exclusive_one(struct page *page,
+               struct vm_area_struct *vma, unsigned long address, void *priv)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct page_vma_mapped_walk pvmw = {
+               .page = page,
+               .vma = vma,
+               .address = address,
+       };
+       struct make_exclusive_args *args = priv;
+       pte_t pteval;
+       struct page *subpage;
+       bool ret = true;
+       struct mmu_notifier_range range;
+       swp_entry_t entry;
+       pte_t swp_pte;
+
+       mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma,
+                                     vma->vm_mm, address, min(vma->vm_end,
+                                     address + page_size(page)), args->owner);
+       mmu_notifier_invalidate_range_start(&range);
+
+       while (page_vma_mapped_walk(&pvmw)) {
+               /* Unexpected PMD-mapped THP? */
+               VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+               if (!pte_present(*pvmw.pte)) {
+                       ret = false;
+                       page_vma_mapped_walk_done(&pvmw);
+                       break;
+               }
+
+               subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+               address = pvmw.address;
+
+               /* Nuke the page table entry. */
+               flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+               pteval = ptep_clear_flush(vma, address, pvmw.pte);
+
+               /* Move the dirty bit to the page. Now the pte is gone. */
+               if (pte_dirty(pteval))
+                       set_page_dirty(page);
+
+               /*
+                * Check that our target page is still mapped at the expected
+                * address.
+                */
+               if (args->mm == mm && args->address == address &&
+                   pte_write(pteval))
+                       args->valid = true;
+
+               /*
+                * Store the pfn of the page in a special device-exclusive
+                * swap pte. On a later CPU fault, do_swap_page() will restore
+                * the original pte after notifying the device via MMU notifiers.
+                */
+               if (pte_write(pteval))
+                       entry = make_writable_device_exclusive_entry(
+                                                       page_to_pfn(subpage));
+               else
+                       entry = make_readable_device_exclusive_entry(
+                                                       page_to_pfn(subpage));
+               swp_pte = swp_entry_to_pte(entry);
+               if (pte_soft_dirty(pteval))
+                       swp_pte = pte_swp_mksoft_dirty(swp_pte);
+               if (pte_uffd_wp(pteval))
+                       swp_pte = pte_swp_mkuffd_wp(swp_pte);
+
+               set_pte_at(mm, address, pvmw.pte, swp_pte);
+
+               /*
+                * There is a reference on the page for the swap entry which has
+                * been removed, so shouldn't take another.
+                */
+               page_remove_rmap(subpage, false);
+       }
+
+       mmu_notifier_invalidate_range_end(&range);
+
+       return ret;
+}
+
+/**
+ * page_make_device_exclusive - mark the page exclusively owned by a device
+ * @page: the page to replace page table entries for
+ * @mm: the mm_struct where the page is expected to be mapped
+ * @address: address where the page is expected to be mapped
+ * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier callbacks
+ *
+ * Tries to remove all the page table entries which are mapping this page and
+ * replace them with special device exclusive swap entries to grant a device
+ * exclusive access to the page. Caller must hold the page lock.
+ *
+ * Returns false if the page is still mapped, or if it could not be unmapped
+ * from the expected address. Otherwise returns true (success).
+ */
+static bool page_make_device_exclusive(struct page *page, struct mm_struct *mm,
+                               unsigned long address, void *owner)
+{
+       struct make_exclusive_args args = {
+               .mm = mm,
+               .address = address,
+               .owner = owner,
+               .valid = false,
+       };
+       struct rmap_walk_control rwc = {
+               .rmap_one = page_make_device_exclusive_one,
+               .done = page_not_mapped,
+               .anon_lock = page_lock_anon_vma_read,
+               .arg = &args,
+       };
+
+       /*
+        * Restrict to anonymous pages for now to avoid potential writeback
+        * issues. Also tail pages shouldn't be passed to rmap_walk so skip
+        * those.
+        */
+       if (!PageAnon(page) || PageTail(page))
+               return false;
+
+       rmap_walk(page, &rwc);
+
+       return args.valid && !page_mapcount(page);
+}
+
+/**
+ * make_device_exclusive_range() - Mark a range for exclusive use by a device
+ * @mm: mm_struct of the associated target process
+ * @start: start of the region to mark for exclusive device access
+ * @end: end address of region
+ * @pages: returns the pages which were successfully marked for exclusive access
+ * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier to allow filtering
+ *
+ * Returns: number of pages found in the range by GUP. A page is marked for
+ * exclusive access only if the page pointer is non-NULL.
+ *
+ * This function finds ptes mapping page(s) to the given address range, locks
+ * them and replaces mappings with special swap entries preventing userspace CPU
+ * access. On fault these entries are replaced with the original mapping after
+ * calling MMU notifiers.
+ *
+ * A driver using this to program access from a device must use a mmu notifier
+ * critical section to hold a device specific lock during programming. Once
+ * programming is complete it should drop the page lock and reference after
+ * which point CPU access to the page will revoke the exclusive access.
+ */
+int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
+                               unsigned long end, struct page **pages,
+                               void *owner)
+{
+       long npages = (end - start) >> PAGE_SHIFT;
+       long i;
+
+       npages = get_user_pages_remote(mm, start, npages,
+                                      FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD,
+                                      pages, NULL, NULL);
+       if (npages < 0)
+               return npages;
+
+       for (i = 0; i < npages; i++, start += PAGE_SIZE) {
+               if (!trylock_page(pages[i])) {
+                       put_page(pages[i]);
+                       pages[i] = NULL;
+                       continue;
+               }
+
+               if (!page_make_device_exclusive(pages[i], mm, start, owner)) {
+                       unlock_page(pages[i]);
+                       put_page(pages[i]);
+                       pages[i] = NULL;
+               }
+       }
+
+       return npages;
+}
+EXPORT_SYMBOL_GPL(make_device_exclusive_range);
+#endif
+
 void __put_anon_vma(struct anon_vma *anon_vma)
 {
        struct anon_vma *root = anon_vma->root;
@@ -1858,7 +2256,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page,
  * Find all the mappings of a page using the mapping pointer and the vma chains
  * contained in the anon_vma struct it points to.
  *
- * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
+ * When called from page_mlock(), the mmap_lock of the mm containing the vma
  * where the page was found will be held for write.  So, we won't recheck
  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
  * LOCKED.
@@ -1911,7 +2309,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
  * Find all the mappings of a page using the mapping pointer and the vma chains
  * contained in the address_space struct it points to.
  *
- * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
+ * When called from page_mlock(), the mmap_lock of the mm containing the vma
  * where the page was found will be held for write.  So, we won't recheck
  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
  * LOCKED.
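
For reference, a minimal caller sketch of the make_device_exclusive_range() API
introduced above. This sketch is not part of the patch; example_make_page_exclusive()
and dev_program_page() are hypothetical names used only for illustration. It follows
the kerneldoc: the mmap_lock is held for read around the call, and each page that was
successfully marked comes back locked with a reference held, which the driver drops
once its device mapping has been programmed.

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>

/* Hypothetical device-side hook: map @page at @addr for exclusive device use. */
static void dev_program_page(struct page *page, unsigned long addr)
{
	/* Device-specific programming would go here. */
}

static int example_make_page_exclusive(struct mm_struct *mm,
				       unsigned long addr, void *owner)
{
	struct page *page = NULL;
	int ret;

	/* get_user_pages_remote() inside requires the mmap_lock. */
	mmap_read_lock(mm);
	ret = make_device_exclusive_range(mm, addr, addr + PAGE_SIZE,
					  &page, owner);
	mmap_read_unlock(mm);
	if (ret < 0)
		return ret;
	/* A NULL entry means this page could not be marked exclusive. */
	if (ret != 1 || !page)
		return -EBUSY;

	/*
	 * The page is returned locked with a reference held; set up the
	 * device mapping (under the driver's MMU notifier lock) before
	 * dropping them, since a CPU fault revokes the exclusive entry.
	 */
	dev_program_page(page, addr);

	unlock_page(page);
	put_page(page);
	return 0;
}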