unsigned long start, end, addr;
unsigned long size = cmd->npages << PAGE_SHIFT;
struct mm_struct *mm = dmirror->notifier.mm;
- struct page *pages[64];
struct dmirror_bounce bounce;
- unsigned long next;
- int ret;
+ int ret = 0;
start = cmd->addr;
end = start + size;
return -EINVAL;
mmap_read_lock(mm);
- for (addr = start; addr < end; addr = next) {
- unsigned long mapped = 0;
- int i;
-
- next = min(end, addr + (ARRAY_SIZE(pages) << PAGE_SHIFT));
+ for (addr = start; !ret && addr < end; addr += PAGE_SIZE) {
+ struct folio *folio;
+ struct page *page;
- ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
- /*
- * Do dmirror_atomic_map() iff all pages are marked for
- * exclusive access to avoid accessing uninitialized
- * fields of pages.
- */
- if (ret == (next - addr) >> PAGE_SHIFT)
- mapped = dmirror_atomic_map(addr, next, pages, dmirror);
- for (i = 0; i < ret; i++) {
- if (pages[i]) {
- unlock_page(pages[i]);
- put_page(pages[i]);
- }
+ page = make_device_exclusive(mm, addr, NULL, &folio);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ break;
}
- if (addr + (mapped << PAGE_SHIFT) < next) {
- mmap_read_unlock(mm);
- mmput(mm);
- return -EBUSY;
- }
+ ret = dmirror_atomic_map(addr, addr + PAGE_SIZE, &page, dmirror);
+ ret = ret == 1 ? 0 : -EBUSY;
+ folio_unlock(folio);
+ folio_put(folio);
}
mmap_read_unlock(mm);
mmput(mm);
+ if (ret)
+ return ret;
+
/* Return the migrated data for verification. */
ret = dmirror_bounce_init(&bounce, start, size);
if (ret)
.arg = &args,
};
- /*
- * Restrict to anonymous folios for now to avoid potential writeback
- * issues.
- */
- if (!folio_test_anon(folio) || folio_test_hugetlb(folio))
- return false;
-
rmap_walk(folio, &rwc);
return args.valid && !folio_mapcount(folio);
}
/**
- * make_device_exclusive_range() - Mark a range for exclusive use by a device
+ * make_device_exclusive() - Mark a page for exclusive use by a device
* @mm: mm_struct of associated target process
- * @start: start of the region to mark for exclusive device access
- * @end: end address of region
- * @pages: returns the pages which were successfully marked for exclusive access
+ * @addr: the virtual address to mark for exclusive device access
* @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier to allow filtering
+ * @foliop: folio pointer will be stored here on success.
+ *
+ * This function looks up the page mapped at the given address, grabs a
+ * folio reference, locks the folio and replaces the PTE with special
+ * device-exclusive PFN swap entry, preventing access through the process
+ * page tables. The function will return with the folio locked and referenced.
*
- * Returns: number of pages found in the range by GUP. A page is marked for
- * exclusive access only if the page pointer is non-NULL.
+ * On fault, the device-exclusive entries are replaced with the original PTE
+ * under folio lock, after calling MMU notifiers.
*
- * This function finds ptes mapping page(s) to the given address range, locks
- * them and replaces mappings with special swap entries preventing userspace CPU
- * access. On fault these entries are replaced with the original mapping after
- * calling MMU notifiers.
+ * Only anonymous non-hugetlb folios are supported and the VMA must have
+ * write permissions such that we can fault in the anonymous page writable
+ * in order to mark it exclusive. The caller must hold the mmap_lock in read
+ * mode.
*
* A driver using this to program access from a device must use a mmu notifier
* critical section to hold a device specific lock during programming. Once
- * programming is complete it should drop the page lock and reference after
+ * programming is complete it should drop the folio lock and reference after
* which point CPU access to the page will revoke the exclusive access.
+ *
+ * Notes:
+ * #. This function always operates on individual PTEs mapping individual
+ * pages. PMD-sized THPs are first remapped to be mapped by PTEs before
+ * the conversion happens on a single PTE corresponding to @addr.
+ * #. While concurrent access through the process page tables is prevented,
+ * concurrent access through other page references (e.g., earlier GUP
+ * invocation) is not handled and not supported.
+ * #. device-exclusive entries are considered "clean" and "old" by core-mm.
+ * Device drivers must update the folio state when informed by MMU
+ * notifiers.
+ *
+ * Returns: pointer to mapped page on success, otherwise a negative error.
*/
-int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
- unsigned long end, struct page **pages,
- void *owner)
+struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr,
+ void *owner, struct folio **foliop)
{
- long npages = (end - start) >> PAGE_SHIFT;
- long i;
+ struct folio *folio;
+ struct page *page;
+ long npages;
+
+ mmap_assert_locked(mm);
- npages = get_user_pages_remote(mm, start, npages,
+ /*
+ * Fault in the page writable and try to lock it; note that if the
+ * address would already be marked for exclusive use by a device,
+ * the GUP call would undo that first by triggering a fault.
+ */
+ npages = get_user_pages_remote(mm, addr, 1,
FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD,
- pages, NULL);
- if (npages < 0)
- return npages;
-
- for (i = 0; i < npages; i++, start += PAGE_SIZE) {
- struct folio *folio = page_folio(pages[i]);
- if (PageTail(pages[i]) || !folio_trylock(folio)) {
- folio_put(folio);
- pages[i] = NULL;
- continue;
- }
+ &page, NULL);
+ if (npages != 1)
+ return ERR_PTR(npages);
+ folio = page_folio(page);
- if (!folio_make_device_exclusive(folio, mm, start, owner)) {
- folio_unlock(folio);
- folio_put(folio);
- pages[i] = NULL;
- }
+ if (!folio_test_anon(folio) || folio_test_hugetlb(folio)) {
+ folio_put(folio);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (!folio_trylock(folio)) {
+ folio_put(folio);
+ return ERR_PTR(-EBUSY);
}
- return npages;
+ if (!folio_make_device_exclusive(folio, mm, addr, owner)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ return ERR_PTR(-EBUSY);
+ }
+ *foliop = folio;
+ return page;
}
-EXPORT_SYMBOL_GPL(make_device_exclusive_range);
+EXPORT_SYMBOL_GPL(make_device_exclusive);
#endif
void __put_anon_vma(struct anon_vma *anon_vma)