#include <linux/spinlock.h>
#include <linux/mm.h>
+#include <linux/memfd.h>
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
#include <linux/migrate.h>
#include <linux/mm_inline.h>
+#include <linux/pagevec.h>
#include <linux/sched/mm.h>
#include <linux/shmem_fs.h>
&locked, gup_flags);
}
EXPORT_SYMBOL(pin_user_pages_unlocked);
+
+/**
+ * memfd_pin_folios() - pin folios associated with a memfd
+ * @memfd: the memfd whose folios are to be pinned
+ * @start: the first memfd offset
+ * @end: the last memfd offset (inclusive)
+ * @folios: array that receives pointers to the folios pinned
+ * @max_folios: maximum number of entries in @folios
+ * @offset: the offset into the first folio
+ *
+ * Attempt to pin folios associated with a memfd in the contiguous range
+ * [start, end]. Given that a memfd is either backed by shmem or hugetlb,
+ * the folios are either found in the page cache or allocated if necessary.
+ * Once the folios are located, they are all pinned via FOLL_PIN and
+ * @offset is populated with the offset into the first folio. Eventually,
+ * these pinned folios must be released using either unpin_folios() or
+ * unpin_folio().
+ *
+ * Note that the folios may be pinned for an indefinite amount of time;
+ * in most cases, how long they stay pinned is controlled by userspace.
+ * This behavior is effectively the same as using FOLL_LONGTERM with
+ * other GUP APIs.
+ *
+ * Returns the number of folios pinned, which could be less than
+ * @max_folios depending on the folio sizes that cover the range
+ * [start, end]. If no folios were pinned, -errno is returned.
+ */
+long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end,
+ struct folio **folios, unsigned int max_folios,
+ pgoff_t *offset)
+{
+ unsigned int flags, nr_folios, nr_found;
+ unsigned int i, pgshift = PAGE_SHIFT;
+ pgoff_t start_idx, end_idx, next_idx;
+ struct folio *folio = NULL;
+ struct folio_batch fbatch;
+ struct hstate *h;
+ long ret = -EINVAL;
+
+ if (start < 0 || start > end || !max_folios)
+ return -EINVAL;
+
+ if (!memfd)
+ return -EINVAL;
+
+ if (!shmem_file(memfd) && !is_file_hugepages(memfd))
+ return -EINVAL;
+
+ if (end >= i_size_read(file_inode(memfd)))
+ return -EINVAL;
+
+ if (is_file_hugepages(memfd)) {
+ h = hstate_file(memfd);
+ pgshift = huge_page_shift(h);
+ }
+
+ flags = memalloc_pin_save();
+ do {
+ nr_folios = 0;
+ start_idx = start >> pgshift;
+ end_idx = end >> pgshift;
+ if (is_file_hugepages(memfd)) {
+ start_idx <<= huge_page_order(h);
+ end_idx <<= huge_page_order(h);
+ }
+
+ folio_batch_init(&fbatch);
+ while (start_idx <= end_idx && nr_folios < max_folios) {
+ /*
+ * In most cases, we should be able to find the folios
+ * in the page cache. If we cannot find them for some
+ * reason, we try to allocate them and add them to the
+ * page cache.
+ */
+ nr_found = filemap_get_folios_contig(memfd->f_mapping,
+ &start_idx,
+ end_idx,
+ &fbatch);
+ if (folio) {
+ folio_put(folio);
+ folio = NULL;
+ }
+
+ next_idx = 0;
+ for (i = 0; i < nr_found; i++) {
+ /*
+ * As there can be multiple entries for a
+ * given folio in the batch returned by
+ * filemap_get_folios_contig(), the below
+ * check is to ensure that we pin and return a
+ * unique set of folios between start and end.
+ */
+ if (next_idx &&
+ next_idx != folio_index(fbatch.folios[i]))
+ continue;
+
+ folio = page_folio(&fbatch.folios[i]->page);
+
+ if (try_grab_folio(folio, 1, FOLL_PIN)) {
+ folio_batch_release(&fbatch);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (nr_folios == 0)
+ *offset = offset_in_folio(folio, start);
+
+ folios[nr_folios] = folio;
+ next_idx = folio_next_index(folio);
+ if (++nr_folios == max_folios)
+ break;
+ }
+
+ folio = NULL;
+ folio_batch_release(&fbatch);
+ if (!nr_found) {
+ folio = memfd_alloc_folio(memfd, start_idx);
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
+ if (ret != -EEXIST)
+ goto err;
+ }
+ }
+ }
+
+ ret = check_and_migrate_movable_folios(nr_folios, folios);
+ } while (ret == -EAGAIN);
+
+ memalloc_pin_restore(flags);
+ return ret ? ret : nr_folios;
+err:
+ memalloc_pin_restore(flags);
+ unpin_folios(folios, nr_folios);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(memfd_pin_folios);
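For reference, a minimal usage sketch of the interface documented above (illustrative only, not part of the patch): the caller function name, the fixed-size folio array, and the pr_debug() reporting are assumptions; memfd_pin_folios() and unpin_folios() are the interfaces introduced here.

/* Illustrative sketch: pin up to 16 folios covering [start, end] of a memfd. */
static long example_pin_memfd_range(struct file *memfd, loff_t start, loff_t end)
{
	struct folio *folios[16];	/* illustrative upper bound */
	pgoff_t first_offset;
	long nr, i;

	nr = memfd_pin_folios(memfd, start, end, folios,
			      ARRAY_SIZE(folios), &first_offset);
	if (nr < 0)
		return nr;		/* no folios were pinned */

	/*
	 * folios[0] covers @start at @first_offset; the remaining entries
	 * are file-contiguous. The pins are long-term (FOLL_PIN-style), so
	 * use the folios (e.g. for DMA) and release them when done.
	 */
	for (i = 0; i < nr; i++)
		pr_debug("folio %ld spans %zu bytes\n", i, folio_size(folios[i]));

	unpin_folios(folios, nr);
	return nr;
}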
xas_unlock_irq(xas);
}
+/*
+ * This is a helper function used by memfd_pin_folios() in GUP (gup.c).
+ * It is mainly called to allocate a folio in a memfd when the caller
+ * (memfd_pin_folios()) cannot find a folio in the page cache at a given
+ * index in the mapping.
+ */
+struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+ struct folio *folio;
+ gfp_t gfp_mask;
+ int err;
+
+ if (is_file_hugepages(memfd)) {
+ /*
+ * The folio would most likely be accessed by a DMA driver,
+ * therefore, we have zone memory constraints where we can
+ * alloc from. Also, the folio will be pinned for an indefinite
+ * amount of time, so it is not expected to be migrated away.
+ */
+ gfp_mask = htlb_alloc_mask(hstate_file(memfd));
+ gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE);
+
+ folio = alloc_hugetlb_folio_nodemask(hstate_file(memfd),
+ numa_node_id(),
+ NULL,
+ gfp_mask,
+ false);
+ if (folio && folio_try_get(folio)) {
+ err = hugetlb_add_to_page_cache(folio,
+ memfd->f_mapping,
+ idx);
+ if (err) {
+ folio_put(folio);
+ free_huge_folio(folio);
+ return ERR_PTR(err);
+ }
+ return folio;
+ }
+ return ERR_PTR(-ENOMEM);
+ }
+#endif
+ return shmem_read_folio(memfd->f_mapping, idx);
+}
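A brief sketch of the contract a caller of memfd_alloc_folio() is expected to follow, mirroring the retry logic in memfd_pin_folios() above (the wrapper function below is purely illustrative, not part of the patch): on success the folio is returned with a reference held that the caller must drop, and ERR_PTR(-EEXIST) means another path instantiated the folio concurrently, so the page-cache lookup should simply be retried.

/* Illustrative only: expected caller pattern around memfd_alloc_folio(). */
static struct folio *example_get_memfd_folio(struct file *memfd, pgoff_t idx)
{
	struct folio *folio;

	for (;;) {
		/* First look in the page cache. */
		folio = filemap_get_folio(memfd->f_mapping, idx);
		if (!IS_ERR(folio))
			return folio;		/* reference held by caller */

		/* Not present: try to allocate and insert it. */
		folio = memfd_alloc_folio(memfd, idx);
		if (!IS_ERR(folio))
			return folio;		/* reference held by caller */

		/* -EEXIST means we raced; retry the lookup. */
		if (PTR_ERR(folio) != -EEXIST)
			return folio;		/* hard allocation failure */
	}
}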
+
/*
* Setting SEAL_WRITE requires us to verify there's no pending writer. However,
* via get_user_pages(), drivers might have some pending I/O without any active