mm/filemap: use head pages in generic_file_buffered_read
authorMatthew Wilcox (Oracle) <willy@infradead.org>
Wed, 24 Feb 2021 20:01:59 +0000 (12:01 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Feb 2021 21:38:27 +0000 (13:38 -0800)
Add filemap_get_read_batch() which returns the head pages which represent
a contiguous array of bytes in the file.  It also stops when encountering
a page marked as Readahead or !Uptodate (but does return that page) so it
can be handled appropriately by filemap_get_pages().  That lets us remove
the loop in filemap_get_pages() and check only the last page.

Link: https://lkml.kernel.org/r/20210122160140.223228-5-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/filemap.c

index aadf9bd..4fce973 100644 (file)
@@ -2178,6 +2178,51 @@ static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
                return lock_page_killable(page);
 }
 
+/*
+ * filemap_get_read_batch - Get a batch of pages for read
+ *
+ * Get a batch of pages which represent a contiguous range of bytes
+ * in the file.  No tail pages will be returned.  If @index is in the
+ * middle of a THP, the entire THP will be returned.  The last page in
+ * the batch may have Readahead set or be not Uptodate so that the
+ * caller can take the appropriate action.
+ */
+static void filemap_get_read_batch(struct address_space *mapping,
+               pgoff_t index, pgoff_t max, struct pagevec *pvec)
+{
+       XA_STATE(xas, &mapping->i_pages, index);
+       struct page *head;
+
+       rcu_read_lock();
+       for (head = xas_load(&xas); head; head = xas_next(&xas)) {
+               if (xas_retry(&xas, head))
+                       continue;
+               if (xas.xa_index > max || xa_is_value(head))
+                       break;
+               if (!page_cache_get_speculative(head))
+                       goto retry;
+
+               /* Has the page moved or been split? */
+               if (unlikely(head != xas_reload(&xas)))
+                       goto put_page;
+
+               if (!pagevec_add(pvec, head))
+                       break;
+               if (!PageUptodate(head))
+                       break;
+               if (PageReadahead(head))
+                       break;
+               xas.xa_index = head->index + thp_nr_pages(head) - 1;
+               xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK;
+               continue;
+put_page:
+               put_page(head);
+retry:
+               xas_reset(&xas);
+       }
+       rcu_read_unlock();
+}
+
 static struct page *filemap_read_page(struct kiocb *iocb, struct file *filp,
                struct address_space *mapping, struct page *page)
 {
@@ -2331,15 +2376,15 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
        struct address_space *mapping = filp->f_mapping;
        struct file_ra_state *ra = &filp->f_ra;
        pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
-       pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
-       unsigned int nr = min_t(unsigned long, last_index - index, PAGEVEC_SIZE);
-       int i, j, err = 0;
+       pgoff_t last_index;
+       int err = 0;
 
+       last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
 find_page:
        if (fatal_signal_pending(current))
                return -EINTR;
 
-       pvec->nr = find_get_pages_contig(mapping, index, nr, pvec->pages);
+       filemap_get_read_batch(mapping, index, last_index, pvec);
        if (pvec->nr)
                goto got_pages;
 
@@ -2348,29 +2393,30 @@ find_page:
 
        page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);
 
-       pvec->nr = find_get_pages_contig(mapping, index, nr, pvec->pages);
+       filemap_get_read_batch(mapping, index, last_index, pvec);
        if (pvec->nr)
                goto got_pages;
 
        pvec->pages[0] = filemap_create_page(iocb, iter);
        err = PTR_ERR_OR_ZERO(pvec->pages[0]);
-       if (!IS_ERR_OR_NULL(pvec->pages[0]))
-               pvec->nr = 1;
+       if (IS_ERR_OR_NULL(pvec->pages[0]))
+               goto err;
+       pvec->nr = 1;
+       return 0;
 got_pages:
-       for (i = 0; i < pvec->nr; i++) {
-               struct page *page = pvec->pages[i];
-               pgoff_t pg_index = index + i;
+       {
+               struct page *page = pvec->pages[pvec->nr - 1];
+               pgoff_t pg_index = page->index;
                loff_t pg_pos = max(iocb->ki_pos,
                                    (loff_t) pg_index << PAGE_SHIFT);
                loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;
 
                if (PageReadahead(page)) {
                        if (iocb->ki_flags & IOCB_NOIO) {
-                               for (j = i; j < pvec->nr; j++)
-                                       put_page(pvec->pages[j]);
-                               pvec->nr = i;
+                               put_page(page);
+                               pvec->nr--;
                                err = -EAGAIN;
-                               break;
+                               goto err;
                        }
                        page_cache_async_readahead(mapping, ra, filp, page,
                                        pg_index, last_index - pg_index);
@@ -2378,26 +2424,23 @@ got_pages:
 
                if (!PageUptodate(page)) {
                        if ((iocb->ki_flags & IOCB_NOWAIT) ||
-                           ((iocb->ki_flags & IOCB_WAITQ) && i)) {
-                               for (j = i; j < pvec->nr; j++)
-                                       put_page(pvec->pages[j]);
-                               pvec->nr = i;
+                           ((iocb->ki_flags & IOCB_WAITQ) && pvec->nr > 1)) {
+                               put_page(page);
+                               pvec->nr--;
                                err = -EAGAIN;
-                               break;
+                               goto err;
                        }
 
                        page = filemap_update_page(iocb, filp, iter, page,
                                        pg_pos, pg_count);
                        if (IS_ERR_OR_NULL(page)) {
-                               for (j = i + 1; j < pvec->nr; j++)
-                                       put_page(pvec->pages[j]);
-                               pvec->nr = i;
+                               pvec->nr--;
                                err = PTR_ERR_OR_ZERO(page);
-                               break;
                        }
                }
        }
 
+err:
        if (likely(pvec->nr))
                return 0;
        if (err)
@@ -2442,6 +2485,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
                return 0;
 
        iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
+       pagevec_init(&pvec);
 
        do {
                cond_resched();
@@ -2469,13 +2513,8 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
                isize = i_size_read(inode);
                if (unlikely(iocb->ki_pos >= isize))
                        goto put_pages;
-
                end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
 
-               while ((iocb->ki_pos >> PAGE_SHIFT) + pvec.nr >
-                      (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
-                       put_page(pvec.pages[--pvec.nr]);
-
                /*
                 * Once we start copying data, we don't want to be touching any
                 * cachelines that might be contended:
@@ -2489,24 +2528,32 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
                if (iocb->ki_pos >> PAGE_SHIFT !=
                    ra->prev_pos >> PAGE_SHIFT)
                        mark_page_accessed(pvec.pages[0]);
-               for (i = 1; i < pagevec_count(&pvec); i++)
-                       mark_page_accessed(pvec.pages[i]);
 
                for (i = 0; i < pagevec_count(&pvec); i++) {
-                       unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
-                       unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos,
-                                                  PAGE_SIZE - offset);
-                       unsigned int copied;
+                       struct page *page = pvec.pages[i];
+                       size_t page_size = thp_size(page);
+                       size_t offset = iocb->ki_pos & (page_size - 1);
+                       size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
+                                            page_size - offset);
+                       size_t copied;
 
+                       if (end_offset < page_offset(page))
+                               break;
+                       if (i > 0)
+                               mark_page_accessed(page);
                        /*
                         * If users can be writing to this page using arbitrary
                         * virtual addresses, take care about potential aliasing
                         * before reading the page on the kernel side.
                         */
-                       if (writably_mapped)
-                               flush_dcache_page(pvec.pages[i]);
+                       if (writably_mapped) {
+                               int j;
+
+                               for (j = 0; j < thp_nr_pages(page); j++)
+                                       flush_dcache_page(page + j);
+                       }
 
-                       copied = copy_page_to_iter(pvec.pages[i], offset, bytes, iter);
+                       copied = copy_page_to_iter(page, offset, bytes, iter);
 
                        written += copied;
                        iocb->ki_pos += copied;
@@ -2520,6 +2567,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 put_pages:
                for (i = 0; i < pagevec_count(&pvec); i++)
                        put_page(pvec.pages[i]);
+               pagevec_reinit(&pvec);
        } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
 
        file_accessed(filp);