* ->swap_lock (try_to_unmap_one)
* ->private_lock (try_to_unmap_one)
* ->i_pages lock (try_to_unmap_one)
- * ->pgdat->lru_lock (follow_page->mark_page_accessed)
- * ->pgdat->lru_lock (check_pte_range->isolate_lru_page)
+ * ->lruvec->lru_lock (follow_page->mark_page_accessed)
+ * ->lruvec->lru_lock (check_pte_range->isolate_lru_page)
* ->private_lock (page_remove_rmap->set_page_dirty)
* ->i_pages lock (page_remove_rmap->set_page_dirty)
* bdi.wb->list_lock (page_remove_rmap->set_page_dirty)
if (PageSwapBacked(page)) {
__mod_lruvec_page_state(page, NR_SHMEM, -nr);
if (PageTransHuge(page))
- __dec_node_page_state(page, NR_SHMEM_THPS);
+ __dec_lruvec_page_state(page, NR_SHMEM_THPS);
} else if (PageTransHuge(page)) {
- __dec_node_page_state(page, NR_FILE_THPS);
+ __dec_lruvec_page_state(page, NR_FILE_THPS);
filemap_nr_thps_dec(mapping);
}
freepage(page);
if (PageTransHuge(page) && !PageHuge(page)) {
- page_ref_sub(page, HPAGE_PMD_NR);
+ page_ref_sub(page, thp_nr_pages(page));
VM_BUG_ON_PAGE(page_count(page) <= 0, page);
} else {
put_page(page);
.range_end = end,
};
- if (!mapping_cap_writeback_dirty(mapping) ||
+ if (!mapping_can_writeback(mapping) ||
!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
return 0;
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);
-static int __add_to_page_cache_locked(struct page *page,
- struct address_space *mapping,
- pgoff_t offset, gfp_t gfp_mask,
- void **shadowp)
+noinline int __add_to_page_cache_locked(struct page *page,
+ struct address_space *mapping,
+ pgoff_t offset, gfp_t gfp,
+ void **shadowp)
{
XA_STATE(xas, &mapping->i_pages, offset);
int huge = PageHuge(page);
int error;
- void *old;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapBacked(page), page);
page->index = offset;
if (!huge) {
- error = mem_cgroup_charge(page, current->mm, gfp_mask);
+ error = mem_cgroup_charge(page, current->mm, gfp);
if (error)
goto error;
}
+ gfp &= GFP_RECLAIM_MASK;
+
do {
+ unsigned int order = xa_get_order(xas.xa, xas.xa_index);
+ void *entry, *old = NULL;
+
+ if (order > thp_order(page))
+ xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
+ order, gfp);
xas_lock_irq(&xas);
- old = xas_load(&xas);
- if (old && !xa_is_value(old))
- xas_set_err(&xas, -EEXIST);
+ xas_for_each_conflict(&xas, entry) {
+ old = entry;
+ if (!xa_is_value(entry)) {
+ xas_set_err(&xas, -EEXIST);
+ goto unlock;
+ }
+ }
+
+ if (old) {
+ if (shadowp)
+ *shadowp = old;
+ /* entry may have been split before we acquired lock */
+ order = xa_get_order(xas.xa, xas.xa_index);
+ if (order > thp_order(page)) {
+ xas_split(&xas, old, order);
+ xas_reset(&xas);
+ }
+ }
+
xas_store(&xas, page);
if (xas_error(&xas))
goto unlock;
- if (xa_is_value(old)) {
+ if (old)
mapping->nrexceptional--;
- if (shadowp)
- *shadowp = old;
- }
mapping->nrpages++;
/* hugetlb pages do not participate in page cache accounting */
__inc_lruvec_page_state(page, NR_FILE_PAGES);
unlock:
xas_unlock_irq(&xas);
- } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
+ } while (xas_nomem(&xas, gfp));
if (xas_error(&xas)) {
error = xas_error(&xas);
* unlock_page - unlock a locked page
* @page: the page
*
- * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
+ * Unlocks the page and wakes up sleepers in wait_on_page_locked().
* Also wakes sleepers in wait_on_page_writeback() because the wakeup
* mechanism between PageLocked pages and PageWriteback pages is shared.
* But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
rotate_reclaimable_page(page);
}
+ /*
+ * Writeback does not hold a page reference of its own, relying
+ * on truncation to wait for the clearing of PG_writeback.
+ * But here we must make sure that the page is not freed and
+ * reused before the wake_up_page().
+ */
+ get_page(page);
if (!test_clear_page_writeback(page))
BUG();
smp_mb__after_atomic();
wake_up_page(page, PG_writeback);
+ put_page(page);
}
EXPORT_SYMBOL(end_page_writeback);
else
wait_on_page_locked(page);
return 0;
- } else {
- if (flags & FAULT_FLAG_KILLABLE) {
- int ret;
+ }
+ if (flags & FAULT_FLAG_KILLABLE) {
+ int ret;
- ret = __lock_page_killable(page);
- if (ret) {
- mmap_read_unlock(mm);
- return 0;
- }
- } else
- __lock_page(page);
- return 1;
+ ret = __lock_page_killable(page);
+ if (ret) {
+ mmap_read_unlock(mm);
+ return 0;
+ }
+ } else {
+ __lock_page(page);
}
+ return 1;
+
}
/**
/**
* find_get_entry - find and get a page cache entry
* @mapping: the address_space to search
- * @offset: the page cache index
+ * @index: The page cache index.
*
* Looks up the page cache slot at @mapping & @offset. If there is a
- * page cache page, it is returned with an increased refcount.
+ * page cache page, the head page is returned with an increased refcount.
*
* If the slot holds a shadow entry of a previously evicted page, or a
* swap entry from shmem/tmpfs, it is returned.
*
- * Return: the found page or shadow entry, %NULL if nothing is found.
+ * Return: The head page or shadow entry, %NULL if nothing is found.
*/
-struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
+struct page *find_get_entry(struct address_space *mapping, pgoff_t index)
{
- XA_STATE(xas, &mapping->i_pages, offset);
+ XA_STATE(xas, &mapping->i_pages, index);
struct page *page;
rcu_read_lock();
put_page(page);
goto repeat;
}
- page = find_subpage(page, offset);
out:
rcu_read_unlock();
}
/**
- * find_lock_entry - locate, pin and lock a page cache entry
- * @mapping: the address_space to search
- * @offset: the page cache index
+ * find_lock_entry - Locate and lock a page cache entry.
+ * @mapping: The address_space to search.
+ * @index: The page cache index.
*
- * Looks up the page cache slot at @mapping & @offset. If there is a
- * page cache page, it is returned locked and with an increased
- * refcount.
+ * Looks up the page at @mapping & @index. If there is a page in the
+ * cache, the head page is returned locked and with an increased refcount.
*
* If the slot holds a shadow entry of a previously evicted page, or a
* swap entry from shmem/tmpfs, it is returned.
*
- * find_lock_entry() may sleep.
- *
- * Return: the found page or shadow entry, %NULL if nothing is found.
+ * Context: May sleep.
+ * Return: The head page or shadow entry, %NULL if nothing is found.
*/
-struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
+struct page *find_lock_entry(struct address_space *mapping, pgoff_t index)
{
struct page *page;
repeat:
- page = find_get_entry(mapping, offset);
+ page = find_get_entry(mapping, index);
if (page && !xa_is_value(page)) {
lock_page(page);
/* Has the page been truncated? */
- if (unlikely(page_mapping(page) != mapping)) {
+ if (unlikely(page->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto repeat;
}
- VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
+ VM_BUG_ON_PAGE(!thp_contains(page, index), page);
}
return page;
}
-EXPORT_SYMBOL(find_lock_entry);
/**
* pagecache_get_page - Find and get a reference to a page.
*
* * %FGP_ACCESSED - The page will be marked accessed.
* * %FGP_LOCK - The page is returned locked.
+ * * %FGP_HEAD - If the page is present and a THP, return the head page
+ * rather than the exact page specified by the index.
* * %FGP_CREAT - If no page is present then a new page is allocated using
* @gfp_mask and added to the page cache and the VM's LRU list.
* The page is returned locked and with an increased refcount.
}
/* Has the page been truncated? */
- if (unlikely(compound_head(page)->mapping != mapping)) {
+ if (unlikely(page->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto repeat;
}
- VM_BUG_ON_PAGE(page->index != index, page);
+ VM_BUG_ON_PAGE(!thp_contains(page, index), page);
}
if (fgp_flags & FGP_ACCESSED)
if (page_is_idle(page))
clear_page_idle(page);
}
+ if (!(fgp_flags & FGP_HEAD))
+ page = find_subpage(page, index);
no_page:
if (!page && (fgp_flags & FGP_CREAT)) {
int err;
- if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping))
+ if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
gfp_mask |= __GFP_WRITE;
if (fgp_flags & FGP_NOFS)
gfp_mask &= ~__GFP_FS;
ra->ra_pages /= 4;
}
+static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
+{
+ if (iocb->ki_flags & IOCB_WAITQ)
+ return lock_page_async(page, iocb->ki_waitq);
+ else if (iocb->ki_flags & IOCB_NOWAIT)
+ return trylock_page(page) ? 0 : -EAGAIN;
+ else
+ return lock_page_killable(page);
+}
+
+static struct page *
+generic_file_buffered_read_readpage(struct kiocb *iocb,
+ struct file *filp,
+ struct address_space *mapping,
+ struct page *page)
+{
+ struct file_ra_state *ra = &filp->f_ra;
+ int error;
+
+ if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
+ unlock_page(page);
+ put_page(page);
+ return ERR_PTR(-EAGAIN);
+ }
+
+ /*
+ * A previous I/O error may have been due to temporary
+ * failures, eg. multipath errors.
+ * PG_error will be set again if readpage fails.
+ */
+ ClearPageError(page);
+ /* Start the actual read. The read will unlock the page. */
+ error = mapping->a_ops->readpage(filp, page);
+
+ if (unlikely(error)) {
+ put_page(page);
+ return error != AOP_TRUNCATED_PAGE ? ERR_PTR(error) : NULL;
+ }
+
+ if (!PageUptodate(page)) {
+ error = lock_page_for_iocb(iocb, page);
+ if (unlikely(error)) {
+ put_page(page);
+ return ERR_PTR(error);
+ }
+ if (!PageUptodate(page)) {
+ if (page->mapping == NULL) {
+ /*
+ * invalidate_mapping_pages got it
+ */
+ unlock_page(page);
+ put_page(page);
+ return NULL;
+ }
+ unlock_page(page);
+ shrink_readahead_size_eio(ra);
+ put_page(page);
+ return ERR_PTR(-EIO);
+ }
+ unlock_page(page);
+ }
+
+ return page;
+}
+
+static struct page *
+generic_file_buffered_read_pagenotuptodate(struct kiocb *iocb,
+ struct file *filp,
+ struct iov_iter *iter,
+ struct page *page,
+ loff_t pos, loff_t count)
+{
+ struct address_space *mapping = filp->f_mapping;
+ struct inode *inode = mapping->host;
+ int error;
+
+ /*
+ * See comment in do_read_cache_page on why
+ * wait_on_page_locked is used to avoid unnecessarily
+ * serialisations and why it's safe.
+ */
+ if (iocb->ki_flags & IOCB_WAITQ) {
+ error = wait_on_page_locked_async(page,
+ iocb->ki_waitq);
+ } else {
+ error = wait_on_page_locked_killable(page);
+ }
+ if (unlikely(error)) {
+ put_page(page);
+ return ERR_PTR(error);
+ }
+ if (PageUptodate(page))
+ return page;
+
+ if (inode->i_blkbits == PAGE_SHIFT ||
+ !mapping->a_ops->is_partially_uptodate)
+ goto page_not_up_to_date;
+ /* pipes can't handle partially uptodate pages */
+ if (unlikely(iov_iter_is_pipe(iter)))
+ goto page_not_up_to_date;
+ if (!trylock_page(page))
+ goto page_not_up_to_date;
+ /* Did it get truncated before we got the lock? */
+ if (!page->mapping)
+ goto page_not_up_to_date_locked;
+ if (!mapping->a_ops->is_partially_uptodate(page,
+ pos & ~PAGE_MASK, count))
+ goto page_not_up_to_date_locked;
+ unlock_page(page);
+ return page;
+
+page_not_up_to_date:
+ /* Get exclusive access to the page ... */
+ error = lock_page_for_iocb(iocb, page);
+ if (unlikely(error)) {
+ put_page(page);
+ return ERR_PTR(error);
+ }
+
+page_not_up_to_date_locked:
+ /* Did it get truncated before we got the lock? */
+ if (!page->mapping) {
+ unlock_page(page);
+ put_page(page);
+ return NULL;
+ }
+
+ /* Did somebody else fill it already? */
+ if (PageUptodate(page)) {
+ unlock_page(page);
+ return page;
+ }
+
+ return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
+}
+
+static struct page *
+generic_file_buffered_read_no_cached_page(struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ struct file *filp = iocb->ki_filp;
+ struct address_space *mapping = filp->f_mapping;
+ pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
+ struct page *page;
+ int error;
+
+ if (iocb->ki_flags & IOCB_NOIO)
+ return ERR_PTR(-EAGAIN);
+
+ /*
+ * Ok, it wasn't cached, so we need to create a new
+ * page..
+ */
+ page = page_cache_alloc(mapping);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ error = add_to_page_cache_lru(page, mapping, index,
+ mapping_gfp_constraint(mapping, GFP_KERNEL));
+ if (error) {
+ put_page(page);
+ return error != -EEXIST ? ERR_PTR(error) : NULL;
+ }
+
+ return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
+}
+
+static int generic_file_buffered_read_get_pages(struct kiocb *iocb,
+ struct iov_iter *iter,
+ struct page **pages,
+ unsigned int nr)
+{
+ struct file *filp = iocb->ki_filp;
+ struct address_space *mapping = filp->f_mapping;
+ struct file_ra_state *ra = &filp->f_ra;
+ pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
+ pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
+ int i, j, nr_got, err = 0;
+
+ nr = min_t(unsigned long, last_index - index, nr);
+find_page:
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
+ nr_got = find_get_pages_contig(mapping, index, nr, pages);
+ if (nr_got)
+ goto got_pages;
+
+ if (iocb->ki_flags & IOCB_NOIO)
+ return -EAGAIN;
+
+ page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);
+
+ nr_got = find_get_pages_contig(mapping, index, nr, pages);
+ if (nr_got)
+ goto got_pages;
+
+ pages[0] = generic_file_buffered_read_no_cached_page(iocb, iter);
+ err = PTR_ERR_OR_ZERO(pages[0]);
+ if (!IS_ERR_OR_NULL(pages[0]))
+ nr_got = 1;
+got_pages:
+ for (i = 0; i < nr_got; i++) {
+ struct page *page = pages[i];
+ pgoff_t pg_index = index + i;
+ loff_t pg_pos = max(iocb->ki_pos,
+ (loff_t) pg_index << PAGE_SHIFT);
+ loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;
+
+ if (PageReadahead(page)) {
+ if (iocb->ki_flags & IOCB_NOIO) {
+ for (j = i; j < nr_got; j++)
+ put_page(pages[j]);
+ nr_got = i;
+ err = -EAGAIN;
+ break;
+ }
+ page_cache_async_readahead(mapping, ra, filp, page,
+ pg_index, last_index - pg_index);
+ }
+
+ if (!PageUptodate(page)) {
+ if ((iocb->ki_flags & IOCB_NOWAIT) ||
+ ((iocb->ki_flags & IOCB_WAITQ) && i)) {
+ for (j = i; j < nr_got; j++)
+ put_page(pages[j]);
+ nr_got = i;
+ err = -EAGAIN;
+ break;
+ }
+
+ page = generic_file_buffered_read_pagenotuptodate(iocb,
+ filp, iter, page, pg_pos, pg_count);
+ if (IS_ERR_OR_NULL(page)) {
+ for (j = i + 1; j < nr_got; j++)
+ put_page(pages[j]);
+ nr_got = i;
+ err = PTR_ERR_OR_ZERO(page);
+ break;
+ }
+ }
+ }
+
+ if (likely(nr_got))
+ return nr_got;
+ if (err)
+ return err;
+ /*
+ * No pages and no error means we raced and should retry:
+ */
+ goto find_page;
+}
+
/**
* generic_file_buffered_read - generic file read routine
* @iocb: the iocb to read
struct iov_iter *iter, ssize_t written)
{
struct file *filp = iocb->ki_filp;
+ struct file_ra_state *ra = &filp->f_ra;
struct address_space *mapping = filp->f_mapping;
struct inode *inode = mapping->host;
- struct file_ra_state *ra = &filp->f_ra;
- loff_t *ppos = &iocb->ki_pos;
- pgoff_t index;
- pgoff_t last_index;
- pgoff_t prev_index;
- unsigned long offset; /* offset into pagecache page */
- unsigned int prev_offset;
- int error = 0;
-
- if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
+ struct page *pages_onstack[PAGEVEC_SIZE], **pages = NULL;
+ unsigned int nr_pages = min_t(unsigned int, 512,
+ ((iocb->ki_pos + iter->count + PAGE_SIZE - 1) >> PAGE_SHIFT) -
+ (iocb->ki_pos >> PAGE_SHIFT));
+ int i, pg_nr, error = 0;
+ bool writably_mapped;
+ loff_t isize, end_offset;
+
+ if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
return 0;
iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
- index = *ppos >> PAGE_SHIFT;
- prev_index = ra->prev_pos >> PAGE_SHIFT;
- prev_offset = ra->prev_pos & (PAGE_SIZE-1);
- last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
- offset = *ppos & ~PAGE_MASK;
+ if (nr_pages > ARRAY_SIZE(pages_onstack))
+ pages = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
- for (;;) {
- struct page *page;
- pgoff_t end_index;
- loff_t isize;
- unsigned long nr, ret;
+ if (!pages) {
+ pages = pages_onstack;
+ nr_pages = min_t(unsigned int, nr_pages, ARRAY_SIZE(pages_onstack));
+ }
+ do {
cond_resched();
-find_page:
- if (fatal_signal_pending(current)) {
- error = -EINTR;
- goto out;
- }
- page = find_get_page(mapping, index);
- if (!page) {
- if (iocb->ki_flags & IOCB_NOIO)
- goto would_block;
- page_cache_sync_readahead(mapping,
- ra, filp,
- index, last_index - index);
- page = find_get_page(mapping, index);
- if (unlikely(page == NULL))
- goto no_cached_page;
- }
- if (PageReadahead(page)) {
- if (iocb->ki_flags & IOCB_NOIO) {
- put_page(page);
- goto out;
- }
- page_cache_async_readahead(mapping,
- ra, filp, page,
- index, last_index - index);
- }
- if (!PageUptodate(page)) {
- /*
- * See comment in do_read_cache_page on why
- * wait_on_page_locked is used to avoid unnecessarily
- * serialisations and why it's safe.
- */
- if (iocb->ki_flags & IOCB_WAITQ) {
- if (written) {
- put_page(page);
- goto out;
- }
- error = wait_on_page_locked_async(page,
- iocb->ki_waitq);
- } else {
- if (iocb->ki_flags & IOCB_NOWAIT) {
- put_page(page);
- goto would_block;
- }
- error = wait_on_page_locked_killable(page);
- }
- if (unlikely(error))
- goto readpage_error;
- if (PageUptodate(page))
- goto page_ok;
-
- if (inode->i_blkbits == PAGE_SHIFT ||
- !mapping->a_ops->is_partially_uptodate)
- goto page_not_up_to_date;
- /* pipes can't handle partially uptodate pages */
- if (unlikely(iov_iter_is_pipe(iter)))
- goto page_not_up_to_date;
- if (!trylock_page(page))
- goto page_not_up_to_date;
- /* Did it get truncated before we got the lock? */
- if (!page->mapping)
- goto page_not_up_to_date_locked;
- if (!mapping->a_ops->is_partially_uptodate(page,
- offset, iter->count))
- goto page_not_up_to_date_locked;
- unlock_page(page);
+ /*
+ * If we've already successfully copied some data, then we
+ * can no longer safely return -EIOCBQUEUED. Hence mark
+ * an async read NOWAIT at that point.
+ */
+ if ((iocb->ki_flags & IOCB_WAITQ) && written)
+ iocb->ki_flags |= IOCB_NOWAIT;
+
+ i = 0;
+ pg_nr = generic_file_buffered_read_get_pages(iocb, iter,
+ pages, nr_pages);
+ if (pg_nr < 0) {
+ error = pg_nr;
+ break;
}
-page_ok:
+
/*
- * i_size must be checked after we know the page is Uptodate.
+ * i_size must be checked after we know the pages are Uptodate.
*
* Checking i_size after the check allows us to calculate
* the correct value for "nr", which means the zero-filled
* part of the page is not copied back to userspace (unless
* another truncate extends the file - this is desired though).
*/
-
isize = i_size_read(inode);
- end_index = (isize - 1) >> PAGE_SHIFT;
- if (unlikely(!isize || index > end_index)) {
- put_page(page);
- goto out;
- }
+ if (unlikely(iocb->ki_pos >= isize))
+ goto put_pages;
- /* nr is the maximum number of bytes to copy from this page */
- nr = PAGE_SIZE;
- if (index == end_index) {
- nr = ((isize - 1) & ~PAGE_MASK) + 1;
- if (nr <= offset) {
- put_page(page);
- goto out;
- }
- }
- nr = nr - offset;
+ end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
- /* If users can be writing to this page using arbitrary
- * virtual addresses, take care about potential aliasing
- * before reading the page on the kernel side.
- */
- if (mapping_writably_mapped(mapping))
- flush_dcache_page(page);
+ while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr >
+ (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
+ put_page(pages[--pg_nr]);
/*
- * When a sequential read accesses a page several times,
- * only mark it as accessed the first time.
+ * Once we start copying data, we don't want to be touching any
+ * cachelines that might be contended:
*/
- if (prev_index != index || offset != prev_offset)
- mark_page_accessed(page);
- prev_index = index;
+ writably_mapped = mapping_writably_mapped(mapping);
/*
- * Ok, we have the page, and it's up-to-date, so
- * now we can copy it to user space...
+ * When a sequential read accesses a page several times, only
+ * mark it as accessed the first time.
*/
+ if (iocb->ki_pos >> PAGE_SHIFT !=
+ ra->prev_pos >> PAGE_SHIFT)
+ mark_page_accessed(pages[0]);
+ for (i = 1; i < pg_nr; i++)
+ mark_page_accessed(pages[i]);
+
+ for (i = 0; i < pg_nr; i++) {
+ unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
+ unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos,
+ PAGE_SIZE - offset);
+ unsigned int copied;
- ret = copy_page_to_iter(page, offset, nr, iter);
- offset += ret;
- index += offset >> PAGE_SHIFT;
- offset &= ~PAGE_MASK;
- prev_offset = offset;
-
- put_page(page);
- written += ret;
- if (!iov_iter_count(iter))
- goto out;
- if (ret < nr) {
- error = -EFAULT;
- goto out;
- }
- continue;
-
-page_not_up_to_date:
- /* Get exclusive access to the page ... */
- if (iocb->ki_flags & IOCB_WAITQ)
- error = lock_page_async(page, iocb->ki_waitq);
- else
- error = lock_page_killable(page);
- if (unlikely(error))
- goto readpage_error;
-
-page_not_up_to_date_locked:
- /* Did it get truncated before we got the lock? */
- if (!page->mapping) {
- unlock_page(page);
- put_page(page);
- continue;
- }
+ /*
+ * If users can be writing to this page using arbitrary
+ * virtual addresses, take care about potential aliasing
+ * before reading the page on the kernel side.
+ */
+ if (writably_mapped)
+ flush_dcache_page(pages[i]);
- /* Did somebody else fill it already? */
- if (PageUptodate(page)) {
- unlock_page(page);
- goto page_ok;
- }
+ copied = copy_page_to_iter(pages[i], offset, bytes, iter);
-readpage:
- if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
- unlock_page(page);
- put_page(page);
- goto would_block;
- }
- /*
- * A previous I/O error may have been due to temporary
- * failures, eg. multipath errors.
- * PG_error will be set again if readpage fails.
- */
- ClearPageError(page);
- /* Start the actual read. The read will unlock the page. */
- error = mapping->a_ops->readpage(filp, page);
+ written += copied;
+ iocb->ki_pos += copied;
+ ra->prev_pos = iocb->ki_pos;
- if (unlikely(error)) {
- if (error == AOP_TRUNCATED_PAGE) {
- put_page(page);
- error = 0;
- goto find_page;
- }
- goto readpage_error;
- }
-
- if (!PageUptodate(page)) {
- if (iocb->ki_flags & IOCB_WAITQ)
- error = lock_page_async(page, iocb->ki_waitq);
- else
- error = lock_page_killable(page);
-
- if (unlikely(error))
- goto readpage_error;
- if (!PageUptodate(page)) {
- if (page->mapping == NULL) {
- /*
- * invalidate_mapping_pages got it
- */
- unlock_page(page);
- put_page(page);
- goto find_page;
- }
- unlock_page(page);
- shrink_readahead_size_eio(ra);
- error = -EIO;
- goto readpage_error;
+ if (copied < bytes) {
+ error = -EFAULT;
+ break;
}
- unlock_page(page);
}
+put_pages:
+ for (i = 0; i < pg_nr; i++)
+ put_page(pages[i]);
+ } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
- goto page_ok;
-
-readpage_error:
- /* UHHUH! A synchronous read error occurred. Report it */
- put_page(page);
- goto out;
-
-no_cached_page:
- /*
- * Ok, it wasn't cached, so we need to create a new
- * page..
- */
- page = page_cache_alloc(mapping);
- if (!page) {
- error = -ENOMEM;
- goto out;
- }
- error = add_to_page_cache_lru(page, mapping, index,
- mapping_gfp_constraint(mapping, GFP_KERNEL));
- if (error) {
- put_page(page);
- if (error == -EEXIST) {
- error = 0;
- goto find_page;
- }
- goto out;
- }
- goto readpage;
- }
+ file_accessed(filp);
-would_block:
- error = -EAGAIN;
-out:
- ra->prev_pos = prev_index;
- ra->prev_pos <<= PAGE_SHIFT;
- ra->prev_pos |= prev_offset;
+ if (pages != pages_onstack)
+ kfree(pages);
- *ppos = ((loff_t)index << PAGE_SHIFT) + offset;
- file_accessed(filp);
return written ? written : error;
}
EXPORT_SYMBOL_GPL(generic_file_buffered_read);
struct file *file = vmf->vma->vm_file;
struct file_ra_state *ra = &file->f_ra;
struct address_space *mapping = file->f_mapping;
+ DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
struct file *fpin = NULL;
- pgoff_t offset = vmf->pgoff;
unsigned int mmap_miss;
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_SEQ_READ) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- page_cache_sync_readahead(mapping, ra, file, offset,
- ra->ra_pages);
+ page_cache_sync_ra(&ractl, ra, ra->ra_pages);
return fpin;
}
* mmap read-around
*/
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
+ ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
ra->size = ra->ra_pages;
ra->async_size = ra->ra_pages / 4;
- ra_submit(ra, mapping, file);
+ ractl._index = ra->start;
+ do_page_cache_ra(&ractl, ra->size, ra->async_size);
return fpin;
}
pgoff_t last_pgoff = start_pgoff;
unsigned long max_idx;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
- struct page *page;
+ struct page *head, *page;
unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
rcu_read_lock();
- xas_for_each(&xas, page, end_pgoff) {
- if (xas_retry(&xas, page))
+ xas_for_each(&xas, head, end_pgoff) {
+ if (xas_retry(&xas, head))
continue;
- if (xa_is_value(page))
+ if (xa_is_value(head))
goto next;
/*
* Check for a locked page first, as a speculative
* reference may adversely influence page migration.
*/
- if (PageLocked(page))
+ if (PageLocked(head))
goto next;
- if (!page_cache_get_speculative(page))
+ if (!page_cache_get_speculative(head))
goto next;
/* Has the page moved or been split? */
- if (unlikely(page != xas_reload(&xas)))
+ if (unlikely(head != xas_reload(&xas)))
goto skip;
- page = find_subpage(page, xas.xa_index);
+ page = find_subpage(head, xas.xa_index);
- if (!PageUptodate(page) ||
+ if (!PageUptodate(head) ||
PageReadahead(page) ||
PageHWPoison(page))
goto skip;
- if (!trylock_page(page))
+ if (!trylock_page(head))
goto skip;
- if (page->mapping != mapping || !PageUptodate(page))
+ if (head->mapping != mapping || !PageUptodate(head))
goto unlock;
max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
- if (page->index >= max_idx)
+ if (xas.xa_index >= max_idx)
goto unlock;
if (mmap_miss > 0)
last_pgoff = xas.xa_index;
if (alloc_set_pte(vmf, page))
goto unlock;
- unlock_page(page);
+ unlock_page(head);
goto next;
unlock:
- unlock_page(page);
+ unlock_page(head);
skip:
- put_page(page);
+ put_page(head);
next:
/* Huge page is mapped? No need to proceed. */
if (pmd_trans_huge(*vmf->pmd))
goto out;
/*
- * Page is not up to date and may be locked due one of the following
+ * Page is not up to date and may be locked due to one of the following
* case a: Page is being filled and the page lock is held
* case b: Read/write error clearing the page uptodate status
* case c: Truncation in progress (page locked)
}
EXPORT_SYMBOL(read_cache_page_gfp);
-/*
- * Don't operate on ranges the page cache doesn't support, and don't exceed the
- * LFS limits. If pos is under the limit it becomes a short access. If it
- * exceeds the limit we return -EFBIG.
- */
-static int generic_write_check_limits(struct file *file, loff_t pos,
- loff_t *count)
-{
- struct inode *inode = file->f_mapping->host;
- loff_t max_size = inode->i_sb->s_maxbytes;
- loff_t limit = rlimit(RLIMIT_FSIZE);
-
- if (limit != RLIM_INFINITY) {
- if (pos >= limit) {
- send_sig(SIGXFSZ, current, 0);
- return -EFBIG;
- }
- *count = min(*count, limit - pos);
- }
-
- if (!(file->f_flags & O_LARGEFILE))
- max_size = MAX_NON_LFS;
-
- if (unlikely(pos >= max_size))
- return -EFBIG;
-
- *count = min(*count, max_size - pos);
-
- return 0;
-}
-
-/*
- * Performs necessary checks before doing a write
- *
- * Can adjust writing position or amount of bytes to write.
- * Returns appropriate error code that caller should return or
- * zero in case that write should be allowed.
- */
-inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- loff_t count;
- int ret;
-
- if (IS_SWAPFILE(inode))
- return -ETXTBSY;
-
- if (!iov_iter_count(from))
- return 0;
-
- /* FIXME: this is for backwards compatibility with 2.4 */
- if (iocb->ki_flags & IOCB_APPEND)
- iocb->ki_pos = i_size_read(inode);
-
- if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
- return -EINVAL;
-
- count = iov_iter_count(from);
- ret = generic_write_check_limits(file, iocb->ki_pos, &count);
- if (ret)
- return ret;
-
- iov_iter_truncate(from, count);
- return iov_iter_count(from);
-}
-EXPORT_SYMBOL(generic_write_checks);
-
-/*
- * Performs necessary checks before doing a clone.
- *
- * Can adjust amount of bytes to clone via @req_count argument.
- * Returns appropriate error code that caller should return or
- * zero in case the clone should be allowed.
- */
-int generic_remap_checks(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t *req_count, unsigned int remap_flags)
-{
- struct inode *inode_in = file_in->f_mapping->host;
- struct inode *inode_out = file_out->f_mapping->host;
- uint64_t count = *req_count;
- uint64_t bcount;
- loff_t size_in, size_out;
- loff_t bs = inode_out->i_sb->s_blocksize;
- int ret;
-
- /* The start of both ranges must be aligned to an fs block. */
- if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
- return -EINVAL;
-
- /* Ensure offsets don't wrap. */
- if (pos_in + count < pos_in || pos_out + count < pos_out)
- return -EINVAL;
-
- size_in = i_size_read(inode_in);
- size_out = i_size_read(inode_out);
-
- /* Dedupe requires both ranges to be within EOF. */
- if ((remap_flags & REMAP_FILE_DEDUP) &&
- (pos_in >= size_in || pos_in + count > size_in ||
- pos_out >= size_out || pos_out + count > size_out))
- return -EINVAL;
-
- /* Ensure the infile range is within the infile. */
- if (pos_in >= size_in)
- return -EINVAL;
- count = min(count, size_in - (uint64_t)pos_in);
-
- ret = generic_write_check_limits(file_out, pos_out, &count);
- if (ret)
- return ret;
-
- /*
- * If the user wanted us to link to the infile's EOF, round up to the
- * next block boundary for this check.
- *
- * Otherwise, make sure the count is also block-aligned, having
- * already confirmed the starting offsets' block alignment.
- */
- if (pos_in + count == size_in) {
- bcount = ALIGN(size_in, bs) - pos_in;
- } else {
- if (!IS_ALIGNED(count, bs))
- count = ALIGN_DOWN(count, bs);
- bcount = count;
- }
-
- /* Don't allow overlapped cloning within the same file. */
- if (inode_in == inode_out &&
- pos_out + bcount > pos_in &&
- pos_out < pos_in + bcount)
- return -EINVAL;
-
- /*
- * We shortened the request but the caller can't deal with that, so
- * bounce the request back to userspace.
- */
- if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
- return -EINVAL;
-
- *req_count = count;
- return 0;
-}
-
-
-/*
- * Performs common checks before doing a file copy/clone
- * from @file_in to @file_out.
- */
-int generic_file_rw_checks(struct file *file_in, struct file *file_out)
-{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
-
- /* Don't copy dirs, pipes, sockets... */
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
-
- if (!(file_in->f_mode & FMODE_READ) ||
- !(file_out->f_mode & FMODE_WRITE) ||
- (file_out->f_flags & O_APPEND))
- return -EBADF;
-
- return 0;
-}
-
-/*
- * Performs necessary checks before doing a file copy
- *
- * Can adjust amount of bytes to copy via @req_count argument.
- * Returns appropriate error code that caller should return or
- * zero in case the copy should be allowed.
- */
-int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t *req_count, unsigned int flags)
-{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
- uint64_t count = *req_count;
- loff_t size_in;
- int ret;
-
- ret = generic_file_rw_checks(file_in, file_out);
- if (ret)
- return ret;
-
- /* Don't touch certain kinds of inodes */
- if (IS_IMMUTABLE(inode_out))
- return -EPERM;
-
- if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
- return -ETXTBSY;
-
- /* Ensure offsets don't wrap. */
- if (pos_in + count < pos_in || pos_out + count < pos_out)
- return -EOVERFLOW;
-
- /* Shorten the copy to EOF */
- size_in = i_size_read(inode_in);
- if (pos_in >= size_in)
- count = 0;
- else
- count = min(count, size_in - (uint64_t)pos_in);
-
- ret = generic_write_check_limits(file_out, pos_out, &count);
- if (ret)
- return ret;
-
- /* Don't allow overlapped copying within the same file. */
- if (inode_in == inode_out &&
- pos_out + count > pos_in &&
- pos_out < pos_in + count)
- return -EINVAL;
-
- *req_count = count;
- return 0;
-}
-
int pagecache_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)