Merge tag 'kvmarm-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmar...
diff --git a/mm/filemap.c b/mm/filemap.c
index 6ce832d..d1458ec 100644
@@ -142,17 +142,6 @@ static void page_cache_delete(struct address_space *mapping,
 
        page->mapping = NULL;
        /* Leave page->index set: truncation lookup relies upon it */
-
-       if (shadow) {
-               mapping->nrexceptional += nr;
-               /*
-                * Make sure the nrexceptional update is committed before
-                * the nrpages update so that final truncate racing
-                * with reclaim does not see both counters 0 at the
-                * same time and miss a shadow entry.
-                */
-               smp_wmb();
-       }
        mapping->nrpages -= nr;
 }
 
@@ -629,12 +618,52 @@ EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
 /* Returns true if writeback might be needed or already in progress. */
 static bool mapping_needs_writeback(struct address_space *mapping)
 {
-       if (dax_mapping(mapping))
-               return mapping->nrexceptional;
-
        return mapping->nrpages;
 }
 
+/**
+ * filemap_range_needs_writeback - check if range potentially needs writeback
+ * @mapping:           address space within which to check
+ * @start_byte:        offset in bytes where the range starts
+ * @end_byte:          offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one dirty, locked, or writeback page in the range supplied.
+ * Usually used to check whether direct I/O in this range would trigger
+ * writeback. Used by O_DIRECT read/write with IOCB_NOWAIT to see if the
+ * caller needs to do filemap_write_and_wait_range() before proceeding.
+ *
+ * Return: %true if the caller should do filemap_write_and_wait_range() before
+ * doing O_DIRECT to a page in this range, %false otherwise.
+ */
+bool filemap_range_needs_writeback(struct address_space *mapping,
+                                  loff_t start_byte, loff_t end_byte)
+{
+       XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
+       pgoff_t max = end_byte >> PAGE_SHIFT;
+       struct page *page;
+
+       if (!mapping_needs_writeback(mapping))
+               return false;
+       if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+           !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+               return false;
+       if (end_byte < start_byte)
+               return false;
+
+       rcu_read_lock();
+       xas_for_each(&xas, page, max) {
+               if (xas_retry(&xas, page))
+                       continue;
+               if (xa_is_value(page))
+                       continue;
+               if (PageDirty(page) || PageLocked(page) || PageWriteback(page))
+                       break;
+       }
+       rcu_read_unlock();
+       return page != NULL;
+}
+EXPORT_SYMBOL_GPL(filemap_range_needs_writeback);
+
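An IOCB_NOWAIT caller uses this as a cheap bail-out test before attempting
direct I/O; the generic_file_read_iter() hunk further down does exactly this.
A minimal caller-side sketch:

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* NOWAIT means we may not block on writeback: punt
		 * back so the caller can retry from a blocking context. */
		if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
						  iocb->ki_pos + count - 1))
			return -EAGAIN;
	}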
 /**
  * filemap_write_and_wait_range - write out & wait on a file range
  * @mapping:   the address_space for the pages
@@ -843,7 +872,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
        page->index = offset;
 
        if (!huge) {
-               error = mem_cgroup_charge(page, current->mm, gfp);
+               error = mem_cgroup_charge(page, NULL, gfp);
                if (error)
                        goto error;
                charged = true;
@@ -882,8 +911,6 @@ noinline int __add_to_page_cache_locked(struct page *page,
                if (xas_error(&xas))
                        goto unlock;
 
-               if (old)
-                       mapping->nrexceptional--;
                mapping->nrpages++;
 
                /* hugetlb pages do not participate in page cache accounting */
@@ -1432,6 +1459,67 @@ void unlock_page(struct page *page)
 }
 EXPORT_SYMBOL(unlock_page);
 
+/**
+ * end_page_private_2 - Clear PG_private_2 and release any waiters
+ * @page: The page
+ *
+ * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for
+ * this.  The page ref held for PG_private_2 being set is released.
+ *
+ * This is, for example, used when a netfs page is being written to a local
+ * disk cache, thereby allowing writes to the cache for the same page to be
+ * serialised.
+ */
+void end_page_private_2(struct page *page)
+{
+       page = compound_head(page);
+       VM_BUG_ON_PAGE(!PagePrivate2(page), page);
+       clear_bit_unlock(PG_private_2, &page->flags);
+       wake_up_page_bit(page, PG_private_2);
+       put_page(page);
+}
+EXPORT_SYMBOL(end_page_private_2);
+
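The reference held for PG_private_2 is taken when the bit is set and dropped
here, so a cache writer brackets the store with the two calls. A sketch,
assuming the set_page_private_2() helper that takes the matching ref;
write_page_to_cache() is a hypothetical backend store:

	set_page_private_2(page);	/* get_page() + SetPagePrivate2() */
	write_page_to_cache(page);	/* hypothetical: copy page to cache */
	end_page_private_2(page);	/* clear bit, wake waiters, put_page() */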
+/**
+ * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page
+ * @page: The page to wait on
+ *
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page.
+ */
+void wait_on_page_private_2(struct page *page)
+{
+       page = compound_head(page);
+       while (PagePrivate2(page))
+               wait_on_page_bit(page, PG_private_2);
+}
+EXPORT_SYMBOL(wait_on_page_private_2);
+
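A filesystem with a local cache waits here before allowing the page to be
modified while a cache write may still be in flight. Sketch (the
PagePrivate2() pre-check mirrors what a wrapper such as wait_on_page_fscache()
would do; that wrapper name is an assumption):

	if (PagePrivate2(page))		/* still being written to the cache? */
		wait_on_page_private_2(page);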
+/**
+ * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page
+ * @page: The page to wait on
+ *
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a
+ * fatal signal is received by the calling task.
+ *
+ * Return:
+ * - 0 if successful.
+ * - -EINTR if a fatal signal was encountered.
+ */
+int wait_on_page_private_2_killable(struct page *page)
+{
+       int ret = 0;
+
+       page = compound_head(page);
+       while (PagePrivate2(page)) {
+               ret = wait_on_page_bit_killable(page, PG_private_2);
+               if (ret < 0)
+                       break;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(wait_on_page_private_2_killable);
+
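The killable variant lets a write path abort on a fatal signal rather than
block uninterruptibly. Caller-side sketch:

	ret = wait_on_page_private_2_killable(page);
	if (ret < 0)
		return ret;	/* -EINTR: fatal signal pending, bail out */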
 /**
  * end_page_writeback - end writeback against a page
  * @page: the page
@@ -1663,7 +1751,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
  * @mapping: the address_space to search
  * @index: The page cache index.
  *
- * Looks up the page cache slot at @mapping & @offset.  If there is a
+ * Looks up the page cache slot at @mapping & @index.  If there is a
  * page cache page, the head page is returned with an increased refcount.
  *
  * If the slot holds a shadow entry of a previously evicted page, or a
@@ -2244,8 +2332,6 @@ static int filemap_read_page(struct file *file, struct address_space *mapping,
                return error;
        if (PageUptodate(page))
                return 0;
-       if (!page->mapping)     /* page truncated */
-               return AOP_TRUNCATED_PAGE;
        shrink_readahead_size_eio(&file->f_ra);
        return -EIO;
 }
@@ -2577,8 +2663,8 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 
                size = i_size_read(inode);
                if (iocb->ki_flags & IOCB_NOWAIT) {
-                       if (filemap_range_has_page(mapping, iocb->ki_pos,
-                                                  iocb->ki_pos + count - 1))
+                       if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
+                                               iocb->ki_pos + count - 1))
                                return -EAGAIN;
                } else {
                        retval = filemap_write_and_wait_range(mapping,
@@ -2669,7 +2755,7 @@ unsigned int seek_page_size(struct xa_state *xas, struct page *page)
  * entirely memory-based such as tmpfs, and filesystems which support
  * unwritten extents.
  *
- * Return: The requested offset on successs, or -ENXIO if @whence specifies
+ * Return: The requested offset on success, or -ENXIO if @whence specifies
  * SEEK_DATA and there is no data after @start.  There is an implicit hole
  * after @end - 1, so SEEK_HOLE returns @end if all the bytes between @start
  * and @end contain data.
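Concretely, for a file whose first 4 KiB is a hole followed by data, these
semantics give (userspace sketch):

	off_t d = lseek(fd, 0, SEEK_DATA);	/* -> 4096 */
	off_t h = lseek(fd, 4096, SEEK_HOLE);	/* -> next hole, or the file
						 *    size if the rest is data */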
@@ -2778,7 +2864,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
        struct file *file = vmf->vma->vm_file;
        struct file_ra_state *ra = &file->f_ra;
        struct address_space *mapping = file->f_mapping;
-       DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
+       DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff);
        struct file *fpin = NULL;
        unsigned int mmap_miss;
 
@@ -2790,7 +2876,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 
        if (vmf->vma->vm_flags & VM_SEQ_READ) {
                fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-               page_cache_sync_ra(&ractl, ra, ra->ra_pages);
+               page_cache_sync_ra(&ractl, ra->ra_pages);
                return fpin;
        }
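The readahead control now carries the file_ra_state, so callers hand it over
once in DEFINE_READAHEAD() and drop it from the per-call arguments. Sketch of
the new convention, as used in this hunk:

	DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);
	page_cache_sync_ra(&ractl, file->f_ra.ra_pages);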
 
@@ -2876,7 +2962,6 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
        struct file *file = vmf->vma->vm_file;
        struct file *fpin = NULL;
        struct address_space *mapping = file->f_mapping;
-       struct file_ra_state *ra = &file->f_ra;
        struct inode *inode = mapping->host;
        pgoff_t offset = vmf->pgoff;
        pgoff_t max_off;
@@ -2963,14 +3048,8 @@ page_not_uptodate:
         * because there really aren't any performance issues here
         * and we need to check for errors.
         */
-       ClearPageError(page);
        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-       error = mapping->a_ops->readpage(file, page);
-       if (!error) {
-               wait_on_page_locked(page);
-               if (!PageUptodate(page))
-                       error = -EIO;
-       }
+       error = filemap_read_page(file, mapping, page);
        if (fpin)
                goto out_retry;
        put_page(page);
@@ -2978,7 +3057,6 @@ page_not_uptodate:
        if (!error || error == AOP_TRUNCATED_PAGE)
                goto retry_find;
 
-       shrink_readahead_size_eio(ra);
        return VM_FAULT_SIGBUS;
 
 out_retry:
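The open-coded readpage-and-wait is replaced by filemap_read_page(), whose
outcomes this caller consumes as follows (sketch):

	/* filemap_read_page() as used here:
	 *   0                  - read succeeded; retry the page lookup
	 *   AOP_TRUNCATED_PAGE - raced with truncation; also retried
	 *   other -errno       - hard failure; return VM_FAULT_SIGBUS
	 */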
@@ -3189,7 +3267,7 @@ const struct vm_operations_struct generic_file_vm_ops = {
 
 /* This is used for a general mmap of a disk file */
 
-int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
+int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct address_space *mapping = file->f_mapping;
 
@@ -3214,11 +3292,11 @@ vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
 {
        return VM_FAULT_SIGBUS;
 }
-int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
+int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
        return -ENOSYS;
 }
-int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
+int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
 {
        return -ENOSYS;
 }
@@ -3564,10 +3642,6 @@ again:
                 * Otherwise there's a nasty deadlock on copying from the
                 * same page as we're writing to, without it being marked
                 * up-to-date.
-                *
-                * Not only is this an optimisation, but it is also required
-                * to check that the address is actually valid, when atomic
-                * usercopies are used, below.
                 */
                if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
                        status = -EFAULT;
@@ -3587,33 +3661,31 @@ again:
                if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);
 
-               copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+               copied = copy_page_from_iter_atomic(page, offset, bytes, i);
                flush_dcache_page(page);
 
                status = a_ops->write_end(file, mapping, pos, bytes, copied,
                                                page, fsdata);
-               if (unlikely(status < 0))
-                       break;
-               copied = status;
-
+               if (unlikely(status != copied)) {
+                       iov_iter_revert(i, copied - max(status, 0L));
+                       if (unlikely(status < 0))
+                               break;
+               }
                cond_resched();
 
-               iov_iter_advance(i, copied);
-               if (unlikely(copied == 0)) {
+               if (unlikely(status == 0)) {
                        /*
-                        * If we were unable to copy any data at all, we must
-                        * fall back to a single segment length write.
-                        *
-                        * If we didn't fallback here, we could livelock
-                        * because not all segments in the iov can be copied at
-                        * once without a pagefault.
+                        * A short copy made ->write_end() reject the
+                        * thing entirely.  Might be memory poisoning
+                        * halfway through, might be a race with munmap,
+                        * might be severe memory pressure.
                         */
-                       bytes = min_t(unsigned long, PAGE_SIZE - offset,
-                                               iov_iter_single_seg_count(i));
+                       if (copied)
+                               bytes = copied;
                        goto again;
                }
-               pos += copied;
-               written += copied;
+               pos += status;
+               written += status;
 
                balance_dirty_pages_ratelimited(mapping);
        } while (iov_iter_count(i));
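The rework keeps the iterator exactly in step with what ->write_end()
accepted. A worked case of the bookkeeping:

	/* Sketch: copy_page_from_iter_atomic() brought in 4096 bytes
	 * (copied) but ->write_end() accepted only 1024 (status).
	 * iov_iter_revert(i, 4096 - 1024) rewinds the iterator 3072
	 * bytes so only the unaccepted tail is re-copied.  If status
	 * is 0 while copied is not, bytes is clamped to copied and the
	 * same range is retried after the fault-in at the top of the
	 * loop.
	 */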
@@ -3646,7 +3718,7 @@ EXPORT_SYMBOL(generic_perform_write);
 ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
-       struct address_space * mapping = file->f_mapping;
+       struct address_space *mapping = file->f_mapping;
        struct inode    *inode = mapping->host;
        ssize_t         written = 0;
        ssize_t         err;