Merge tag 'kvmarm-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmar...
diff --git a/mm/filemap.c b/mm/filemap.c
index 6ce832d..d1458ec 100644
@@ -142,17 +142,6 @@ static void page_cache_delete(struct address_space *mapping,
 
        page->mapping = NULL;
        /* Leave page->index set: truncation lookup relies upon it */
-
-       if (shadow) {
-               mapping->nrexceptional += nr;
-               /*
-                * Make sure the nrexceptional update is committed before
-                * the nrpages update so that final truncate racing
-                * with reclaim does not see both counters 0 at the
-                * same time and miss a shadow entry.
-                */
-               smp_wmb();
-       }
        mapping->nrpages -= nr;
 }
 
@@ -629,12 +618,52 @@ EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
 /* Returns true if writeback might be needed or already in progress. */
 static bool mapping_needs_writeback(struct address_space *mapping)
 {
-       if (dax_mapping(mapping))
-               return mapping->nrexceptional;
-
        return mapping->nrpages;
 }
 
+/**
+ * filemap_range_needs_writeback - check if range potentially needs writeback
+ * @mapping:           address space within which to check
+ * @start_byte:        offset in bytes where the range starts
+ * @end_byte:          offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one dirty, locked, or writeback page in the range supplied.
+ * Usually used to check whether direct I/O in this range would trigger
+ * writeback. Used by O_DIRECT read/write with IOCB_NOWAIT to see if the
+ * caller needs to do filemap_write_and_wait_range() before proceeding.
+ *
+ * Return: %true if the caller should do filemap_write_and_wait_range() before
+ * doing O_DIRECT to a page in this range, %false otherwise.
+ */
+bool filemap_range_needs_writeback(struct address_space *mapping,
+                                  loff_t start_byte, loff_t end_byte)
+{
+       XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
+       pgoff_t max = end_byte >> PAGE_SHIFT;
+       struct page *page;
+
+       if (!mapping_needs_writeback(mapping))
+               return false;
+       if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+           !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+               return false;
+       if (end_byte < start_byte)
+               return false;
+
+       rcu_read_lock();
+       xas_for_each(&xas, page, max) {
+               if (xas_retry(&xas, page))
+                       continue;
+               if (xa_is_value(page))
+                       continue;
+               if (PageDirty(page) || PageLocked(page) || PageWriteback(page))
+                       break;
+       }
+       rcu_read_unlock();
+       return page != NULL;
+}
+EXPORT_SYMBOL_GPL(filemap_range_needs_writeback);
+
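An IOCB_NOWAIT caller uses this as a cheap bail-out test before attempting
direct I/O; the generic_file_read_iter() hunk further down does exactly this.
A minimal caller-side sketch:

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* NOWAIT means we may not block on writeback: punt
		 * back so the caller can retry from a blocking context. */
		if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
						  iocb->ki_pos + count - 1))
			return -EAGAIN;
	}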
 /**
  * filemap_write_and_wait_range - write out & wait on a file range
  * @mapping:   the address_space for the pages
@@ -843,7 +872,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
        page->index = offset;
 
        if (!huge) {
-               error = mem_cgroup_charge(page, current->mm, gfp);
+               error = mem_cgroup_charge(page, NULL, gfp);
                if (error)
                        goto error;
                charged = true;
@@ -882,8 +911,6 @@ noinline int __add_to_page_cache_locked(struct page *page,
                if (xas_error(&xas))
                        goto unlock;
 
-               if (old)
-                       mapping->nrexceptional--;
                mapping->nrpages++;
 
                /* hugetlb pages do not participate in page cache accounting */
@@ -1432,6 +1459,67 @@ void unlock_page(struct page *page)
 }
 EXPORT_SYMBOL(unlock_page);
 
+/**
+ * end_page_private_2 - Clear PG_private_2 and release any waiters
+ * @page: The page
+ *
+ * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for
+ * this.  The page ref held for PG_private_2 being set is released.
+ *
+ * This is, for example, used when a netfs page is being written to a local
+ * disk cache, thereby allowing writes to the cache for the same page to be
+ * serialised.
+ */
+void end_page_private_2(struct page *page)
+{
+       page = compound_head(page);
+       VM_BUG_ON_PAGE(!PagePrivate2(page), page);
+       clear_bit_unlock(PG_private_2, &page->flags);
+       wake_up_page_bit(page, PG_private_2);
+       put_page(page);
+}
+EXPORT_SYMBOL(end_page_private_2);
+
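The reference held for PG_private_2 is taken when the bit is set and dropped
here, so a cache writer brackets the store with the two calls. A sketch,
assuming the set_page_private_2() helper that takes the matching ref;
write_page_to_cache() is a hypothetical backend store:

	set_page_private_2(page);	/* get_page() + SetPagePrivate2() */
	write_page_to_cache(page);	/* hypothetical: copy page to cache */
	end_page_private_2(page);	/* clear bit, wake waiters, put_page() */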
+/**
+ * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page
+ * @page: The page to wait on
+ *
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page.
+ */
+void wait_on_page_private_2(struct page *page)
+{
+       page = compound_head(page);
+       while (PagePrivate2(page))
+               wait_on_page_bit(page, PG_private_2);
+}
+EXPORT_SYMBOL(wait_on_page_private_2);
+
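A filesystem with a local cache waits here before allowing the page to be
modified while a cache write may still be in flight. Sketch (the
PagePrivate2() pre-check mirrors what a wrapper such as wait_on_page_fscache()
would do; that wrapper name is an assumption):

	if (PagePrivate2(page))		/* still being written to the cache? */
		wait_on_page_private_2(page);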
+/**
+ * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page
+ * @page: The page to wait on
+ *
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a
+ * fatal signal is received by the calling task.
+ *
+ * Return:
+ * - 0 if successful.
+ * - -EINTR if a fatal signal was encountered.
+ */
+int wait_on_page_private_2_killable(struct page *page)
+{
+       int ret = 0;
+
+       page = compound_head(page);
+       while (PagePrivate2(page)) {
+               ret = wait_on_page_bit_killable(page, PG_private_2);
+               if (ret < 0)
+                       break;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(wait_on_page_private_2_killable);
+
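The killable variant lets a write path abort on a fatal signal rather than
block uninterruptibly. Caller-side sketch:

	ret = wait_on_page_private_2_killable(page);
	if (ret < 0)
		return ret;	/* -EINTR: fatal signal pending, bail out */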
 /**
  * end_page_writeback - end writeback against a page
  * @page: the page
@@ -1663,7 +1751,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
  * @mapping: the address_space to search
  * @index: The page cache index.
  *
- * Looks up the page cache slot at @mapping & @offset.  If there is a
+ * Looks up the page cache slot at @mapping & @index.  If there is a
  * page cache page, the head page is returned with an increased refcount.
  *
  * If the slot holds a shadow entry of a previously evicted page, or a
@@ -2244,8 +2332,6 @@ static int filemap_read_page(struct file *file, struct address_space *mapping,
                return error;
        if (PageUptodate(page))
                return 0;
-       if (!page->mapping)     /* page truncated */
-               return AOP_TRUNCATED_PAGE;
        shrink_readahead_size_eio(&file->f_ra);
        return -EIO;
 }
@@ -2577,8 +2663,8 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 
                size = i_size_read(inode);
                if (iocb->ki_flags & IOCB_NOWAIT) {
-                       if (filemap_range_has_page(mapping, iocb->ki_pos,
-                                                  iocb->ki_pos + count - 1))
+                       if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
+                                               iocb->ki_pos + count - 1))
                                return -EAGAIN;
                } else {
                        retval = filemap_write_and_wait_range(mapping,
@@ -2669,7 +2755,7 @@ unsigned int seek_page_size(struct xa_state *xas, struct page *page)
  * entirely memory-based such as tmpfs, and filesystems which support
  * unwritten extents.
  *
- * Return: The requested offset on successs, or -ENXIO if @whence specifies
+ * Return: The requested offset on success, or -ENXIO if @whence specifies
  * SEEK_DATA and there is no data after @start.  There is an implicit hole
  * after @end - 1, so SEEK_HOLE returns @end if all the bytes between @start
  * and @end contain data.
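Concretely, for a file whose first 4 KiB is a hole followed by data, these
semantics give (userspace sketch):

	off_t d = lseek(fd, 0, SEEK_DATA);	/* -> 4096 */
	off_t h = lseek(fd, 4096, SEEK_HOLE);	/* -> next hole, or the file
						 *    size if the rest is data */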
@@ -2778,7 +2864,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
        struct file *file = vmf->vma->vm_file;
        struct file_ra_state *ra = &file->f_ra;
        struct address_space *mapping = file->f_mapping;
-       DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
+       DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff);
        struct file *fpin = NULL;
        unsigned int mmap_miss;
 
@@ -2790,7 +2876,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 
        if (vmf->vma->vm_flags & VM_SEQ_READ) {
                fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-               page_cache_sync_ra(&ractl, ra, ra->ra_pages);
+               page_cache_sync_ra(&ractl, ra->ra_pages);
                return fpin;
        }
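The readahead control now carries the file_ra_state, so callers hand it over
once in DEFINE_READAHEAD() and drop it from the per-call arguments. Sketch of
the new convention, as used in this hunk:

	DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);
	page_cache_sync_ra(&ractl, file->f_ra.ra_pages);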
 
@@ -2876,7 +2962,6 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
        struct file *file = vmf->vma->vm_file;
        struct file *fpin = NULL;
        struct address_space *mapping = file->f_mapping;
-       struct file_ra_state *ra = &file->f_ra;
        struct inode *inode = mapping->host;
        pgoff_t offset = vmf->pgoff;
        pgoff_t max_off;
@@ -2963,14 +3048,8 @@ page_not_uptodate:
         * because there really aren't any performance issues here
         * and we need to check for errors.
         */
-       ClearPageError(page);
        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-       error = mapping->a_ops->readpage(file, page);
-       if (!error) {
-               wait_on_page_locked(page);
-               if (!PageUptodate(page))
-                       error = -EIO;
-       }
+       error = filemap_read_page(file, mapping, page);
        if (fpin)
                goto out_retry;
        put_page(page);
@@ -2978,7 +3057,6 @@ page_not_uptodate:
        if (!error || error == AOP_TRUNCATED_PAGE)
                goto retry_find;
 
-       shrink_readahead_size_eio(ra);
        return VM_FAULT_SIGBUS;
 
 out_retry:
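The open-coded readpage-and-wait is replaced by filemap_read_page(), whose
outcomes this caller consumes as follows (sketch):

	/* filemap_read_page() as used here:
	 *   0                  - read succeeded; retry the page lookup
	 *   AOP_TRUNCATED_PAGE - raced with truncation; also retried
	 *   other -errno       - hard failure; return VM_FAULT_SIGBUS
	 */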
@@ -3189,7 +3267,7 @@ const struct vm_operations_struct generic_file_vm_ops = {
 
 /* This is used for a general mmap of a disk file */
 
-int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
+int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct address_space *mapping = file->f_mapping;
 
@@ -3214,11 +3292,11 @@ vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
 {
        return VM_FAULT_SIGBUS;
 }
-int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
+int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
        return -ENOSYS;
 }
-int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
+int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
 {
        return -ENOSYS;
 }
@@ -3564,10 +3642,6 @@ again:
                 * Otherwise there's a nasty deadlock on copying from the
                 * same page as we're writing to, without it being marked
                 * up-to-date.
-                *
-                * Not only is this an optimisation, but it is also required
-                * to check that the address is actually valid, when atomic
-                * usercopies are used, below.
                 */
                if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
                        status = -EFAULT;
@@ -3587,33 +3661,31 @@ again:
                if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);
 
-               copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+               copied = copy_page_from_iter_atomic(page, offset, bytes, i);
                flush_dcache_page(page);
 
                status = a_ops->write_end(file, mapping, pos, bytes, copied,
                                                page, fsdata);
-               if (unlikely(status < 0))
-                       break;
-               copied = status;
-
+               if (unlikely(status != copied)) {
+                       iov_iter_revert(i, copied - max(status, 0L));
+                       if (unlikely(status < 0))
+                               break;
+               }
                cond_resched();
 
-               iov_iter_advance(i, copied);
-               if (unlikely(copied == 0)) {
+               if (unlikely(status == 0)) {
                        /*
-                        * If we were unable to copy any data at all, we must
-                        * fall back to a single segment length write.
-                        *
-                        * If we didn't fallback here, we could livelock
-                        * because not all segments in the iov can be copied at
-                        * once without a pagefault.
+                        * A short copy made ->write_end() reject the
+                        * thing entirely.  Might be memory poisoning
+                        * halfway through, might be a race with munmap,
+                        * might be severe memory pressure.
                         */
-                       bytes = min_t(unsigned long, PAGE_SIZE - offset,
-                                               iov_iter_single_seg_count(i));
+                       if (copied)
+                               bytes = copied;
                        goto again;
                }
-               pos += copied;
-               written += copied;
+               pos += status;
+               written += status;
 
                balance_dirty_pages_ratelimited(mapping);
        } while (iov_iter_count(i));
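The rework keeps the iterator exactly in step with what ->write_end()
accepted. A worked case of the bookkeeping:

	/* Sketch: copy_page_from_iter_atomic() brought in 4096 bytes
	 * (copied) but ->write_end() accepted only 1024 (status).
	 * iov_iter_revert(i, 4096 - 1024) rewinds the iterator 3072
	 * bytes so only the unaccepted tail is re-copied.  If status
	 * is 0 while copied is not, bytes is clamped to copied and the
	 * same range is retried after the fault-in at the top of the
	 * loop.
	 */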
@@ -3646,7 +3718,7 @@ EXPORT_SYMBOL(generic_perform_write);
 ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
-       struct address_space * mapping = file->f_mapping;
+       struct address_space *mapping = file->f_mapping;
        struct inode    *inode = mapping->host;
        ssize_t         written = 0;
        ssize_t         err;