Merge tag 'vfs-5.10-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

[linux-2.6-microblaze.git] / mm / filemap.c
diff --git a/mm/filemap.c b/mm/filemap.c

index 9962fd6..d5e7c20 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -249,7 +249,7 @@ static void page_cache_free_page(struct address_space *mapping,
                 freepage(page);
  
         if (PageTransHuge(page) && !PageHuge(page)) {
-               page_ref_sub(page, HPAGE_PMD_NR);
+               page_ref_sub(page, thp_nr_pages(page));
                 VM_BUG_ON_PAGE(page_count(page) <= 0, page);
         } else {
                 put_page(page);
@@ -414,7 +414,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
                 .range_end = end,
         };
  
-       if (!mapping_cap_writeback_dirty(mapping) ||
+       if (!mapping_can_writeback(mapping) ||
             !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
                 return 0;
  
@@ -827,15 +827,14 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
  }
  EXPORT_SYMBOL_GPL(replace_page_cache_page);
  
-static int __add_to_page_cache_locked(struct page *page,
-                                     struct address_space *mapping,
-                                     pgoff_t offset, gfp_t gfp_mask,
-                                     void **shadowp)
+noinline int __add_to_page_cache_locked(struct page *page,
+                                       struct address_space *mapping,
+                                       pgoff_t offset, gfp_t gfp,
+                                       void **shadowp)
  {
         XA_STATE(xas, &mapping->i_pages, offset);
         int huge = PageHuge(page);
         int error;
-       void *old;
  
         VM_BUG_ON_PAGE(!PageLocked(page), page);
         VM_BUG_ON_PAGE(PageSwapBacked(page), page);
@@ -846,25 +845,46 @@ static int __add_to_page_cache_locked(struct page *page,
         page->index = offset;
  
         if (!huge) {
-               error = mem_cgroup_charge(page, current->mm, gfp_mask);
+               error = mem_cgroup_charge(page, current->mm, gfp);
                 if (error)
                         goto error;
         }
  
+       gfp &= GFP_RECLAIM_MASK;
+
         do {
+               unsigned int order = xa_get_order(xas.xa, xas.xa_index);
+               void *entry, *old = NULL;
+
+               if (order > thp_order(page))
+                       xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
+                                       order, gfp);
                 xas_lock_irq(&xas);
-               old = xas_load(&xas);
-               if (old && !xa_is_value(old))
-                       xas_set_err(&xas, -EEXIST);
+               xas_for_each_conflict(&xas, entry) {
+                       old = entry;
+                       if (!xa_is_value(entry)) {
+                               xas_set_err(&xas, -EEXIST);
+                               goto unlock;
+                       }
+               }
+
+               if (old) {
+                       if (shadowp)
+                               *shadowp = old;
+                       /* entry may have been split before we acquired lock */
+                       order = xa_get_order(xas.xa, xas.xa_index);
+                       if (order > thp_order(page)) {
+                               xas_split(&xas, old, order);
+                               xas_reset(&xas);
+                       }
+               }
+
                 xas_store(&xas, page);
                 if (xas_error(&xas))
                         goto unlock;
  
-               if (xa_is_value(old)) {
+               if (old)
                         mapping->nrexceptional--;
-                       if (shadowp)
-                               *shadowp = old;
-               }
                 mapping->nrpages++;
  
                 /* hugetlb pages do not participate in page cache accounting */
@@ -872,7 +892,7 @@ static int __add_to_page_cache_locked(struct page *page,
                         __inc_lruvec_page_state(page, NR_FILE_PAGES);
  unlock:
                 xas_unlock_irq(&xas);
-       } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
+       } while (xas_nomem(&xas, gfp));
  
         if (xas_error(&xas)) {
                 error = xas_error(&xas);
@@ -1425,7 +1445,7 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem
   * unlock_page - unlock a locked page
   * @page: the page
   *
- * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
+ * Unlocks the page and wakes up sleepers in wait_on_page_locked().
   * Also wakes sleepers in wait_on_page_writeback() because the wakeup
   * mechanism between PageLocked pages and PageWriteback pages is shared.
   * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
@@ -1645,19 +1665,19 @@ EXPORT_SYMBOL(page_cache_prev_miss);
  /**
   * find_get_entry - find and get a page cache entry
   * @mapping: the address_space to search
- * @offset: the page cache index
+ * @index: The page cache index.
   *
   * Looks up the page cache slot at @mapping & @index.  If there is a
- * page cache page, it is returned with an increased refcount.
+ * page cache page, the head page is returned with an increased refcount.
   *
   * If the slot holds a shadow entry of a previously evicted page, or a
   * swap entry from shmem/tmpfs, it is returned.
   *
- * Return: the found page or shadow entry, %NULL if nothing is found.
+ * Return: The head page or shadow entry, %NULL if nothing is found.
   */
-struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
+struct page *find_get_entry(struct address_space *mapping, pgoff_t index)
  {
-       XA_STATE(xas, &mapping->i_pages, offset);
+       XA_STATE(xas, &mapping->i_pages, index);
         struct page *page;
  
         rcu_read_lock();
@@ -1685,7 +1705,6 @@ repeat:
                 put_page(page);
                 goto repeat;
         }
-       page = find_subpage(page, offset);
  out:
         rcu_read_unlock();
  
@@ -1693,40 +1712,37 @@ out:
  }
  
  /**
- * find_lock_entry - locate, pin and lock a page cache entry
- * @mapping: the address_space to search
- * @offset: the page cache index
+ * find_lock_entry - Locate and lock a page cache entry.
+ * @mapping: The address_space to search.
+ * @index: The page cache index.
   *
- * Looks up the page cache slot at @mapping & @offset.  If there is a
- * page cache page, it is returned locked and with an increased
- * refcount.
+ * Looks up the page at @mapping & @index.  If there is a page in the
+ * cache, the head page is returned locked and with an increased refcount.
   *
   * If the slot holds a shadow entry of a previously evicted page, or a
   * swap entry from shmem/tmpfs, it is returned.
   *
- * find_lock_entry() may sleep.
- *
- * Return: the found page or shadow entry, %NULL if nothing is found.
+ * Context: May sleep.
+ * Return: The head page or shadow entry, %NULL if nothing is found.
   */
-struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
+struct page *find_lock_entry(struct address_space *mapping, pgoff_t index)
  {
         struct page *page;
  
  repeat:
-       page = find_get_entry(mapping, offset);
+       page = find_get_entry(mapping, index);
         if (page && !xa_is_value(page)) {
                 lock_page(page);
                 /* Has the page been truncated? */
-               if (unlikely(page_mapping(page) != mapping)) {
+               if (unlikely(page->mapping != mapping)) {
                         unlock_page(page);
                         put_page(page);
                         goto repeat;
                 }
-               VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
+               VM_BUG_ON_PAGE(!thp_contains(page, index), page);
         }
         return page;
  }
-EXPORT_SYMBOL(find_lock_entry);
  
  /**
   * pagecache_get_page - Find and get a reference to a page.
@@ -1741,6 +1757,8 @@ EXPORT_SYMBOL(find_lock_entry);
   *
   * * %FGP_ACCESSED - The page will be marked accessed.
   * * %FGP_LOCK - The page is returned locked.
+ * * %FGP_HEAD - If the page is present and a THP, return the head page
+ *   rather than the exact page specified by the index.
   * * %FGP_CREAT - If no page is present then a new page is allocated using
   *   @gfp_mask and added to the page cache and the VM's LRU list.
   *   The page is returned locked and with an increased refcount.
@@ -1781,12 +1799,12 @@ repeat:
                 }
  
                 /* Has the page been truncated? */
-               if (unlikely(compound_head(page)->mapping != mapping)) {
+               if (unlikely(page->mapping != mapping)) {
                         unlock_page(page);
                         put_page(page);
                         goto repeat;
                 }
-               VM_BUG_ON_PAGE(page->index != index, page);
+               VM_BUG_ON_PAGE(!thp_contains(page, index), page);
         }
  
         if (fgp_flags & FGP_ACCESSED)
@@ -1796,11 +1814,13 @@ repeat:
                 if (page_is_idle(page))
                         clear_page_idle(page);
         }
+       if (!(fgp_flags & FGP_HEAD))
+               page = find_subpage(page, index);
  
  no_page:
         if (!page && (fgp_flags & FGP_CREAT)) {
                 int err;
-               if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping))
+               if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
                         gfp_mask |= __GFP_WRITE;
                 if (fgp_flags & FGP_NOFS)
                         gfp_mask &= ~__GFP_FS;
@@ -2179,6 +2199,14 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
         last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
         offset = *ppos & ~PAGE_MASK;
  
+       /*
+        * If we've already successfully copied some data, then we
+        * can no longer safely return -EIOCBQUEUED. Hence mark
+        * an async read NOWAIT at that point.
+        */
+       if (written && (iocb->ki_flags & IOCB_WAITQ))
+               iocb->ki_flags |= IOCB_NOWAIT;
+
         for (;;) {
                 struct page *page;
                 pgoff_t end_index;
@@ -2568,8 +2596,8 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
         struct file *file = vmf->vma->vm_file;
         struct file_ra_state *ra = &file->f_ra;
         struct address_space *mapping = file->f_mapping;
+       DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
         struct file *fpin = NULL;
-       pgoff_t offset = vmf->pgoff;
         unsigned int mmap_miss;
  
         /* If we don't want any read-ahead, don't bother */
@@ -2580,8 +2608,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
  
         if (vmf->vma->vm_flags & VM_SEQ_READ) {
                 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-               page_cache_sync_readahead(mapping, ra, file, offset,
-                                         ra->ra_pages);
+               page_cache_sync_ra(&ractl, ra, ra->ra_pages);
                 return fpin;
         }
  
@@ -2601,10 +2628,11 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
          * mmap read-around
          */
         fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-       ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
+       ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
         ra->size = ra->ra_pages;
         ra->async_size = ra->ra_pages / 4;
-       ra_submit(ra, mapping, file);
+       ractl._index = ra->start;
+       do_page_cache_ra(&ractl, ra->size, ra->async_size);
         return fpin;
  }
  
@@ -2793,42 +2821,42 @@ void filemap_map_pages(struct vm_fault *vmf,
         pgoff_t last_pgoff = start_pgoff;
         unsigned long max_idx;
         XA_STATE(xas, &mapping->i_pages, start_pgoff);
-       struct page *page;
+       struct page *head, *page;
         unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
  
         rcu_read_lock();
-       xas_for_each(&xas, page, end_pgoff) {
-               if (xas_retry(&xas, page))
+       xas_for_each(&xas, head, end_pgoff) {
+               if (xas_retry(&xas, head))
                         continue;
-               if (xa_is_value(page))
+               if (xa_is_value(head))
                         goto next;
  
                 /*
                  * Check for a locked page first, as a speculative
                  * reference may adversely influence page migration.
                  */
-               if (PageLocked(page))
+               if (PageLocked(head))
                         goto next;
-               if (!page_cache_get_speculative(page))
+               if (!page_cache_get_speculative(head))
                         goto next;
  
                 /* Has the page moved or been split? */
-               if (unlikely(page != xas_reload(&xas)))
+               if (unlikely(head != xas_reload(&xas)))
                         goto skip;
-               page = find_subpage(page, xas.xa_index);
+               page = find_subpage(head, xas.xa_index);
  
-               if (!PageUptodate(page) ||
+               if (!PageUptodate(head) ||
                                 PageReadahead(page) ||
                                 PageHWPoison(page))
                         goto skip;
-               if (!trylock_page(page))
+               if (!trylock_page(head))
                         goto skip;
  
-               if (page->mapping != mapping || !PageUptodate(page))
+               if (head->mapping != mapping || !PageUptodate(head))
                         goto unlock;
  
                 max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
-               if (page->index >= max_idx)
+               if (xas.xa_index >= max_idx)
                         goto unlock;
  
                 if (mmap_miss > 0)
@@ -2840,12 +2868,12 @@ void filemap_map_pages(struct vm_fault *vmf,
                 last_pgoff = xas.xa_index;
                 if (alloc_set_pte(vmf, page))
                         goto unlock;
-               unlock_page(page);
+               unlock_page(head);
                 goto next;
  unlock:
-               unlock_page(page);
+               unlock_page(head);
  skip:
-               put_page(page);
+               put_page(head);
  next:
                 /* Huge page is mapped? No need to proceed. */
                 if (pmd_trans_huge(*vmf->pmd))
@@ -2984,7 +3012,7 @@ filler:
                 goto out;
  
         /*
-        * Page is not up to date and may be locked due one of the following
+        * Page is not up to date and may be locked due to one of the following
          * case a: Page is being filled and the page lock is held
          * case b: Read/write error clearing the page uptodate status
          * case c: Truncation in progress (page locked)