diff --git a/mm/readahead.c b/mm/readahead.c
index 8a97bd4..d3a4754 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -156,7 +156,7 @@ static void read_cache_pages_invalidate_page(struct address_space *mapping,
                if (!trylock_page(page))
                        BUG();
                page->mapping = mapping;
-               do_invalidatepage(page, 0, PAGE_SIZE);
+               folio_invalidate(page_folio(page), 0, PAGE_SIZE);
                page->mapping = NULL;
                unlock_page(page);
        }
@@ -262,7 +262,7 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
 
        blk_finish_plug(&plug);
 
-       BUG_ON(!list_empty(pages));
+       BUG_ON(pages && !list_empty(pages));
        BUG_ON(readahead_count(rac));
 
 out:
@@ -361,7 +361,7 @@ EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
  * behaviour which would occur if page allocations are causing VM writeback.
  * We really don't want to intermingle reads and writes like that.
  */
-void do_page_cache_ra(struct readahead_control *ractl,
+static void do_page_cache_ra(struct readahead_control *ractl,
                unsigned long nr_to_read, unsigned long lookahead_size)
 {
        struct inode *inode = ractl->mapping->host;
@@ -545,11 +545,103 @@ static int try_context_readahead(struct address_space *mapping,
        return 1;
 }
 
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size.  I'm not aware of any
+ * assumptions about the maximum order if THP is disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages).
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER    HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER    8
+#endif
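
As a rough illustration of the cap above (not part of the patch; assumes 4 KiB base pages, i.e. PAGE_SHIFT == 12), order 8 works out to 256 pages or 1 MiB, while HPAGE_PMD_ORDER is 9 on x86-64, i.e. 2 MiB folios. A minimal userspace sketch; ILLUSTRATIVE_PAGE_SHIFT and folio_bytes() are made-up names, not kernel API:

#include <stdio.h>

#define ILLUSTRATIVE_PAGE_SHIFT 12      /* assumes 4 KiB base pages */

/* Bytes covered by a folio of the given order. */
static unsigned long folio_bytes(unsigned int order)
{
        return 1UL << (ILLUSTRATIVE_PAGE_SHIFT + order);
}

int main(void)
{
        printf("order 8: %lu KiB\n", folio_bytes(8) >> 10);     /* 1024 KiB */
        printf("order 9: %lu KiB\n", folio_bytes(9) >> 10);     /* 2048 KiB */
        return 0;
}
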
+
+static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
+               pgoff_t mark, unsigned int order, gfp_t gfp)
+{
+       int err;
+       struct folio *folio = filemap_alloc_folio(gfp, order);
+
+       if (!folio)
+               return -ENOMEM;
+       if (mark - index < (1UL << order))
+               folio_set_readahead(folio);
+       err = filemap_add_folio(ractl->mapping, folio, index, gfp);
+       if (err)
+               folio_put(folio);
+       else
+               ractl->_nr_pages += 1UL << order;
+       return err;
+}
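
The check mark - index < (1UL << order) in ra_alloc_folio() above relies on unsigned arithmetic: it holds exactly when the folio starting at index covers mark, so only the folio containing the readahead marker gets folio_set_readahead(). A standalone sketch of that predicate (illustrative only; pgoff_t is redefined locally and mark_hits() is a made-up name):

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long pgoff_t;          /* local stand-in for the kernel type */

/* Mirrors the check in ra_alloc_folio(): true only when
 * index <= mark < index + (1UL << order); if index > mark the
 * unsigned subtraction wraps to a huge value and the test fails. */
static bool mark_hits(pgoff_t index, pgoff_t mark, unsigned int order)
{
        return mark - index < (1UL << order);
}

int main(void)
{
        pgoff_t mark = 20;
        pgoff_t index;

        /* Order-2 folios (4 pages each) at indices 16, 20 and 24. */
        for (index = 16; index <= 24; index += 4)
                printf("folio %lu..%lu: %s\n", index, index + 3,
                       mark_hits(index, mark, 2) ? "marked" : "not marked");
        return 0;
}
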
+
+void page_cache_ra_order(struct readahead_control *ractl,
+               struct file_ra_state *ra, unsigned int new_order)
+{
+       struct address_space *mapping = ractl->mapping;
+       pgoff_t index = readahead_index(ractl);
+       pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
+       pgoff_t mark = index + ra->size - ra->async_size;
+       int err = 0;
+       gfp_t gfp = readahead_gfp_mask(mapping);
+
+       if (!mapping_large_folio_support(mapping) || ra->size < 4)
+               goto fallback;
+
+       limit = min(limit, index + ra->size - 1);
+
+       if (new_order < MAX_PAGECACHE_ORDER) {
+               new_order += 2;
+               if (new_order > MAX_PAGECACHE_ORDER)
+                       new_order = MAX_PAGECACHE_ORDER;
+               while ((1 << new_order) > ra->size)
+                       new_order--;
+       }
+
+       while (index <= limit) {
+               unsigned int order = new_order;
+
+               /* Align with smaller pages if needed */
+               if (index & ((1UL << order) - 1)) {
+                       order = __ffs(index);
+                       if (order == 1)
+                               order = 0;
+               }
+               /* Don't allocate pages past EOF */
+               while (index + (1UL << order) - 1 > limit) {
+                       if (--order == 1)
+                               order = 0;
+               }
+               err = ra_alloc_folio(ractl, index, mark, order, gfp);
+               if (err)
+                       break;
+               index += 1UL << order;
+       }
+
+       if (index > limit) {
+               ra->size += index - limit - 1;
+               ra->async_size += index - limit - 1;
+       }
+
+       read_pages(ractl, NULL, false);
+
+       /*
+        * If there were already pages in the page cache, then we may have
+        * left some gaps.  Let the regular readahead code take care of this
+        * situation.
+        */
+       if (!err)
+               return;
+fallback:
+       do_page_cache_ra(ractl, ra->size, ra->async_size);
+}
+
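
To see how the loop above picks a per-folio order, the following standalone sketch replays just the alignment and EOF clamping for a hypothetical range (illustrative only: __builtin_ctzl() stands in for __ffs(), new_order is assumed to be already ramped, and allocation failures and already-cached pages are ignored):

#include <stdio.h>

int main(void)
{
        unsigned long index = 3, limit = 18;    /* hypothetical file range */
        unsigned int new_order = 4;             /* assume already ramped */

        while (index <= limit) {
                unsigned int order = new_order;

                /* Align with smaller pages if needed */
                if (index & ((1UL << order) - 1)) {
                        order = __builtin_ctzl(index);  /* __ffs() stand-in */
                        if (order == 1)
                                order = 0;
                }
                /* Don't allocate pages past EOF (here: limit) */
                while (index + (1UL << order) - 1 > limit) {
                        if (--order == 1)
                                order = 0;
                }
                printf("index %2lu: order %u (%lu pages)\n",
                       index, order, 1UL << order);
                index += 1UL << order;
        }
        return 0;
}

The output shows small folios until the index is naturally aligned, a larger folio in the middle, and order-0 folios again once a bigger one would cross the limit.
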
 /*
  * A minimal readahead algorithm for trivial sequential/random reads.
  */
 static void ondemand_readahead(struct readahead_control *ractl,
-               bool hit_readahead_marker, unsigned long req_size)
+               struct folio *folio, unsigned long req_size)
 {
        struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
        struct file_ra_state *ra = ractl->ra;
@@ -584,12 +676,12 @@ static void ondemand_readahead(struct readahead_control *ractl,
        }
 
        /*
-        * Hit a marked page without valid readahead state.
+        * Hit a marked folio without valid readahead state.
         * E.g. interleaved reads.
         * Query the pagecache for async_size, which normally equals the
         * readahead size. Ramp it up and use it as the new readahead size.
         */
-       if (hit_readahead_marker) {
+       if (folio) {
                pgoff_t start;
 
                rcu_read_lock();
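
The interleaved-read case above infers a readahead size from how many consecutive pages some other reader has already brought into the cache; the kernel walks the page cache under RCU (the rcu_read_lock() above) to find the next missing page. A purely conceptual sketch with an in-memory bitmap standing in for the page cache (TOY_PAGES, cached_run() and the doubling ramp are simplifications, not the kernel's exact logic):

#include <stdbool.h>
#include <stdio.h>

#define TOY_PAGES 64                    /* toy file size in pages */

/* Length of the run of cached pages starting at index: a rough
 * estimate of how much readahead another reader already issued. */
static unsigned long cached_run(const bool cached[], unsigned long index)
{
        unsigned long n = 0;

        while (index + n < TOY_PAGES && cached[index + n])
                n++;
        return n;
}

int main(void)
{
        bool cached[TOY_PAGES] = { false };
        unsigned long mark = 40, i, size;

        /* An interleaved reader already populated pages 40..47. */
        for (i = 40; i < 48; i++)
                cached[i] = true;

        /* We hit the readahead mark at page 40 with no ra state of our
         * own: adopt the cached run as the readahead size and ramp it. */
        size = cached_run(cached, mark);
        printf("inferred size %lu, ramped size %lu\n", size, size * 2);
        return 0;
}
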
@@ -662,7 +754,7 @@ readit:
        }
 
        ractl->_index = ra->start;
-       do_page_cache_ra(ractl, ra->size, ra->async_size);
+       page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0);
 }
 
 void page_cache_sync_ra(struct readahead_control *ractl,
@@ -690,7 +782,7 @@ void page_cache_sync_ra(struct readahead_control *ractl,
        }
 
        /* do read-ahead */
-       ondemand_readahead(ractl, false, req_count);
+       ondemand_readahead(ractl, NULL, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_sync_ra);
 
@@ -709,17 +801,11 @@ void page_cache_async_ra(struct readahead_control *ractl,
 
        folio_clear_readahead(folio);
 
-       /*
-        * Defer asynchronous read-ahead on IO congestion.
-        */
-       if (inode_read_congested(ractl->mapping->host))
-               return;
-
        if (blk_cgroup_congested())
                return;
 
        /* do read-ahead */
-       ondemand_readahead(ractl, true, req_count);
+       ondemand_readahead(ractl, folio, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_async_ra);
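
With the folio passed down from page_cache_async_ra(), ondemand_readahead() starts page_cache_ra_order() at the current folio's order, so a steady sequential reader ramps the folio size up by two orders per readahead round until it hits MAX_PAGECACHE_ORDER. A minimal sketch of just that ramp (illustrative only; MAX_ORDER_DEMO is a made-up stand-in for MAX_PAGECACHE_ORDER and the ra->size clamp is ignored):

#include <stdio.h>

#define MAX_ORDER_DEMO 9                /* e.g. HPAGE_PMD_ORDER on x86-64 */

/* One readahead round: bump the previous folio order by 2, capped. */
static unsigned int ramp_order(unsigned int order)
{
        if (order < MAX_ORDER_DEMO) {
                order += 2;
                if (order > MAX_ORDER_DEMO)
                        order = MAX_ORDER_DEMO;
        }
        return order;
}

int main(void)
{
        unsigned int order = 0;         /* first read uses an order-0 folio */
        int round;

        for (round = 1; round <= 6; round++) {
                order = ramp_order(order);
                printf("round %d: order %u\n", round, order);
        }
        return 0;
}
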