Linux 6.9-rc1
diff --git a/mm/readahead.c b/mm/readahead.c
index fdcd28c..130c0e7 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
 #include <linux/export.h>
 #include <linux/backing-dev.h>
 #include <linux/task_io_accounting_ops.h>
-#include <linux/pagevec.h>
 #include <linux/pagemap.h>
+#include <linux/psi.h>
 #include <linux/syscalls.h>
 #include <linux/file.h>
 #include <linux/mm_inline.h>
@@ -152,6 +152,8 @@ static void read_pages(struct readahead_control *rac)
        if (!readahead_count(rac))
                return;
 
+       if (unlikely(rac->_workingset))
+               psi_memstall_enter(&rac->_pflags);
        blk_start_plug(&plug);
 
        if (aops->readahead) {
@@ -179,6 +181,9 @@ static void read_pages(struct readahead_control *rac)
        }
 
        blk_finish_plug(&plug);
+       if (unlikely(rac->_workingset))
+               psi_memstall_leave(&rac->_pflags);
+       rac->_workingset = false;
 
        BUG_ON(readahead_count(rac));
 }
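
These two hunks bracket the actual I/O submission with pressure-stall (PSI) accounting: if any folio in the batch is a refault from the workingset, the time spent reading it back in is charged as a memory stall. The _workingset flag itself is accumulated as folios are added to the batch, as the hunks below show. A minimal userspace sketch of the bracketing pattern, with hypothetical stubs standing in for the kernel's PSI hooks:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stubs standing in for the kernel's PSI hooks. */
    static void psi_memstall_enter(unsigned long *pflags) { (void)pflags; puts("memstall enter"); }
    static void psi_memstall_leave(unsigned long *pflags) { (void)pflags; puts("memstall leave"); }

    struct readahead_control {
        bool _workingset;       /* did any folio refault from the workingset? */
        unsigned long _pflags;  /* saved task flags for PSI */
    };

    /* Same shape as read_pages(): only refaulting batches are stall-accounted. */
    static void read_pages(struct readahead_control *rac)
    {
        if (rac->_workingset)
            psi_memstall_enter(&rac->_pflags);
        /* ... blk_start_plug(), ->readahead()/->read_folio(), blk_finish_plug() ... */
        if (rac->_workingset)
            psi_memstall_leave(&rac->_pflags);
        rac->_workingset = false;
    }

    int main(void)
    {
        struct readahead_control rac = { ._workingset = true };
        read_pages(&rac);   /* prints both markers */
        return 0;
    }
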
@@ -252,6 +257,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
                }
                if (i == nr_to_read - lookahead_size)
                        folio_set_readahead(folio);
+               ractl->_workingset |= folio_test_workingset(folio);
                ractl->_nr_pages++;
        }
 
@@ -455,19 +461,6 @@ static int try_context_readahead(struct address_space *mapping,
        return 1;
 }
 
-/*
- * There are some parts of the kernel which assume that PMD entries
- * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
- * limit the maximum allocation order to PMD size.  I'm not aware of any
- * assumptions about maximum order if THP are disabled, but 8 seems like
- * a good order (that's 1MB if you're using 4kB pages)
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER    HPAGE_PMD_ORDER
-#else
-#define MAX_PAGECACHE_ORDER    8
-#endif
-
 static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
                pgoff_t mark, unsigned int order, gfp_t gfp)
 {
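
MAX_PAGECACHE_ORDER is only removed from this file, not from the kernel: a later hunk still uses it, so the definition must now come from a shared header (in current mainline it lives in include/linux/pagemap.h). For reference, the relocated definition is unchanged:

    #ifdef CONFIG_TRANSPARENT_HUGEPAGE
    #define MAX_PAGECACHE_ORDER    HPAGE_PMD_ORDER
    #else
    #define MAX_PAGECACHE_ORDER    8
    #endif
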
@@ -476,15 +469,18 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
 
        if (!folio)
                return -ENOMEM;
-       mark = round_up(mark, 1UL << order);
+       mark = round_down(mark, 1UL << order);
        if (index == mark)
                folio_set_readahead(folio);
        err = filemap_add_folio(ractl->mapping, folio, index, gfp);
-       if (err)
+       if (err) {
                folio_put(folio);
-       else
-               ractl->_nr_pages += 1UL << order;
-       return err;
+               return err;
+       }
+
+       ractl->_nr_pages += 1UL << order;
+       ractl->_workingset |= folio_test_workingset(folio);
+       return 0;
 }
 
 void page_cache_ra_order(struct readahead_control *ractl,
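
Two things change in ra_alloc_folio(): the error path now returns early instead of hanging off an if/else, and the readahead mark is rounded down rather than up. Folios are allocated at indices aligned to 1UL << order, so rounding the mark down lands it on the folio that actually contains the marked page; rounding up could push it past that folio, and the index == mark test would never fire. The new _workingset |= line feeds the PSI accounting shown earlier. A runnable sketch of the two rounding modes, with the kernel macros inlined:

    #include <stdio.h>

    /* Power-of-two rounding, as the kernel's round_down()/round_up() do it. */
    #define round_down(x, y) ((x) & ~((y) - 1))
    #define round_up(x, y)   ((((x) - 1) | ((y) - 1)) + 1)

    int main(void)
    {
        unsigned long mark = 13;    /* page index to mark with PG_readahead */
        unsigned int order = 2;     /* folios cover 4 pages: 0-3, 4-7, 8-11, 12-15 */

        /* The folio allocated at index 12 contains page 13; round_down finds
         * it, while the old round_up would wait for index 16 and never match. */
        printf("round_down: %lu\n", round_down(mark, 1UL << order)); /* 12 */
        printf("round_up:   %lu\n", round_up(mark, 1UL << order));   /* 16 */
        return 0;
    }
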
@@ -504,10 +500,8 @@ void page_cache_ra_order(struct readahead_control *ractl,
 
        if (new_order < MAX_PAGECACHE_ORDER) {
                new_order += 2;
-               if (new_order > MAX_PAGECACHE_ORDER)
-                       new_order = MAX_PAGECACHE_ORDER;
-               while ((1 << new_order) > ra->size)
-                       new_order--;
+               new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
+               new_order = min_t(unsigned int, new_order, ilog2(ra->size));
        }
 
        filemap_invalidate_lock_shared(mapping);
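
The decrement loop that sized new_order is replaced by a closed form: bump the order by two, cap it at MAX_PAGECACHE_ORDER, then cap it at the largest order that still fits in the readahead window, which is exactly ilog2(ra->size). A userspace sketch with stand-ins for min_t() and ilog2() (values are illustrative):

    #include <stdio.h>

    #define MAX_PAGECACHE_ORDER 9   /* HPAGE_PMD_ORDER with 4KiB pages on x86-64 */

    static unsigned int min_u(unsigned int a, unsigned int b) { return a < b ? a : b; }

    /* ilog2(): position of the most significant set bit; x must be nonzero. */
    static unsigned int ilog2_u(unsigned long x)
    {
        return 8 * sizeof(long) - 1 - __builtin_clzl(x);
    }

    int main(void)
    {
        unsigned int new_order = 4;  /* order of the folio that triggered us */
        unsigned long ra_size = 32;  /* readahead window size, in pages */

        if (new_order < MAX_PAGECACHE_ORDER) {
            new_order += 2;
            new_order = min_u(MAX_PAGECACHE_ORDER, new_order);
            new_order = min_u(new_order, ilog2_u(ra_size));
        }
        printf("new_order = %u\n", new_order);  /* 5: a 32-page folio fits exactly */
        return 0;
    }
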
@@ -515,16 +509,11 @@ void page_cache_ra_order(struct readahead_control *ractl,
                unsigned int order = new_order;
 
                /* Align with smaller pages if needed */
-               if (index & ((1UL << order) - 1)) {
+               if (index & ((1UL << order) - 1))
                        order = __ffs(index);
-                       if (order == 1)
-                               order = 0;
-               }
                /* Don't allocate pages past EOF */
-               while (index + (1UL << order) - 1 > limit) {
-                       if (--order == 1)
-                               order = 0;
-               }
+               while (index + (1UL << order) - 1 > limit)
+                       order--;
                err = ra_alloc_folio(ractl, index, mark, order, gfp);
                if (err)
                        break;
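
With order-1 folios now supported in the page cache, both special cases that forced order 1 down to 0 can go, and the loop body reads straightforwardly: if the index is not aligned to the preferred order, fall back to the largest order the index is naturally aligned to, which is __ffs(index) (the position of its lowest set bit), then shrink until the folio no longer reaches past EOF. A runnable walk-through of that arithmetic:

    #include <stdio.h>

    /* __ffs(): position of the least significant set bit (kernel helper). */
    static unsigned int __ffs_u(unsigned long x)
    {
        return __builtin_ctzl(x);
    }

    int main(void)
    {
        unsigned long index = 20;   /* 0b10100: aligned to 4 pages, not to 8 */
        unsigned long limit = 22;   /* last valid page index before EOF */
        unsigned int order = 3;     /* preferred folio order */

        /* Fall back to the alignment the index already has... */
        if (index & ((1UL << order) - 1))
            order = __ffs_u(index);         /* -> 2 */
        /* ...then shrink so the folio does not extend past EOF. */
        while (index + (1UL << order) - 1 > limit)
            order--;                        /* 23 > 22, so order drops to 1 */

        printf("order = %u\n", order);      /* 1: a two-page folio fits */
        return 0;
    }
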
@@ -581,7 +570,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
         * It's the expected callback index, assume sequential access.
         * Ramp up sizes, and push forward the readahead window.
         */
-       expected = round_up(ra->start + ra->size - ra->async_size,
+       expected = round_down(ra->start + ra->size - ra->async_size,
                        1UL << order);
        if (index == expected || index == (ra->start + ra->size)) {
                ra->start += ra->size;
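
The same round_up to round_down switch as in ra_alloc_folio(), for the same reason: the folio carrying the PG_readahead mark starts at an index aligned down to its order, so the expected sequential index has to be aligned the same way, or the comparison with the faulting folio's index will miss and the window never advances. A small sketch with illustrative window numbers:

    #include <stdio.h>

    #define round_down(x, y) ((x) & ~((y) - 1))

    int main(void)
    {
        /* Window covers pages 64..127; the last 28 form the async tail. */
        unsigned long start = 64, size = 64, async_size = 28;
        unsigned int order = 4;      /* the marked page sits in a 16-page folio */
        unsigned long index = 96;    /* ->index of the folio we just hit */

        /* The trigger page is 100, but its folio starts at 96; round_up
         * would have produced 112 here and the hit would never register. */
        unsigned long expected = round_down(start + size - async_size,
                                            1UL << order);
        printf("expected = %lu, hit = %d\n", expected, index == expected);
        return 0;
    }
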
@@ -739,7 +728,8 @@ ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
         */
        ret = -EINVAL;
        if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
-           !S_ISREG(file_inode(f.file)->i_mode))
+           (!S_ISREG(file_inode(f.file)->i_mode) &&
+           !S_ISBLK(file_inode(f.file)->i_mode)))
                goto out;
 
        ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
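
readahead(2) used to fail with EINVAL on block device files even though they are backed by the page cache just like regular files; the check now admits S_ISBLK alongside S_ISREG. The same classification is easy to reproduce from userspace (the device path below is only an example):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
        struct stat st;
        /* Example path; pass any file or block device as argv[1]. */
        int fd = open(argc > 1 ? argv[1] : "/dev/sda", O_RDONLY);

        if (fd < 0 || fstat(fd, &st) < 0) {
            perror("open/fstat");
            return 1;
        }
        /* The test ksys_readahead() now applies: */
        if (S_ISREG(st.st_mode) || S_ISBLK(st.st_mode))
            puts("readahead(2) would proceed");
        else
            puts("readahead(2) would return -EINVAL");
        close(fd);
        return 0;
    }
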
@@ -791,21 +781,25 @@ void readahead_expand(struct readahead_control *ractl,
        /* Expand the leading edge downwards */
        while (ractl->_index > new_index) {
                unsigned long index = ractl->_index - 1;
-               struct page *page = xa_load(&mapping->i_pages, index);
+               struct folio *folio = xa_load(&mapping->i_pages, index);
 
-               if (page && !xa_is_value(page))
-                       return; /* Page apparently present */
+               if (folio && !xa_is_value(folio))
+                       return; /* Folio apparently present */
 
-               page = __page_cache_alloc(gfp_mask);
-               if (!page)
+               folio = filemap_alloc_folio(gfp_mask, 0);
+               if (!folio)
                        return;
-               if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
-                       put_page(page);
+               if (filemap_add_folio(mapping, folio, index, gfp_mask) < 0) {
+                       folio_put(folio);
                        return;
                }
-
+               if (unlikely(folio_test_workingset(folio)) &&
+                               !ractl->_workingset) {
+                       ractl->_workingset = true;
+                       psi_memstall_enter(&ractl->_pflags);
+               }
                ractl->_nr_pages++;
-               ractl->_index = page->index;
+               ractl->_index = folio->index;
        }
 
        new_len += new_start - readahead_pos(ractl);
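
The leading-edge expansion loop is converted from struct page to struct folio: xa_load() still probes for a present (non-shadow) entry, filemap_alloc_folio(gfp_mask, 0) replaces __page_cache_alloc() with an order-0 folio, and filemap_add_folio() replaces add_to_page_cache_lru(). New here is the PSI entry: readahead_expand() runs inside ->readahead(), after read_pages() has already made its enter-or-skip decision, so it enters the stall section itself, at most once; setting _workingset makes read_pages() issue the matching psi_memstall_leave() when the batch completes. The trailing-edge loop below follows the same pattern. A stub sketch of the enter-at-most-once logic:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stubs; odd "folios" simulate workingset refaults. */
    static bool folio_test_workingset(int folio) { return folio % 2; }
    static void psi_memstall_enter(unsigned long *pflags) { (void)pflags; puts("enter stall"); }

    struct readahead_control {
        bool _workingset;
        unsigned long _pflags;
        unsigned long _nr_pages;
    };

    int main(void)
    {
        struct readahead_control rac = { 0 };

        /* Expand over a few folios; enter the memstall section at most once,
         * on the first refault seen. The matching psi_memstall_leave() is
         * issued later, by read_pages(). */
        for (int folio = 0; folio < 4; folio++) {
            if (folio_test_workingset(folio) && !rac._workingset) {
                rac._workingset = true;
                psi_memstall_enter(&rac._pflags);
            }
            rac._nr_pages++;
        }
        printf("nr_pages = %lu\n", rac._nr_pages);  /* 4, one "enter stall" */
        return 0;
    }
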
@@ -814,18 +808,23 @@ void readahead_expand(struct readahead_control *ractl,
        /* Expand the trailing edge upwards */
        while (ractl->_nr_pages < new_nr_pages) {
                unsigned long index = ractl->_index + ractl->_nr_pages;
-               struct page *page = xa_load(&mapping->i_pages, index);
+               struct folio *folio = xa_load(&mapping->i_pages, index);
 
-               if (page && !xa_is_value(page))
-                       return; /* Page apparently present */
+               if (folio && !xa_is_value(folio))
+                       return; /* Folio apparently present */
 
-               page = __page_cache_alloc(gfp_mask);
-               if (!page)
+               folio = filemap_alloc_folio(gfp_mask, 0);
+               if (!folio)
                        return;
-               if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
-                       put_page(page);
+               if (filemap_add_folio(mapping, folio, index, gfp_mask) < 0) {
+                       folio_put(folio);
                        return;
                }
+               if (unlikely(folio_test_workingset(folio)) &&
+                               !ractl->_workingset) {
+                       ractl->_workingset = true;
+                       psi_memstall_enter(&ractl->_pflags);
+               }
                ractl->_nr_pages++;
                if (ra) {
                        ra->size++;