Linux 6.9-rc1
[linux-2.6-microblaze.git] mm/page-writeback.c
index d0d466a..3e19b87 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/math64.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/fs.h>
@@ -197,7 +198,7 @@ static void wb_min_max_ratio(struct bdi_writeback *wb,
                        min *= this_bw;
                        min = div64_ul(min, tot_bw);
                }
-               if (max < 100) {
+               if (max < 100 * BDI_RATIO_SCALE) {
                        max *= this_bw;
                        max = div64_ul(max, tot_bw);
                }
@@ -650,11 +651,48 @@ void wb_domain_exit(struct wb_domain *dom)
  */
 static unsigned int bdi_min_ratio;
 
-int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
+static int bdi_check_pages_limit(unsigned long pages)
+{
+       unsigned long max_dirty_pages = global_dirtyable_memory();
+
+       if (pages > max_dirty_pages)
+               return -EINVAL;
+
+       return 0;
+}
+
+static unsigned long bdi_ratio_from_pages(unsigned long pages)
+{
+       unsigned long background_thresh;
+       unsigned long dirty_thresh;
+       unsigned long ratio;
+
+       global_dirty_limits(&background_thresh, &dirty_thresh);
+       ratio = div64_u64(pages * 100ULL * BDI_RATIO_SCALE, dirty_thresh);
+
+       return ratio;
+}
+
+static u64 bdi_get_bytes(unsigned int ratio)
+{
+       unsigned long background_thresh;
+       unsigned long dirty_thresh;
+       u64 bytes;
+
+       global_dirty_limits(&background_thresh, &dirty_thresh);
+       bytes = (dirty_thresh * PAGE_SIZE * ratio) / BDI_RATIO_SCALE / 100;
+
+       return bytes;
+}
+
+static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
        unsigned int delta;
        int ret = 0;
 
+       if (min_ratio > 100 * BDI_RATIO_SCALE)
+               return -EINVAL;
+
        spin_lock_bh(&bdi_lock);
        if (min_ratio > bdi->max_ratio) {
                ret = -EINVAL;
@@ -665,7 +703,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
                        bdi->min_ratio = min_ratio;
                } else {
                        delta = min_ratio - bdi->min_ratio;
-                       if (bdi_min_ratio + delta < 100) {
+                       if (bdi_min_ratio + delta < 100 * BDI_RATIO_SCALE) {
                                bdi_min_ratio += delta;
                                bdi->min_ratio = min_ratio;
                        } else {
@@ -678,11 +716,11 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
        return ret;
 }
 
-int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
+static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio)
 {
        int ret = 0;
 
-       if (max_ratio > 100)
+       if (max_ratio > 100 * BDI_RATIO_SCALE)
                return -EINVAL;
 
        spin_lock_bh(&bdi_lock);
@@ -690,14 +728,88 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
                ret = -EINVAL;
        } else {
                bdi->max_ratio = max_ratio;
-               bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
+               bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) /
+                                               (100 * BDI_RATIO_SCALE);
        }
        spin_unlock_bh(&bdi_lock);
 
        return ret;
 }
+
+int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio)
+{
+       return __bdi_set_min_ratio(bdi, min_ratio);
+}
+
+int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio)
+{
+       return __bdi_set_max_ratio(bdi, max_ratio);
+}
+
+int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
+{
+       return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE);
+}
+
+int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio)
+{
+       return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE);
+}
 EXPORT_SYMBOL(bdi_set_max_ratio);
 
+u64 bdi_get_min_bytes(struct backing_dev_info *bdi)
+{
+       return bdi_get_bytes(bdi->min_ratio);
+}
+
+int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes)
+{
+       int ret;
+       unsigned long pages = min_bytes >> PAGE_SHIFT;
+       unsigned long min_ratio;
+
+       ret = bdi_check_pages_limit(pages);
+       if (ret)
+               return ret;
+
+       min_ratio = bdi_ratio_from_pages(pages);
+       return __bdi_set_min_ratio(bdi, min_ratio);
+}
+
+u64 bdi_get_max_bytes(struct backing_dev_info *bdi)
+{
+       return bdi_get_bytes(bdi->max_ratio);
+}
+
+int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes)
+{
+       int ret;
+       unsigned long pages = max_bytes >> PAGE_SHIFT;
+       unsigned long max_ratio;
+
+       ret = bdi_check_pages_limit(pages);
+       if (ret)
+               return ret;
+
+       max_ratio = bdi_ratio_from_pages(pages);
+       return __bdi_set_max_ratio(bdi, max_ratio);
+}
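
For reference, the conversion above is plain fixed-point arithmetic: a byte limit is expressed as a fraction of the global dirty threshold, scaled by BDI_RATIO_SCALE so that sub-percent values survive integer division (the plain bdi_set_min_ratio()/bdi_set_max_ratio() wrappers likewise multiply a whole percentage by the same scale). A minimal stand-alone sketch of the round trip, assuming BDI_RATIO_SCALE is 10000 as in the mainline backing-dev header and an illustrative 4 KiB page size:

#include <stdint.h>
#include <stdio.h>

#define BDI_RATIO_SCALE	10000ULL	/* assumed value of the kernel constant */
#ifndef PAGE_SIZE
#define PAGE_SIZE	4096ULL		/* illustrative; architecture dependent */
#endif

/* pages -> scaled ratio, mirroring bdi_ratio_from_pages() */
static uint64_t ratio_from_pages(uint64_t pages, uint64_t dirty_thresh)
{
	return pages * 100ULL * BDI_RATIO_SCALE / dirty_thresh;
}

/* scaled ratio -> bytes, mirroring bdi_get_bytes() */
static uint64_t bytes_from_ratio(uint64_t ratio, uint64_t dirty_thresh)
{
	return dirty_thresh * PAGE_SIZE * ratio / BDI_RATIO_SCALE / 100ULL;
}

int main(void)
{
	uint64_t dirty_thresh = 1ULL << 20;	/* say, 1Mi dirtyable pages */
	uint64_t min_bytes = 256ULL << 20;	/* request a 256 MiB floor */
	uint64_t ratio = ratio_from_pages(min_bytes / PAGE_SIZE, dirty_thresh);

	/* 65536 of 1048576 pages is 6.25%, stored as 62500 with this scale */
	printf("scaled ratio: %llu (%.2f%%)\n",
	       (unsigned long long)ratio, ratio / (double)BDI_RATIO_SCALE);
	printf("back to bytes: %llu\n",
	       (unsigned long long)bytes_from_ratio(ratio, dirty_thresh));
	return 0;
}
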
+
+int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit)
+{
+       if (strict_limit > 1)
+               return -EINVAL;
+
+       spin_lock_bh(&bdi_lock);
+       if (strict_limit)
+               bdi->capabilities |= BDI_CAP_STRICTLIMIT;
+       else
+               bdi->capabilities &= ~BDI_CAP_STRICTLIMIT;
+       spin_unlock_bh(&bdi_lock);
+
+       return 0;
+}
+
 static unsigned long dirty_freerun_ceiling(unsigned long thresh,
                                           unsigned long bg_thresh)
 {
@@ -760,15 +872,15 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
        fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
                              &numerator, &denominator);
 
-       wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
+       wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE);
        wb_thresh *= numerator;
        wb_thresh = div64_ul(wb_thresh, denominator);
 
        wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
 
-       wb_thresh += (thresh * wb_min_ratio) / 100;
-       if (wb_thresh > (thresh * wb_max_ratio) / 100)
-               wb_thresh = thresh * wb_max_ratio / 100;
+       wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE);
+       if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE))
+               wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
 
        return wb_thresh;
 }
@@ -1081,7 +1193,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb,
         * write_bandwidth = ---------------------------------------------------
         *                                          period
         *
-        * @written may have decreased due to folio_account_redirty().
+        * @written may have decreased due to folio_redirty_for_writepage().
         * Avoid underflowing @bw calculation.
         */
        bw = written - min(written, wb->written_stamp);
@@ -1526,7 +1638,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
         */
        dtc->wb_thresh = __wb_calc_thresh(dtc);
        dtc->wb_bg_thresh = dtc->thresh ?
-               div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
+               div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
 
        /*
         * In order to avoid the stacked BDI deadlock we need
@@ -1809,7 +1921,7 @@ pause:
                        break;
                }
                __set_current_state(TASK_KILLABLE);
-               wb->dirty_sleep = now;
+               bdi->last_bdp_sleep = jiffies;
                io_schedule_timeout(pause);
 
                current->dirty_paused_when = now + pause;
@@ -1933,6 +2045,7 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping,
        wb_put(wb);
        return ret;
 }
+EXPORT_SYMBOL_GPL(balance_dirty_pages_ratelimited_flags);
 
 /**
  * balance_dirty_pages_ratelimited - balance dirty memory state.
@@ -2212,18 +2325,18 @@ void __init page_writeback_init(void)
 }
 
 /**
- * tag_pages_for_writeback - tag pages to be written by write_cache_pages
+ * tag_pages_for_writeback - tag pages to be written by writeback
  * @mapping: address space structure to write
  * @start: starting page index
  * @end: ending page index (inclusive)
  *
  * This function scans the page range from @start to @end (inclusive) and tags
- * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
- * that write_cache_pages (or whoever calls this function) will then use
- * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
- * used to avoid livelocking of writeback by a process steadily creating new
- * dirty pages in the file (thus it is important for this function to be quick
- * so that it can tag pages faster than a dirtying process can create them).
+ * all pages that have DIRTY tag set with a special TOWRITE tag.  The caller
+ * can then use the TOWRITE tag to identify pages eligible for writeback.
+ * This mechanism is used to avoid livelocking of writeback by a process
+ * steadily creating new dirty pages in the file (thus it is important for this
+ * function to be quick so that it can tag pages faster than a dirtying process
+ * can create them).
  */
 void tag_pages_for_writeback(struct address_space *mapping,
                             pgoff_t start, pgoff_t end)
@@ -2247,212 +2360,243 @@ void tag_pages_for_writeback(struct address_space *mapping,
 }
 EXPORT_SYMBOL(tag_pages_for_writeback);
 
-/**
- * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
- * @mapping: address space structure to write
- * @wbc: subtract the number of written pages from *@wbc->nr_to_write
- * @writepage: function called for each page
- * @data: data passed to writepage function
- *
- * If a page is already under I/O, write_cache_pages() skips it, even
- * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
- * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
- * and msync() need to guarantee that all the data which was dirty at the time
- * the call was made get new I/O started against them.  If wbc->sync_mode is
- * WB_SYNC_ALL then we were called for data integrity and we must wait for
- * existing IO to complete.
- *
- * To avoid livelocks (when other process dirties new pages), we first tag
- * pages which should be written back with TOWRITE tag and only then start
- * writing them. For data-integrity sync we have to be careful so that we do
- * not miss some pages (e.g., because some other process has cleared TOWRITE
- * tag we set). The rule we follow is that TOWRITE tag can be cleared only
- * by the process clearing the DIRTY tag (and submitting the page for IO).
- *
- * To avoid deadlocks between range_cyclic writeback and callers that hold
- * pages in PageWriteback to aggregate IO until write_cache_pages() returns,
- * we do not loop back to the start of the file. Doing so causes a page
- * lock/page writeback access order inversion - we should only ever lock
- * multiple pages in ascending page->index order, and looping back to the start
- * of the file violates that rule and causes deadlocks.
- *
- * Return: %0 on success, negative error code otherwise
- */
-int write_cache_pages(struct address_space *mapping,
-                     struct writeback_control *wbc, writepage_t writepage,
-                     void *data)
+static bool folio_prepare_writeback(struct address_space *mapping,
+               struct writeback_control *wbc, struct folio *folio)
 {
-       int ret = 0;
-       int done = 0;
-       int error;
-       struct pagevec pvec;
-       int nr_pages;
-       pgoff_t index;
-       pgoff_t end;            /* Inclusive */
-       pgoff_t done_index;
-       int range_whole = 0;
-       xa_mark_t tag;
-
-       pagevec_init(&pvec);
-       if (wbc->range_cyclic) {
-               index = mapping->writeback_index; /* prev offset */
-               end = -1;
-       } else {
-               index = wbc->range_start >> PAGE_SHIFT;
-               end = wbc->range_end >> PAGE_SHIFT;
-               if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-                       range_whole = 1;
-       }
-       if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
-               tag_pages_for_writeback(mapping, index, end);
-               tag = PAGECACHE_TAG_TOWRITE;
-       } else {
-               tag = PAGECACHE_TAG_DIRTY;
-       }
-       done_index = index;
-       while (!done && (index <= end)) {
-               int i;
-
-               nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
-                               tag);
-               if (nr_pages == 0)
-                       break;
-
-               for (i = 0; i < nr_pages; i++) {
-                       struct page *page = pvec.pages[i];
+       /*
+        * Folio truncated or invalidated. We can freely skip it then,
+        * even for data integrity operations: the folio has disappeared
+        * concurrently, so there could be no real expectation of this
+        * data integrity operation even if there is now a new, dirty
+        * folio at the same pagecache index.
+        */
+       if (unlikely(folio->mapping != mapping))
+               return false;
 
-                       done_index = page->index;
+       /*
+        * Did somebody else write it for us?
+        */
+       if (!folio_test_dirty(folio))
+               return false;
 
-                       lock_page(page);
+       if (folio_test_writeback(folio)) {
+               if (wbc->sync_mode == WB_SYNC_NONE)
+                       return false;
+               folio_wait_writeback(folio);
+       }
+       BUG_ON(folio_test_writeback(folio));
 
-                       /*
-                        * Page truncated or invalidated. We can freely skip it
-                        * then, even for data integrity operations: the page
-                        * has disappeared concurrently, so there could be no
-                        * real expectation of this data integrity operation
-                        * even if there is now a new, dirty page at the same
-                        * pagecache address.
-                        */
-                       if (unlikely(page->mapping != mapping)) {
-continue_unlock:
-                               unlock_page(page);
-                               continue;
-                       }
+       if (!folio_clear_dirty_for_io(folio))
+               return false;
 
-                       if (!PageDirty(page)) {
-                               /* someone wrote it for us */
-                               goto continue_unlock;
-                       }
+       return true;
+}
 
-                       if (PageWriteback(page)) {
-                               if (wbc->sync_mode != WB_SYNC_NONE)
-                                       wait_on_page_writeback(page);
-                               else
-                                       goto continue_unlock;
-                       }
+static xa_mark_t wbc_to_tag(struct writeback_control *wbc)
+{
+       if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+               return PAGECACHE_TAG_TOWRITE;
+       return PAGECACHE_TAG_DIRTY;
+}
 
-                       BUG_ON(PageWriteback(page));
-                       if (!clear_page_dirty_for_io(page))
-                               goto continue_unlock;
+static pgoff_t wbc_end(struct writeback_control *wbc)
+{
+       if (wbc->range_cyclic)
+               return -1;
+       return wbc->range_end >> PAGE_SHIFT;
+}
 
-                       trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
-                       error = (*writepage)(page, wbc, data);
-                       if (unlikely(error)) {
-                               /*
-                                * Handle errors according to the type of
-                                * writeback. There's no need to continue for
-                                * background writeback. Just push done_index
-                                * past this page so media errors won't choke
-                                * writeout for the entire file. For integrity
-                                * writeback, we must process the entire dirty
-                                * set regardless of errors because the fs may
-                                * still have state to clear for each page. In
-                                * that case we continue processing and return
-                                * the first error.
-                                */
-                               if (error == AOP_WRITEPAGE_ACTIVATE) {
-                                       unlock_page(page);
-                                       error = 0;
-                               } else if (wbc->sync_mode != WB_SYNC_ALL) {
-                                       ret = error;
-                                       done_index = page->index + 1;
-                                       done = 1;
-                                       break;
-                               }
-                               if (!ret)
-                                       ret = error;
-                       }
+static struct folio *writeback_get_folio(struct address_space *mapping,
+               struct writeback_control *wbc)
+{
+       struct folio *folio;
 
-                       /*
-                        * We stop writing back only if we are not doing
-                        * integrity sync. In case of integrity sync we have to
-                        * keep going until we have written all the pages
-                        * we tagged for writeback prior to entering this loop.
-                        */
-                       if (--wbc->nr_to_write <= 0 &&
-                           wbc->sync_mode == WB_SYNC_NONE) {
-                               done = 1;
-                               break;
-                       }
-               }
-               pagevec_release(&pvec);
+retry:
+       folio = folio_batch_next(&wbc->fbatch);
+       if (!folio) {
+               folio_batch_release(&wbc->fbatch);
                cond_resched();
+               filemap_get_folios_tag(mapping, &wbc->index, wbc_end(wbc),
+                               wbc_to_tag(wbc), &wbc->fbatch);
+               folio = folio_batch_next(&wbc->fbatch);
+               if (!folio)
+                       return NULL;
        }
 
-       /*
-        * If we hit the last page and there is more work to be done: wrap
-        * back the index back to the start of the file for the next
-        * time we are called.
-        */
-       if (wbc->range_cyclic && !done)
-               done_index = 0;
-       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-               mapping->writeback_index = done_index;
+       folio_lock(folio);
+       if (unlikely(!folio_prepare_writeback(mapping, wbc, folio))) {
+               folio_unlock(folio);
+               goto retry;
+       }
 
-       return ret;
+       trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
+       return folio;
 }
-EXPORT_SYMBOL(write_cache_pages);
 
-/*
- * Function used by generic_writepages to call the real writepage
- * function and set the mapping flags on error
+/**
+ * writeback_iter - iterate folios of a mapping for writeback
+ * @mapping: address space structure to write
+ * @wbc: writeback context
+ * @folio: previously iterated folio (%NULL to start)
+ * @error: in-out pointer for writeback errors (see below)
+ *
+ * This function returns the next folio for the writeback operation described by
+ * @wbc on @mapping and should be called in a while loop in the ->writepages
+ * implementation.
+ *
+ * To start the writeback operation, %NULL is passed in the @folio argument, and
+ * for every subsequent iteration the folio returned previously should be passed
+ * back in.
+ *
+ * If there was an error in the per-folio writeback inside the writeback_iter()
+ * loop, @error should be set to the error value.
+ *
+ * Once the writeback described in @wbc has finished, this function will return
+ * %NULL and, if there was an error in any iteration, report the first such
+ * error in @error.
+ *
+ * Note: callers should not manually break out of the loop using break or goto
+ * but must keep calling writeback_iter() until it returns %NULL.
+ *
+ * Return: the folio to write or %NULL if the loop is done.
  */
-static int __writepage(struct page *page, struct writeback_control *wbc,
-                      void *data)
+struct folio *writeback_iter(struct address_space *mapping,
+               struct writeback_control *wbc, struct folio *folio, int *error)
 {
-       struct address_space *mapping = data;
-       int ret = mapping->a_ops->writepage(page, wbc);
-       mapping_set_error(mapping, ret);
-       return ret;
+       if (!folio) {
+               folio_batch_init(&wbc->fbatch);
+               wbc->saved_err = *error = 0;
+
+               /*
+                * For range cyclic writeback we remember where we stopped so
+                * that we can continue where we stopped.
+                *
+                * For non-cyclic writeback we always start at the beginning of
+                * the passed in range.
+                */
+               if (wbc->range_cyclic)
+                       wbc->index = mapping->writeback_index;
+               else
+                       wbc->index = wbc->range_start >> PAGE_SHIFT;
+
+               /*
+                * To avoid livelocks when other processes dirty new pages, we
+                * first tag pages which should be written back and only then
+                * start writing them.
+                *
+                * For data-integrity writeback we have to be careful so that we
+                * do not miss some pages (e.g., because some other process has
+                * cleared the TOWRITE tag we set).  The rule we follow is that
+                * TOWRITE tag can be cleared only by the process clearing the
+                * DIRTY tag (and submitting the page for I/O).
+                */
+               if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+                       tag_pages_for_writeback(mapping, wbc->index,
+                                       wbc_end(wbc));
+       } else {
+               wbc->nr_to_write -= folio_nr_pages(folio);
+
+               WARN_ON_ONCE(*error > 0);
+
+               /*
+                * For integrity writeback we have to keep going until we have
+                * written all the folios we tagged for writeback above, even if
+                * we run past wbc->nr_to_write or encounter errors.
+                * We stash away the first error we encounter in wbc->saved_err
+                * so that it can be retrieved when we're done.  This is because
+                * the file system may still have state to clear for each folio.
+                *
+                * For background writeback we exit as soon as we run past
+                * wbc->nr_to_write or encounter the first error.
+                */
+               if (wbc->sync_mode == WB_SYNC_ALL) {
+                       if (*error && !wbc->saved_err)
+                               wbc->saved_err = *error;
+               } else {
+                       if (*error || wbc->nr_to_write <= 0)
+                               goto done;
+               }
+       }
+
+       folio = writeback_get_folio(mapping, wbc);
+       if (!folio) {
+               /*
+                * To avoid deadlocks between range_cyclic writeback and callers
+                * that hold pages in PageWriteback to aggregate I/O until
+                * the writeback iteration finishes, we do not loop back to the
+                * start of the file.  Doing so causes a page lock/page
+                * writeback access order inversion - we should only ever lock
+                * multiple pages in ascending page->index order, and looping
+                * back to the start of the file violates that rule and causes
+                * deadlocks.
+                */
+               if (wbc->range_cyclic)
+                       mapping->writeback_index = 0;
+
+               /*
+                * Return the first error we encountered (if there was any) to
+                * the caller.
+                */
+               *error = wbc->saved_err;
+       }
+       return folio;
+
+done:
+       if (wbc->range_cyclic)
+               mapping->writeback_index = folio->index + folio_nr_pages(folio);
+       folio_batch_release(&wbc->fbatch);
+       return NULL;
 }
 
 /**
- * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
+ * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
  * @mapping: address space structure to write
  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
- *
- * This is a library function, which implements the writepages()
- * address_space_operation.
+ * @writepage: function called for each page
+ * @data: data passed to writepage function
  *
  * Return: %0 on success, negative error code otherwise
+ *
+ * Note: please use writeback_iter() instead.
  */
-int generic_writepages(struct address_space *mapping,
-                      struct writeback_control *wbc)
+int write_cache_pages(struct address_space *mapping,
+                     struct writeback_control *wbc, writepage_t writepage,
+                     void *data)
 {
-       struct blk_plug plug;
-       int ret;
+       struct folio *folio = NULL;
+       int error;
 
-       /* deal with chardevs and other special file */
-       if (!mapping->a_ops->writepage)
-               return 0;
+       while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
+               error = writepage(folio, wbc, data);
+               if (error == AOP_WRITEPAGE_ACTIVATE) {
+                       folio_unlock(folio);
+                       error = 0;
+               }
+       }
+
+       return error;
+}
+EXPORT_SYMBOL(write_cache_pages);
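
For the compatibility interface kept above, the callback contract is broadly unchanged: the writepage_t callback receives a locked folio whose dirty bit has been cleared and must either unlock it itself (typically after starting I/O) or return AOP_WRITEPAGE_ACTIVATE, in which case write_cache_pages() unlocks it and moves on. A hedged sketch of such a caller; myfs_legacy_writepages(), myfs_writepage_cb() and myfs_submit_folio() are hypothetical names, not part of this patch:

/* Hypothetical writepage_t callback for the compatibility wrapper above. */
static int myfs_writepage_cb(struct folio *folio,
			     struct writeback_control *wbc, void *data)
{
	struct address_space *mapping = data;
	int ret;

	/* Hypothetical helper: submits the folio for I/O and unlocks it. */
	ret = myfs_submit_folio(folio, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

static int myfs_legacy_writepages(struct address_space *mapping,
				  struct writeback_control *wbc)
{
	return write_cache_pages(mapping, wbc, myfs_writepage_cb, mapping);
}
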
+
+static int writeback_use_writepage(struct address_space *mapping,
+               struct writeback_control *wbc)
+{
+       struct folio *folio = NULL;
+       struct blk_plug plug;
+       int err;
 
        blk_start_plug(&plug);
-       ret = write_cache_pages(mapping, wbc, __writepage, mapping);
+       while ((folio = writeback_iter(mapping, wbc, folio, &err))) {
+               err = mapping->a_ops->writepage(&folio->page, wbc);
+               if (err == AOP_WRITEPAGE_ACTIVATE) {
+                       folio_unlock(folio);
+                       err = 0;
+               }
+               mapping_set_error(mapping, err);
+       }
        blk_finish_plug(&plug);
-       return ret;
-}
 
-EXPORT_SYMBOL(generic_writepages);
+       return err;
+}
 
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
@@ -2464,11 +2608,15 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
        wb = inode_to_wb_wbc(mapping->host, wbc);
        wb_bandwidth_estimate_start(wb);
        while (1) {
-               if (mapping->a_ops->writepages)
+               if (mapping->a_ops->writepages) {
                        ret = mapping->a_ops->writepages(mapping, wbc);
-               else
-                       ret = generic_writepages(mapping, wbc);
-               if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
+               } else if (mapping->a_ops->writepage) {
+                       ret = writeback_use_writepage(mapping, wbc);
+               } else {
+                       /* deal with chardevs and other special files */
+                       ret = 0;
+               }
+               if (ret != -ENOMEM || wbc->sync_mode != WB_SYNC_ALL)
                        break;
 
                /*
@@ -2491,46 +2639,6 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
        return ret;
 }
 
-/**
- * folio_write_one - write out a single folio and wait on I/O.
- * @folio: The folio to write.
- *
- * The folio must be locked by the caller and will be unlocked upon return.
- *
- * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this
- * function returns.
- *
- * Return: %0 on success, negative error code otherwise
- */
-int folio_write_one(struct folio *folio)
-{
-       struct address_space *mapping = folio->mapping;
-       int ret = 0;
-       struct writeback_control wbc = {
-               .sync_mode = WB_SYNC_ALL,
-               .nr_to_write = folio_nr_pages(folio),
-       };
-
-       BUG_ON(!folio_test_locked(folio));
-
-       folio_wait_writeback(folio);
-
-       if (folio_clear_dirty_for_io(folio)) {
-               folio_get(folio);
-               ret = mapping->a_ops->writepage(&folio->page, &wbc);
-               if (ret == 0)
-                       folio_wait_writeback(folio);
-               folio_put(folio);
-       } else {
-               folio_unlock(folio);
-       }
-
-       if (!ret)
-               ret = filemap_check_errors(mapping);
-       return ret;
-}
-EXPORT_SYMBOL(folio_write_one);
-
 /*
  * For address_spaces which do not use buffers nor write back.
  */
@@ -2545,7 +2653,7 @@ EXPORT_SYMBOL(noop_dirty_folio);
 /*
  * Helper function for set_page_dirty family.
  *
- * Caller must hold lock_page_memcg().
+ * Caller must hold folio_memcg_lock().
  *
  * NOTE: This relies on being atomic wrt interrupts.
  */
@@ -2560,7 +2668,7 @@ static void folio_account_dirtied(struct folio *folio,
                struct bdi_writeback *wb;
                long nr = folio_nr_pages(folio);
 
-               inode_attach_wb(inode, &folio->page);
+               inode_attach_wb(inode, folio);
                wb = inode_to_wb(inode);
 
                __lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr);
@@ -2579,7 +2687,7 @@ static void folio_account_dirtied(struct folio *folio,
 /*
  * Helper function for deaccounting dirty page without writeback.
  *
- * Caller must hold lock_page_memcg().
+ * Caller must hold folio_memcg_lock().
  */
 void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb)
 {
@@ -2598,9 +2706,9 @@ void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb)
  * If warn is true, then emit a warning if the folio is not uptodate and has
  * not been truncated.
  *
- * The caller must hold lock_page_memcg().  Most callers have the folio
+ * The caller must hold folio_memcg_lock().  Most callers have the folio
  * locked.  A few have the folio blocked from truncation through other
- * means (eg zap_page_range() has it mapped and is holding the page table
+ * means (eg zap_vma_pages() has it mapped and is holding the page table
  * lock).  This can also be called from mark_buffer_dirty(), which I
  * cannot prove is always protected against truncate.
  */
@@ -2625,7 +2733,7 @@ void __folio_mark_dirty(struct folio *folio, struct address_space *mapping,
  * @folio: Folio to be marked as dirty.
  *
  * Filesystems which do not use buffer heads should call this function
- * from their set_page_dirty address space operation.  It ignores the
+ * from their dirty_folio address space operation.  It ignores the
  * contents of folio_get_private(), so if the filesystem marks individual
  * blocks as dirty, the filesystem should handle that itself.
  *
@@ -2657,37 +2765,6 @@ bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio)
 }
 EXPORT_SYMBOL(filemap_dirty_folio);
 
-/**
- * folio_account_redirty - Manually account for redirtying a page.
- * @folio: The folio which is being redirtied.
- *
- * Most filesystems should call folio_redirty_for_writepage() instead
- * of this fuction.  If your filesystem is doing writeback outside the
- * context of a writeback_control(), it can call this when redirtying
- * a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED,
- * tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN,
- * WB_WRITTEN) in long term. The mismatches will lead to systematic errors
- * in balanced_dirty_ratelimit and the dirty pages position control.
- */
-void folio_account_redirty(struct folio *folio)
-{
-       struct address_space *mapping = folio->mapping;
-
-       if (mapping && mapping_can_writeback(mapping)) {
-               struct inode *inode = mapping->host;
-               struct bdi_writeback *wb;
-               struct wb_lock_cookie cookie = {};
-               long nr = folio_nr_pages(folio);
-
-               wb = unlocked_inode_to_wb_begin(inode, &cookie);
-               current->nr_dirtied -= nr;
-               node_stat_mod_folio(folio, NR_DIRTIED, -nr);
-               wb_stat_mod(wb, WB_DIRTIED, -nr);
-               unlocked_inode_to_wb_end(inode, &cookie);
-       }
-}
-EXPORT_SYMBOL(folio_account_redirty);
-
 /**
  * folio_redirty_for_writepage - Decline to write a dirty folio.
  * @wbc: The writeback control.
@@ -2703,13 +2780,23 @@ EXPORT_SYMBOL(folio_account_redirty);
 bool folio_redirty_for_writepage(struct writeback_control *wbc,
                struct folio *folio)
 {
-       bool ret;
+       struct address_space *mapping = folio->mapping;
        long nr = folio_nr_pages(folio);
+       bool ret;
 
        wbc->pages_skipped += nr;
-       ret = filemap_dirty_folio(folio->mapping, folio);
-       folio_account_redirty(folio);
+       ret = filemap_dirty_folio(mapping, folio);
+       if (mapping && mapping_can_writeback(mapping)) {
+               struct inode *inode = mapping->host;
+               struct bdi_writeback *wb;
+               struct wb_lock_cookie cookie = {};
 
+               wb = unlocked_inode_to_wb_begin(inode, &cookie);
+               current->nr_dirtied -= nr;
+               node_stat_mod_folio(folio, NR_DIRTIED, -nr);
+               wb_stat_mod(wb, WB_DIRTIED, -nr);
+               unlocked_inode_to_wb_end(inode, &cookie);
+       }
        return ret;
 }
 EXPORT_SYMBOL(folio_redirty_for_writepage);
@@ -2733,11 +2820,11 @@ bool folio_mark_dirty(struct folio *folio)
 
        if (likely(mapping)) {
                /*
-                * readahead/lru_deactivate_page could remain
+                * readahead/folio_deactivate could remain
                 * PG_readahead/PG_reclaim due to race with folio_end_writeback.
                 * About readahead, if the folio is written, the flags would be
                 * reset. So no problem.
-                * About lru_deactivate_page, if the folio is redirtied,
+                * About folio_deactivate, if the folio is redirtied,
                 * the flag will be reset. So no problem. But if the
                 * folio is used by readahead it will confuse readahead
                 * and make it restart the size rampup process. But it's
@@ -2892,6 +2979,7 @@ static void wb_inode_writeback_start(struct bdi_writeback *wb)
 
 static void wb_inode_writeback_end(struct bdi_writeback *wb)
 {
+       unsigned long flags;
        atomic_dec(&wb->writeback_inodes);
        /*
         * Make sure estimate of writeback throughput gets updated after
@@ -2900,7 +2988,10 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb)
         * that if multiple inodes end writeback at a similar time, they get
         * batched into one bandwidth update.
         */
-       queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
+       spin_lock_irqsave(&wb->work_lock, flags);
+       if (test_bit(WB_registered, &wb->state))
+               queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
+       spin_unlock_irqrestore(&wb->work_lock, flags);
 }
 
 bool __folio_end_writeback(struct folio *folio)
@@ -2916,19 +3007,16 @@ bool __folio_end_writeback(struct folio *folio)
                unsigned long flags;
 
                xa_lock_irqsave(&mapping->i_pages, flags);
-               ret = folio_test_clear_writeback(folio);
-               if (ret) {
-                       __xa_clear_mark(&mapping->i_pages, folio_index(folio),
-                                               PAGECACHE_TAG_WRITEBACK);
-                       if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
-                               struct bdi_writeback *wb = inode_to_wb(inode);
-
-                               wb_stat_mod(wb, WB_WRITEBACK, -nr);
-                               __wb_writeout_add(wb, nr);
-                               if (!mapping_tagged(mapping,
-                                                   PAGECACHE_TAG_WRITEBACK))
-                                       wb_inode_writeback_end(wb);
-                       }
+               ret = folio_xor_flags_has_waiters(folio, 1 << PG_writeback);
+               __xa_clear_mark(&mapping->i_pages, folio_index(folio),
+                                       PAGECACHE_TAG_WRITEBACK);
+               if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
+                       struct bdi_writeback *wb = inode_to_wb(inode);
+
+                       wb_stat_mod(wb, WB_WRITEBACK, -nr);
+                       __wb_writeout_add(wb, nr);
+                       if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+                               wb_inode_writeback_end(wb);
                }
 
                if (mapping->host && !mapping_tagged(mapping,
@@ -2937,78 +3025,74 @@ bool __folio_end_writeback(struct folio *folio)
 
                xa_unlock_irqrestore(&mapping->i_pages, flags);
        } else {
-               ret = folio_test_clear_writeback(folio);
-       }
-       if (ret) {
-               lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr);
-               zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
-               node_stat_mod_folio(folio, NR_WRITTEN, nr);
+               ret = folio_xor_flags_has_waiters(folio, 1 << PG_writeback);
        }
+
+       lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr);
+       zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
+       node_stat_mod_folio(folio, NR_WRITTEN, nr);
        folio_memcg_unlock(folio);
+
        return ret;
 }
 
-bool __folio_start_writeback(struct folio *folio, bool keep_write)
+void __folio_start_writeback(struct folio *folio, bool keep_write)
 {
        long nr = folio_nr_pages(folio);
        struct address_space *mapping = folio_mapping(folio);
-       bool ret;
        int access_ret;
 
+       VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio);
+
        folio_memcg_lock(folio);
        if (mapping && mapping_use_writeback_tags(mapping)) {
                XA_STATE(xas, &mapping->i_pages, folio_index(folio));
                struct inode *inode = mapping->host;
                struct backing_dev_info *bdi = inode_to_bdi(inode);
                unsigned long flags;
+               bool on_wblist;
 
                xas_lock_irqsave(&xas, flags);
                xas_load(&xas);
-               ret = folio_test_set_writeback(folio);
-               if (!ret) {
-                       bool on_wblist;
+               folio_test_set_writeback(folio);
 
-                       on_wblist = mapping_tagged(mapping,
-                                                  PAGECACHE_TAG_WRITEBACK);
+               on_wblist = mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
 
-                       xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
-                       if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
-                               struct bdi_writeback *wb = inode_to_wb(inode);
+               xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
+               if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
+                       struct bdi_writeback *wb = inode_to_wb(inode);
 
-                               wb_stat_mod(wb, WB_WRITEBACK, nr);
-                               if (!on_wblist)
-                                       wb_inode_writeback_start(wb);
-                       }
-
-                       /*
-                        * We can come through here when swapping
-                        * anonymous folios, so we don't necessarily
-                        * have an inode to track for sync.
-                        */
-                       if (mapping->host && !on_wblist)
-                               sb_mark_inode_writeback(mapping->host);
+                       wb_stat_mod(wb, WB_WRITEBACK, nr);
+                       if (!on_wblist)
+                               wb_inode_writeback_start(wb);
                }
+
+               /*
+                * We can come through here when swapping anonymous
+                * folios, so we don't necessarily have an inode to
+                * track for sync.
+                */
+               if (mapping->host && !on_wblist)
+                       sb_mark_inode_writeback(mapping->host);
                if (!folio_test_dirty(folio))
                        xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
                if (!keep_write)
                        xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
                xas_unlock_irqrestore(&xas, flags);
        } else {
-               ret = folio_test_set_writeback(folio);
-       }
-       if (!ret) {
-               lruvec_stat_mod_folio(folio, NR_WRITEBACK, nr);
-               zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr);
+               folio_test_set_writeback(folio);
        }
+
+       lruvec_stat_mod_folio(folio, NR_WRITEBACK, nr);
+       zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr);
        folio_memcg_unlock(folio);
+
        access_ret = arch_make_folio_accessible(folio);
        /*
         * If writeback has been triggered on a page that cannot be made
         * accessible, it is too late to recover here.
         */
        VM_BUG_ON_FOLIO(access_ret != 0, folio);
-
-       return ret;
 }
 EXPORT_SYMBOL(__folio_start_writeback);
 
@@ -3073,7 +3157,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
  */
 void folio_wait_stable(struct folio *folio)
 {
-       if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
+       if (mapping_stable_writes(folio_mapping(folio)))
                folio_wait_writeback(folio);
 }
 EXPORT_SYMBOL_GPL(folio_wait_stable);