diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e4a381b..4812a17 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -183,7 +183,7 @@ static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
 static void wb_min_max_ratio(struct bdi_writeback *wb,
                             unsigned long *minp, unsigned long *maxp)
 {
-       unsigned long this_bw = wb->avg_write_bandwidth;
+       unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth);
        unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
        unsigned long long min = wb->bdi->min_ratio;
        unsigned long long max = wb->bdi->max_ratio;
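This hunk (and the ones in wb_position_ratio() and wb_max_pause() below) pairs
every lockless read of wb->avg_write_bandwidth with the WRITE_ONCE() the writer
now performs; dirty_ratelimit and bw_time_stamp get the same treatment further
down. A minimal standalone sketch of the pattern follows; the struct and
function names are illustrative, not the kernel's:

    /* Kernel-style sketch; assumes <linux/spinlock.h>. */
    struct stats {
            spinlock_t lock;
            unsigned long avg_bw;   /* updated under lock, read locklessly */
    };

    /* Writer holds the lock; WRITE_ONCE() forbids store tearing. */
    static void stats_update(struct stats *s, unsigned long avg)
    {
            spin_lock(&s->lock);
            WRITE_ONCE(s->avg_bw, avg);
            spin_unlock(&s->lock);
    }

    /*
     * Reader takes no lock; READ_ONCE() forbids load tearing and
     * repeated loads, so it observes either the old or the new value,
     * never a torn mix of the two.
     */
    static unsigned long stats_read(struct stats *s)
    {
            return READ_ONCE(s->avg_bw);
    }
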
@@ -892,7 +892,7 @@ static long long pos_ratio_polynom(unsigned long setpoint,
 static void wb_position_ratio(struct dirty_throttle_control *dtc)
 {
        struct bdi_writeback *wb = dtc->wb;
-       unsigned long write_bw = wb->avg_write_bandwidth;
+       unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth);
        unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
        unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
        unsigned long wb_thresh = dtc->wb_thresh;
@@ -1115,7 +1115,7 @@ out:
                                        &wb->bdi->tot_write_bandwidth) <= 0);
        }
        wb->write_bandwidth = bw;
-       wb->avg_write_bandwidth = avg;
+       WRITE_ONCE(wb->avg_write_bandwidth, avg);
 }
 
 static void update_dirty_limit(struct dirty_throttle_control *dtc)
@@ -1147,8 +1147,8 @@ update:
        dom->dirty_limit = limit;
 }
 
-static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
-                                   unsigned long now)
+static void domain_update_dirty_limit(struct dirty_throttle_control *dtc,
+                                     unsigned long now)
 {
        struct wb_domain *dom = dtc_dom(dtc);
 
@@ -1324,7 +1324,7 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
        else
                dirty_ratelimit -= step;
 
-       wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
+       WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL));
        wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
 
        trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
@@ -1336,23 +1336,24 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
 {
        struct bdi_writeback *wb = gdtc->wb;
        unsigned long now = jiffies;
-       unsigned long elapsed = now - wb->bw_time_stamp;
+       unsigned long elapsed;
        unsigned long dirtied;
        unsigned long written;
 
-       lockdep_assert_held(&wb->list_lock);
+       spin_lock(&wb->list_lock);
 
        /*
-        * rate-limit, only update once every 200ms.
+        * Lockless checks for elapsed time are racy, and the delayed update
+        * after IO completion skips the check entirely (to make sure written
+        * pages are accounted reasonably quickly). Clamp elapsed to at least
+        * 1 to avoid division by zero.
         */
-       if (elapsed < BANDWIDTH_INTERVAL)
-               return;
-
+       elapsed = max(now - wb->bw_time_stamp, 1UL);
        dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
        written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
 
        if (update_ratelimit) {
-               domain_update_bandwidth(gdtc, now);
+               domain_update_dirty_limit(gdtc, now);
                wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
 
                /*
@@ -1360,7 +1361,7 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
                 * compiler has no way to figure that out.  Help it.
                 */
                if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
-                       domain_update_bandwidth(mdtc, now);
+                       domain_update_dirty_limit(mdtc, now);
                        wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
                }
        }
@@ -1368,16 +1369,15 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
 
        wb->dirtied_stamp = dirtied;
        wb->written_stamp = written;
-       wb->bw_time_stamp = now;
+       WRITE_ONCE(wb->bw_time_stamp, now);
+       spin_unlock(&wb->list_lock);
 }
 
-static void wb_update_bandwidth(struct bdi_writeback *wb)
+void wb_update_bandwidth(struct bdi_writeback *wb)
 {
        struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
 
-       spin_lock(&wb->list_lock);
        __wb_update_bandwidth(&gdtc, NULL, false);
-       spin_unlock(&wb->list_lock);
 }
 
 /* Interval after which we consider wb idle and don't estimate bandwidth */
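With the early "elapsed < BANDWIDTH_INTERVAL" bail-out gone, callers of
__wb_update_bandwidth() do the rate-limit check themselves (locklessly, see
below) and the function clamps elapsed instead of testing it. A toy sketch of
why the clamp matters; calc_bw() and its parameters are illustrative and not
the kernel's actual bandwidth formula:

    /*
     * Pages/s from a counter sampled at two points in time. If both
     * samples land in the same jiffy, elapsed would be 0 and the
     * division would fault; clamping to 1 keeps it safe at the cost
     * of briefly overestimating the rate.
     */
    static unsigned long calc_bw(unsigned long written,
                                 unsigned long written_stamp,
                                 unsigned long now,
                                 unsigned long time_stamp)
    {
            unsigned long elapsed = max(now - time_stamp, 1UL);

            return (written - written_stamp) * HZ / elapsed;
    }
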
@@ -1393,7 +1393,7 @@ static void wb_bandwidth_estimate_start(struct bdi_writeback *wb)
                spin_lock(&wb->list_lock);
                wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED);
                wb->written_stamp = wb_stat(wb, WB_WRITTEN);
-               wb->bw_time_stamp = now;
+               WRITE_ONCE(wb->bw_time_stamp, now);
                spin_unlock(&wb->list_lock);
        }
 }
@@ -1418,7 +1418,7 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
 static unsigned long wb_max_pause(struct bdi_writeback *wb,
                                  unsigned long wb_dirty)
 {
-       unsigned long bw = wb->avg_write_bandwidth;
+       unsigned long bw = READ_ONCE(wb->avg_write_bandwidth);
        unsigned long t;
 
        /*
@@ -1440,8 +1440,8 @@ static long wb_min_pause(struct bdi_writeback *wb,
                         unsigned long dirty_ratelimit,
                         int *nr_dirtied_pause)
 {
-       long hi = ilog2(wb->avg_write_bandwidth);
-       long lo = ilog2(wb->dirty_ratelimit);
+       long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth));
+       long lo = ilog2(READ_ONCE(wb->dirty_ratelimit));
        long t;         /* target pause */
        long pause;     /* estimated next pause */
        int pages;      /* target nr_dirtied_pause */
@@ -1721,15 +1721,12 @@ free_running:
                if (dirty_exceeded && !wb->dirty_exceeded)
                        wb->dirty_exceeded = 1;
 
-               if (time_is_before_jiffies(wb->bw_time_stamp +
-                                          BANDWIDTH_INTERVAL)) {
-                       spin_lock(&wb->list_lock);
+               if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
+                                          BANDWIDTH_INTERVAL))
                        __wb_update_bandwidth(gdtc, mdtc, true);
-                       spin_unlock(&wb->list_lock);
-               }
 
                /* throttle according to the chosen dtc */
-               dirty_ratelimit = wb->dirty_ratelimit;
+               dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit);
                task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
                                                        RATELIMIT_CALC_SHIFT;
                max_pause = wb_max_pause(wb, sdtc->wb_dirty);
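The update check in balance_dirty_pages() is now lockless, which is why
bw_time_stamp is read with READ_ONCE() and why __wb_update_bandwidth() takes
wb->list_lock itself. The check is deliberately racy: two tasks may both see a
stale stamp and call in, but repeating the update under the lock is harmless.
A generic sketch of this check-then-lock shape, with illustrative names:

    /* Kernel-style sketch; assumes <linux/jiffies.h>, <linux/spinlock.h>. */
    #define UPDATE_INTERVAL (HZ / 5)        /* 200ms, like BANDWIDTH_INTERVAL */

    struct stats {
            spinlock_t lock;
            unsigned long time_stamp;       /* jiffies of last update */
    };

    void do_update(struct stats *s);        /* refreshes s->time_stamp */

    static void maybe_update(struct stats *s)
    {
            /*
             * Racy, lockless check: cheap enough to run on every
             * throttled write, while real updates happen at most
             * about five times a second.
             */
            if (time_is_before_jiffies(READ_ONCE(s->time_stamp) +
                                       UPDATE_INTERVAL)) {
                    spin_lock(&s->lock);
                    do_update(s);
                    spin_unlock(&s->lock);
            }
    }
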
@@ -2021,7 +2018,6 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
        return ret;
 }
 
-#ifdef CONFIG_BLOCK
 void laptop_mode_timer_fn(struct timer_list *t)
 {
        struct backing_dev_info *backing_dev_info =
@@ -2056,7 +2052,6 @@ void laptop_sync_completion(void)
 
        rcu_read_unlock();
 }
-#endif
 
 /*
  * If ratelimit_pages is too high then we can get into dirty-data overload
@@ -2374,7 +2369,14 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
                cond_resched();
                congestion_wait(BLK_RW_ASYNC, HZ/50);
        }
-       wb_update_bandwidth(wb);
+       /*
+        * Usually few of the pages we just submitted have been written back
+        * by now, but if writeback is being submitted constantly, this makes
+        * sure the writeback bandwidth estimate is updated once in a while.
+        */
+       if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
+                                  BANDWIDTH_INTERVAL))
+               wb_update_bandwidth(wb);
        return ret;
 }
 
@@ -2754,6 +2756,14 @@ static void wb_inode_writeback_start(struct bdi_writeback *wb)
 static void wb_inode_writeback_end(struct bdi_writeback *wb)
 {
        atomic_dec(&wb->writeback_inodes);
+       /*
+        * Make sure the estimate of writeback throughput gets updated after
+        * writeback completes. We delay the update by BANDWIDTH_INTERVAL
+        * (which is the interval other bandwidth updates use for batching) so
+        * that if multiple inodes end writeback at a similar time, they get
+        * batched into one bandwidth update.
+        */
+       queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
 }
 
 int test_clear_page_writeback(struct page *page)
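
The queue_delayed_work() call in wb_inode_writeback_end() above relies on the
fact that queueing an already-pending delayed work item is a no-op, so many
inodes finishing writeback within one BANDWIDTH_INTERVAL collapse into a
single run of the worker. A sketch of the worker behind wb->bw_dwork; it lives
outside the hunks shown here, so take the body as an assumption about the rest
of the series rather than part of this diff:

    /* Assumed worker: refresh the bandwidth estimate once per batch. */
    static void wb_update_bandwidth_workfn(struct work_struct *work)
    {
            struct bdi_writeback *wb = container_of(to_delayed_work(work),
                                                    struct bdi_writeback,
                                                    bw_dwork);

            wb_update_bandwidth(wb);
    }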