diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e4a381b..4812a17 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -183,7 +183,7 @@ static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
 static void wb_min_max_ratio(struct bdi_writeback *wb,
                             unsigned long *minp, unsigned long *maxp)
 {
-       unsigned long this_bw = wb->avg_write_bandwidth;
+       unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth);
        unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
        unsigned long long min = wb->bdi->min_ratio;
        unsigned long long max = wb->bdi->max_ratio;
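This hunk (and the ones in wb_position_ratio() and wb_max_pause() below) pairs
every lockless read of wb->avg_write_bandwidth with the WRITE_ONCE() the writer
now performs; dirty_ratelimit and bw_time_stamp get the same treatment further
down. A minimal standalone sketch of the pattern follows; the struct and
function names are illustrative, not the kernel's:

    /* Kernel-style sketch; assumes <linux/spinlock.h>. */
    struct stats {
            spinlock_t lock;
            unsigned long avg_bw;   /* updated under lock, read locklessly */
    };

    /* Writer holds the lock; WRITE_ONCE() forbids store tearing. */
    static void stats_update(struct stats *s, unsigned long avg)
    {
            spin_lock(&s->lock);
            WRITE_ONCE(s->avg_bw, avg);
            spin_unlock(&s->lock);
    }

    /*
     * Reader takes no lock; READ_ONCE() forbids load tearing and
     * repeated loads, so it observes either the old or the new value,
     * never a torn mix of the two.
     */
    static unsigned long stats_read(struct stats *s)
    {
            return READ_ONCE(s->avg_bw);
    }
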
@@ -892,7 +892,7 @@ static long long pos_ratio_polynom(unsigned long setpoint,
 static void wb_position_ratio(struct dirty_throttle_control *dtc)
 {
        struct bdi_writeback *wb = dtc->wb;
-       unsigned long write_bw = wb->avg_write_bandwidth;
+       unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth);
        unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
        unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
        unsigned long wb_thresh = dtc->wb_thresh;
@@ -1115,7 +1115,7 @@ out:
                                        &wb->bdi->tot_write_bandwidth) <= 0);
        }
        wb->write_bandwidth = bw;
-       wb->avg_write_bandwidth = avg;
+       WRITE_ONCE(wb->avg_write_bandwidth, avg);
 }
 
 static void update_dirty_limit(struct dirty_throttle_control *dtc)
@@ -1147,8 +1147,8 @@ update:
        dom->dirty_limit = limit;
 }
 
-static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
-                                   unsigned long now)
+static void domain_update_dirty_limit(struct dirty_throttle_control *dtc,
+                                     unsigned long now)
 {
        struct wb_domain *dom = dtc_dom(dtc);
 
@@ -1324,7 +1324,7 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
        else
                dirty_ratelimit -= step;
 
-       wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
+       WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL));
        wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
 
        trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
@@ -1336,23 +1336,24 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
 {
        struct bdi_writeback *wb = gdtc->wb;
        unsigned long now = jiffies;
-       unsigned long elapsed = now - wb->bw_time_stamp;
+       unsigned long elapsed;
        unsigned long dirtied;
        unsigned long written;
 
-       lockdep_assert_held(&wb->list_lock);
+       spin_lock(&wb->list_lock);
 
        /*
-        * rate-limit, only update once every 200ms.
+        * Lockless checks for elapsed time are racy, and the delayed update
+        * after IO completion skips the check entirely (to make sure written
+        * pages are accounted reasonably quickly). Clamp elapsed to at least
+        * 1 to avoid division by zero.
         */
-       if (elapsed < BANDWIDTH_INTERVAL)
-               return;
-
+       elapsed = max(now - wb->bw_time_stamp, 1UL);
        dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
        written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
 
        if (update_ratelimit) {
-               domain_update_bandwidth(gdtc, now);
+               domain_update_dirty_limit(gdtc, now);
                wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
 
                /*
@@ -1360,7 +1361,7 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
                 * compiler has no way to figure that out.  Help it.
                 */
                if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
-                       domain_update_bandwidth(mdtc, now);
+                       domain_update_dirty_limit(mdtc, now);
                        wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
                }
        }
@@ -1368,16 +1369,15 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
 
        wb->dirtied_stamp = dirtied;
        wb->written_stamp = written;
-       wb->bw_time_stamp = now;
+       WRITE_ONCE(wb->bw_time_stamp, now);
+       spin_unlock(&wb->list_lock);
 }
 
-static void wb_update_bandwidth(struct bdi_writeback *wb)
+void wb_update_bandwidth(struct bdi_writeback *wb)
 {
        struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
 
-       spin_lock(&wb->list_lock);
        __wb_update_bandwidth(&gdtc, NULL, false);
-       spin_unlock(&wb->list_lock);
 }
 
 /* Interval after which we consider wb idle and don't estimate bandwidth */
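With the early "elapsed < BANDWIDTH_INTERVAL" bail-out gone, callers of
__wb_update_bandwidth() do the rate-limit check themselves (locklessly, see
below) and the function clamps elapsed instead of testing it. A toy sketch of
why the clamp matters; calc_bw() and its parameters are illustrative and not
the kernel's actual bandwidth formula:

    /*
     * Pages/s from a counter sampled at two points in time. If both
     * samples land in the same jiffy, elapsed would be 0 and the
     * division would fault; clamping to 1 keeps it safe at the cost
     * of briefly overestimating the rate.
     */
    static unsigned long calc_bw(unsigned long written,
                                 unsigned long written_stamp,
                                 unsigned long now,
                                 unsigned long time_stamp)
    {
            unsigned long elapsed = max(now - time_stamp, 1UL);

            return (written - written_stamp) * HZ / elapsed;
    }
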
@@ -1393,7 +1393,7 @@ static void wb_bandwidth_estimate_start(struct bdi_writeback *wb)
                spin_lock(&wb->list_lock);
                wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED);
                wb->written_stamp = wb_stat(wb, WB_WRITTEN);
-               wb->bw_time_stamp = now;
+               WRITE_ONCE(wb->bw_time_stamp, now);
                spin_unlock(&wb->list_lock);
        }
 }
@@ -1418,7 +1418,7 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
 static unsigned long wb_max_pause(struct bdi_writeback *wb,
                                  unsigned long wb_dirty)
 {
-       unsigned long bw = wb->avg_write_bandwidth;
+       unsigned long bw = READ_ONCE(wb->avg_write_bandwidth);
        unsigned long t;
 
        /*
@@ -1440,8 +1440,8 @@ static long wb_min_pause(struct bdi_writeback *wb,
                         unsigned long dirty_ratelimit,
                         int *nr_dirtied_pause)
 {
-       long hi = ilog2(wb->avg_write_bandwidth);
-       long lo = ilog2(wb->dirty_ratelimit);
+       long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth));
+       long lo = ilog2(READ_ONCE(wb->dirty_ratelimit));
        long t;         /* target pause */
        long pause;     /* estimated next pause */
        int pages;      /* target nr_dirtied_pause */
@@ -1721,15 +1721,12 @@ free_running:
                if (dirty_exceeded && !wb->dirty_exceeded)
                        wb->dirty_exceeded = 1;
 
-               if (time_is_before_jiffies(wb->bw_time_stamp +
-                                          BANDWIDTH_INTERVAL)) {
-                       spin_lock(&wb->list_lock);
+               if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
+                                          BANDWIDTH_INTERVAL))
                        __wb_update_bandwidth(gdtc, mdtc, true);
-                       spin_unlock(&wb->list_lock);
-               }
 
                /* throttle according to the chosen dtc */
-               dirty_ratelimit = wb->dirty_ratelimit;
+               dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit);
                task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
                                                        RATELIMIT_CALC_SHIFT;
                max_pause = wb_max_pause(wb, sdtc->wb_dirty);
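The update check in balance_dirty_pages() is now lockless, which is why
bw_time_stamp is read with READ_ONCE() and why __wb_update_bandwidth() takes
wb->list_lock itself. The check is deliberately racy: two tasks may both see a
stale stamp and call in, but repeating the update under the lock is harmless.
A generic sketch of this check-then-lock shape, with illustrative names:

    /* Kernel-style sketch; assumes <linux/jiffies.h>, <linux/spinlock.h>. */
    #define UPDATE_INTERVAL (HZ / 5)        /* 200ms, like BANDWIDTH_INTERVAL */

    struct stats {
            spinlock_t lock;
            unsigned long time_stamp;       /* jiffies of last update */
    };

    void do_update(struct stats *s);        /* refreshes s->time_stamp */

    static void maybe_update(struct stats *s)
    {
            /*
             * Racy, lockless check: cheap enough to run on every
             * throttled write, while real updates happen at most
             * about five times a second.
             */
            if (time_is_before_jiffies(READ_ONCE(s->time_stamp) +
                                       UPDATE_INTERVAL)) {
                    spin_lock(&s->lock);
                    do_update(s);
                    spin_unlock(&s->lock);
            }
    }
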
@@ -2021,7 +2018,6 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
        return ret;
 }
 
-#ifdef CONFIG_BLOCK
 void laptop_mode_timer_fn(struct timer_list *t)
 {
        struct backing_dev_info *backing_dev_info =
@@ -2056,7 +2052,6 @@ void laptop_sync_completion(void)
 
        rcu_read_unlock();
 }
-#endif
 
 /*
  * If ratelimit_pages is too high then we can get into dirty-data overload
@@ -2374,7 +2369,14 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
                cond_resched();
                congestion_wait(BLK_RW_ASYNC, HZ/50);
        }
-       wb_update_bandwidth(wb);
+       /*
+        * Usually few of the pages we just submitted have been written back
+        * by now, but if writeback is being submitted constantly, this makes
+        * sure the writeback bandwidth estimate is updated once in a while.
+        */
+       if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
+                                  BANDWIDTH_INTERVAL))
+               wb_update_bandwidth(wb);
        return ret;
 }
 
@@ -2754,6 +2756,14 @@ static void wb_inode_writeback_start(struct bdi_writeback *wb)
 static void wb_inode_writeback_end(struct bdi_writeback *wb)
 {
        atomic_dec(&wb->writeback_inodes);
+       /*
+        * Make sure the estimate of writeback throughput gets updated after
+        * writeback completes. We delay the update by BANDWIDTH_INTERVAL
+        * (which is the interval other bandwidth updates use for batching) so
+        * that if multiple inodes end writeback at a similar time, they get
+        * batched into one bandwidth update.
+        */
+       queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
 }
 
 int test_clear_page_writeback(struct page *page)
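
The queue_delayed_work() call in wb_inode_writeback_end() above relies on the
fact that queueing an already-pending delayed work item is a no-op, so many
inodes finishing writeback within one BANDWIDTH_INTERVAL collapse into a
single run of the worker. A sketch of the worker behind wb->bw_dwork; it lives
outside the hunks shown here, so take the body as an assumption about the rest
of the series rather than part of this diff:

    /* Assumed worker: refresh the bandwidth estimate once per batch. */
    static void wb_update_bandwidth_workfn(struct work_struct *work)
    {
            struct bdi_writeback *wb = container_of(to_delayed_work(work),
                                                    struct bdi_writeback,
                                                    bw_dwork);

            wb_update_bandwidth(wb);
    }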