writeback: memcg: simplify cgroup_writeback_by_id
author: Shakeel Butt <shakeelb@google.com>
Thu, 2 Sep 2021 21:53:27 +0000 (14:53 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Sep 2021 16:58:10 +0000 (09:58 -0700)
Currently cgroup_writeback_by_id calls mem_cgroup_wb_stats() to get dirty
pages for a memcg.  However mem_cgroup_wb_stats() does a lot more than
just get the number of dirty pages.  Just directly get the number of dirty
pages instead of calling mem_cgroup_wb_stats().  Also
cgroup_writeback_by_id() is only called for best-effort dirty flushing, so
remove the unused 'nr' parameter; there is also no need to explicitly flush
the memcg stats.

Link: https://lkml.kernel.org/r/20210722182627.2267368-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/fs-writeback.c
include/linux/memcontrol.h
include/linux/writeback.h
mm/memcontrol.c

index 867984e..35894a2 100644 (file)
@@ -1039,20 +1039,20 @@ restart:
  * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
  * @bdi_id: target bdi id
  * @memcg_id: target memcg css id
- * @nr: number of pages to write, 0 for best-effort dirty flushing
  * @reason: reason why some writeback work initiated
  * @done: target wb_completion
  *
  * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
  * with the specified parameters.
  */
-int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
                           enum wb_reason reason, struct wb_completion *done)
 {
        struct backing_dev_info *bdi;
        struct cgroup_subsys_state *memcg_css;
        struct bdi_writeback *wb;
        struct wb_writeback_work *work;
+       unsigned long dirty;
        int ret;
 
        /* lookup bdi and memcg */
@@ -1081,24 +1081,22 @@ int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
        }
 
        /*
-        * If @nr is zero, the caller is attempting to write out most of
+        * The caller is attempting to write out most of
         * the currently dirty pages.  Let's take the current dirty page
         * count and inflate it by 25% which should be large enough to
         * flush out most dirty pages while avoiding getting livelocked by
         * concurrent dirtiers.
+        *
+        * BTW the memcg stats are flushed periodically and this is best-effort
+        * estimation, so some potential error is ok.
         */
-       if (!nr) {
-               unsigned long filepages, headroom, dirty, writeback;
-
-               mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
-                                     &writeback);
-               nr = dirty * 10 / 8;
-       }
+       dirty = memcg_page_state(mem_cgroup_from_css(memcg_css), NR_FILE_DIRTY);
+       dirty = dirty * 10 / 8;
 
        /* issue the writeback work */
        work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
        if (work) {
-               work->nr_pages = nr;
+               work->nr_pages = dirty;
                work->sync_mode = WB_SYNC_NONE;
                work->range_cyclic = 1;
                work->reason = reason;
index 2479792..3403ec7 100644 (file)
@@ -955,6 +955,16 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
        local_irq_restore(flags);
 }
 
+static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+       long x = READ_ONCE(memcg->vmstats.state[idx]);
+#ifdef CONFIG_SMP
+       if (x < 0)
+               x = 0;
+#endif
+       return x;
+}
+
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                              enum node_stat_item idx)
 {
@@ -1391,6 +1401,11 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
 {
 }
 
+static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+       return 0;
+}
+
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                              enum node_stat_item idx)
 {
index cbaef09..aeda2c0 100644 (file)
@@ -218,7 +218,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 void wbc_detach_inode(struct writeback_control *wbc);
 void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
                              size_t bytes);
-int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
                           enum wb_reason reason, struct wb_completion *done);
 void cgroup_writeback_umount(void);
 bool cleanup_offline_cgwb(struct bdi_writeback *wb);
index 702a81d..1047f02 100644 (file)
@@ -645,17 +645,6 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
        cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
 }
 
-/* idx can be of type enum memcg_stat_item or node_stat_item. */
-static unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
-{
-       long x = READ_ONCE(memcg->vmstats.state[idx]);
-#ifdef CONFIG_SMP
-       if (x < 0)
-               x = 0;
-#endif
-       return x;
-}
-
 /* idx can be of type enum memcg_stat_item or node_stat_item. */
 static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 {
@@ -4668,7 +4657,7 @@ void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
                    atomic_read(&frn->done.cnt) == 1) {
                        frn->at = 0;
                        trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
-                       cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, 0,
+                       cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id,
                                               WB_REASON_FOREIGN_FLUSH,
                                               &frn->done);
                }