mm: compaction: optimize proactive compaction deferrals
author: Charan Teja Reddy <charante@codeaurora.org>
Thu, 2 Sep 2021 21:59:56 +0000 (14:59 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Sep 2021 16:58:17 +0000 (09:58 -0700)
Vlastimil Babka figured out that when the fragmentation score doesn't go
down across a proactive compaction run, i.e. when no progress is made, the
next wakeup for proactive compaction is deferred for
1 << COMPACT_MAX_DEFER_SHIFT, i.e. 64 times, with each wakeup interval being
HPAGE_FRAG_CHECK_INTERVAL_MSEC (=500).  On each of these wakeups, it just
decrements the 'proactive_defer' counter and goes back to sleep, i.e. it is
woken up only to decrement a counter.

The same deferral time can also be achieved by simply sleeping for
HPAGE_FRAG_CHECK_INTERVAL_MSEC << COMPACT_MAX_DEFER_SHIFT.  This avoids the
unnecessary wakeups of the kcompactd thread and also removes the need for
the 'proactive_defer' counter.

[akpm@linux-foundation.org: tweak comment]

Link: https://lore.kernel.org/linux-fsdevel/88abfdb6-2c13-b5a6-5b46-742d12d1c910@suse.cz/
Link: https://lkml.kernel.org/r/1626869599-25412-1-git-send-email-charante@codeaurora.org
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Khalid Aziz <khalid.aziz@oracle.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Nitin Gupta <nigupta@nvidia.com>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/compaction.c

index 61fb64f..4ee0d40 100644 (file)
@@ -2885,7 +2885,8 @@ static int kcompactd(void *p)
 {
        pg_data_t *pgdat = (pg_data_t *)p;
        struct task_struct *tsk = current;
-       unsigned int proactive_defer = 0;
+       long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC);
+       long timeout = default_timeout;
 
        const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 
@@ -2902,23 +2903,30 @@ static int kcompactd(void *p)
 
                trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
                if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
-                       kcompactd_work_requested(pgdat),
-                       msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) {
+                       kcompactd_work_requested(pgdat), timeout)) {
 
                        psi_memstall_enter(&pflags);
                        kcompactd_do_work(pgdat);
                        psi_memstall_leave(&pflags);
+                       /*
+                        * Reset the timeout value. The defer timeout from
+                        * proactive compaction is lost here but that is fine
+                        * as the condition of the zone changing substantionally
+                        * then carrying on with the previous defer interval is
+                        * not useful.
+                        */
+                       timeout = default_timeout;
                        continue;
                }
 
-               /* kcompactd wait timeout */
+               /*
+                * Start the proactive work with default timeout. Based
+                * on the fragmentation score, this timeout is updated.
+                */
+               timeout = default_timeout;
                if (should_proactive_compact_node(pgdat)) {
                        unsigned int prev_score, score;
 
-                       if (proactive_defer) {
-                               proactive_defer--;
-                               continue;
-                       }
                        prev_score = fragmentation_score_node(pgdat);
                        proactive_compact_node(pgdat);
                        score = fragmentation_score_node(pgdat);
@@ -2926,8 +2934,9 @@ static int kcompactd(void *p)
                         * Defer proactive compaction if the fragmentation
                         * score did not go down i.e. no progress made.
                         */
-                       proactive_defer = score < prev_score ?
-                                       0 : 1 << COMPACT_MAX_DEFER_SHIFT;
+                       if (unlikely(score >= prev_score))
+                               timeout =
+                                  default_timeout << COMPACT_MAX_DEFER_SHIFT;
                }
        }