mm: adjust vm_committed_as_batch according to vm overcommit policy

author Feng Tang <feng.tang@intel.com>

Fri, 7 Aug 2020 06:23:15 +0000 (23:23 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 7 Aug 2020 18:33:26 +0000 (11:33 -0700)
author Feng Tang <feng.tang@intel.com>
Fri, 7 Aug 2020 06:23:15 +0000 (23:23 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 7 Aug 2020 18:33:26 +0000 (11:33 -0700)
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 2830f1c..1c34705 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -206,6 +206,8 @@ int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *,
                 loff_t *);
  int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
                 loff_t *);
+int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
+               loff_t *);
  
  #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
  
diff --git a/include/linux/mman.h b/include/linux/mman.h

index 4b08e9c..6f34c33 100644 (file)
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -57,8 +57,12 @@ extern struct percpu_counter vm_committed_as;
  
  #ifdef CONFIG_SMP
  extern s32 vm_committed_as_batch;
+extern void mm_compute_batch(int overcommit_policy);
  #else
  #define vm_committed_as_batch 0
+static inline void mm_compute_batch(int overcommit_policy)
+{      /* !CONFIG_SMP: vm_committed_as_batch is the constant 0 */
+}
  #endif
  
  unsigned long vm_memory_committed(void);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index 1b4d2dc..f785de3 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2671,7 +2671,7 @@ static struct ctl_table vm_table[] = {
                 .data           = &sysctl_overcommit_memory,
                 .maxlen         = sizeof(sysctl_overcommit_memory),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = overcommit_policy_handler,
                 .extra1         = SYSCTL_ZERO,
                 .extra2         = &two,
         },
diff --git a/mm/mm_init.c b/mm/mm_init.c

index 435e5f7..b06a30f 100644 (file)
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -13,6 +13,7 @@
  #include <linux/memory.h>
  #include <linux/notifier.h>
  #include <linux/sched.h>
+#include <linux/mman.h>
  #include "internal.h"
  
  #ifdef CONFIG_DEBUG_MEMORY_INIT
@@ -144,14 +145,23 @@ EXPORT_SYMBOL_GPL(mm_kobj);
  #ifdef CONFIG_SMP
  s32 vm_committed_as_batch = 32;
  
-static void __meminit mm_compute_batch(void)
+void mm_compute_batch(int overcommit_policy)
  {
         u64 memsized_batch;
         s32 nr = num_present_cpus();
         s32 batch = max_t(s32, nr*2, 32);
-
-       /* batch size set to 0.4% of (total memory/#cpus), or max int32 */
-       memsized_batch = min_t(u64, (totalram_pages()/nr)/256, 0x7fffffff);
+       unsigned long ram_pages = totalram_pages();
+
+       /*
+        * For policy OVERCOMMIT_NEVER, set batch size to 0.4% of
+        * (total memory/#cpus), and lift it to 25% for other policies
+        * to ease the possible lock contention for percpu_counter
+        * vm_committed_as, while the max limit is INT_MAX
+        */
+       if (overcommit_policy == OVERCOMMIT_NEVER)
+               memsized_batch = min_t(u64, ram_pages/nr/256, INT_MAX);
+       else
+               memsized_batch = min_t(u64, ram_pages/nr/4, INT_MAX);
  
         vm_committed_as_batch = max_t(s32, memsized_batch, batch);
  }
@@ -162,7 +172,7 @@ static int __meminit mm_compute_batch_notifier(struct notifier_block *self,
         switch (action) {
         case MEM_ONLINE:
         case MEM_OFFLINE:
-               mm_compute_batch();
+               mm_compute_batch(sysctl_overcommit_memory);
         default:
                 break;
         }
@@ -176,7 +186,7 @@ static struct notifier_block compute_batch_nb __meminitdata = {
  
  static int __init mm_compute_batch_init(void)
  {
-       mm_compute_batch();
+       mm_compute_batch(sysctl_overcommit_memory);
         register_hotmemory_notifier(&compute_batch_nb);
  
         return 0;
diff --git a/mm/util.c b/mm/util.c

index 1c9d097..8d6280c 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -746,6 +746,47 @@ int overcommit_ratio_handler(struct ctl_table *table, int write, void *buffer,
         return ret;
  }
  
+static void sync_overcommit_as(struct work_struct *dummy)
+{
+       percpu_counter_sync(&vm_committed_as);  /* work item, run on each CPU */
+}
+
+int overcommit_policy_handler(struct ctl_table *table, int write, void *buffer,
+               size_t *lenp, loff_t *ppos)
+{
+       struct ctl_table t;
+       int new_policy = -1;    /* sentinel: outside the 0..2 range accepted */
+       int ret;
+
+       /*
+        * The deviation of vm_committed_as could be big with loose policy
+        * like OVERCOMMIT_ALWAYS/OVERCOMMIT_GUESS. When changing policy to
+        * strict OVERCOMMIT_NEVER, we need to reduce the deviation to comply
+        * with the strict "NEVER", and to avoid possible race condition (even
+        * though user usually won't too frequently do the switching to policy
+        * OVERCOMMIT_NEVER), the switch is done in the following order:
+        *      1. changing the batch
+        *      2. sync percpu count on each CPU
+        *      3. switch the policy
+        */
+       if (write) {
+               t = *table;
+               t.data = &new_policy;
+               ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+               if (ret || new_policy == -1)    /* error, or no value parsed */
+                       return ret;
+
+               mm_compute_batch(new_policy);
+               if (new_policy == OVERCOMMIT_NEVER)
+                       schedule_on_each_cpu(sync_overcommit_as);
+               sysctl_overcommit_memory = new_policy;
+       } else {
+               ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       }
+
+       return ret;
+}
+
  int overcommit_kbytes_handler(struct ctl_table *table, int write, void *buffer,
                 size_t *lenp, loff_t *ppos)
  {
author	Feng Tang <feng.tang@intel.com>
	Fri, 7 Aug 2020 06:23:15 +0000 (23:23 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 7 Aug 2020 18:33:26 +0000 (11:33 -0700)
include/linux/mm.h		patch \| blob \| history
include/linux/mman.h		patch \| blob \| history
kernel/sysctl.c		patch \| blob \| history
mm/mm_init.c		patch \| blob \| history
mm/util.c		patch \| blob \| history