Merge tag 'seccomp-v5.9-rc1-fix1' of git://git.kernel.org/pub/scm/linux/kernel/git...

[linux-2.6-microblaze.git] / mm / util.c
diff --git a/mm/util.c b/mm/util.c

index cd62e6f..5ef378a 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -425,7 +425,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
   * @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped
   *
   * Assumes @task and @mm are valid (i.e. at least one reference on each), and
- * that mmap_sem is held as writer.
+ * that mmap_lock is held as writer.
   *
   * Return:
   * * 0       on success
@@ -437,7 +437,7 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
         unsigned long locked_vm, limit;
         int ret = 0;
  
-       lockdep_assert_held_write(&mm->mmap_sem);
+       mmap_assert_write_locked(mm);
  
         locked_vm = mm->locked_vm;
         if (inc) {
@@ -481,10 +481,10 @@ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc)
         if (pages == 0 || !mm)
                 return 0;
  
-       down_write(&mm->mmap_sem);
+       mmap_write_lock(mm);
         ret = __account_locked_vm(mm, pages, inc, current,
                                   capable(CAP_IPC_LOCK));
-       up_write(&mm->mmap_sem);
+       mmap_write_unlock(mm);
  
         return ret;
  }
@@ -501,11 +501,11 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
  
         ret = security_mmap_file(file, prot, flag);
         if (!ret) {
-               if (down_write_killable(&mm->mmap_sem))
+               if (mmap_write_lock_killable(mm))
                         return -EINTR;
-               ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
-                                   &populate, &uf);
-               up_write(&mm->mmap_sem);
+               ret = do_mmap(file, addr, len, prot, flag, pgoff, &populate,
+                             &uf);
+               mmap_write_unlock(mm);
                 userfaultfd_unmap_complete(mm, &uf);
                 if (populate)
                         mm_populate(ret, populate);
@@ -746,6 +746,47 @@ int overcommit_ratio_handler(struct ctl_table *table, int write, void *buffer,
         return ret;
  }
  
+static void sync_overcommit_as(struct work_struct *dummy) /* @dummy unused */
+{
+       percpu_counter_sync(&vm_committed_as); /* queued on each CPU */
+}
+
+int overcommit_policy_handler(struct ctl_table *table, int write, void *buffer,
+               size_t *lenp, loff_t *ppos)
+{
+       struct ctl_table t;
+       int new_policy = -1;
+       int ret;
+
+       /*
+        * new_policy starts at -1 because proc_dointvec_minmax() can return 0
+        * without storing a value (e.g. a zero-length write); bail out then.
+        *
+        * vm_committed_as can deviate a lot under the loose policies
+        * OVERCOMMIT_ALWAYS/OVERCOMMIT_GUESS. To honour the strict
+        * OVERCOMMIT_NEVER without racing, the switch is done in this order:
+        *      1. changing the batch
+        *      2. sync percpu count on each CPU
+        *      3. switch the policy
+        */
+       if (write) {
+               t = *table;
+               t.data = &new_policy;
+               ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+               if (ret || new_policy == -1)
+                       return ret;
+
+               mm_compute_batch(new_policy);
+               if (new_policy == OVERCOMMIT_NEVER)
+                       schedule_on_each_cpu(sync_overcommit_as);
+               sysctl_overcommit_memory = new_policy;
+       } else {
+               ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       }
+
+       return ret;
+}
+
  int overcommit_kbytes_handler(struct ctl_table *table, int write, void *buffer,
                 size_t *lenp, loff_t *ppos)
  {
@@ -787,10 +828,15 @@ struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
   * balancing memory across competing virtual machines that are hosted.
   * Several metrics drive this policy engine including the guest reported
   * memory commitment.
+ *
+ * The time cost of this is very low for small platforms. On a big
+ * platform such as a 2S/36C/72T Skylake server, in the worst case where
+ * vm_committed_as's spinlock is under severe contention, the time cost
+ * could be about 30~40 microseconds.
   */
  unsigned long vm_memory_committed(void)
  {
-       return percpu_counter_read_positive(&vm_committed_as);
+       return percpu_counter_sum_positive(&vm_committed_as);
  }
  EXPORT_SYMBOL_GPL(vm_memory_committed);