Linux 6.9-rc1

[linux-2.6-microblaze.git] / mm / oom_kill.c
diff --git a/mm/oom_kill.c b/mm/oom_kill.c

index 8a70bca..8d6a207 100644 (file)
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -44,6 +44,7 @@
  #include <linux/kthread.h>
  #include <linux/init.h>
  #include <linux/mmu_notifier.h>
+#include <linux/cred.h>
  
  #include <asm/tlb.h>
  #include "internal.h"
@@ -56,35 +57,6 @@ static int sysctl_panic_on_oom;
  static int sysctl_oom_kill_allocating_task;
  static int sysctl_oom_dump_tasks = 1;
  
-#ifdef CONFIG_SYSCTL
-static struct ctl_table vm_oom_kill_table[] = {
-       {
-               .procname       = "panic_on_oom",
-               .data           = &sysctl_panic_on_oom,
-               .maxlen         = sizeof(sysctl_panic_on_oom),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = SYSCTL_ZERO,
-               .extra2         = SYSCTL_TWO,
-       },
-       {
-               .procname       = "oom_kill_allocating_task",
-               .data           = &sysctl_oom_kill_allocating_task,
-               .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "oom_dump_tasks",
-               .data           = &sysctl_oom_dump_tasks,
-               .maxlen         = sizeof(sysctl_oom_dump_tasks),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {}
-};
-#endif
-
  /*
   * Serializes oom killer invocations (out_of_memory()) from all contexts to
   * prevent from over eager oom killing (e.g. when the oom killer is invoked
@@ -428,10 +400,11 @@ static int dump_task(struct task_struct *p, void *arg)
                 return 0;
         }
  
-       pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu         %5hd %s\n",
+       pr_info("[%7d] %5d %5d %8lu %8lu %8lu %8lu %9lu %8ld %8lu         %5hd %s\n",
                 task->pid, from_kuid(&init_user_ns, task_uid(task)),
                 task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
-               mm_pgtables_bytes(task->mm),
+               get_mm_counter(task->mm, MM_ANONPAGES), get_mm_counter(task->mm, MM_FILEPAGES),
+               get_mm_counter(task->mm, MM_SHMEMPAGES), mm_pgtables_bytes(task->mm),
                 get_mm_counter(task->mm, MM_SWAPENTS),
                 task->signal->oom_score_adj, task->comm);
         task_unlock(task);
@@ -452,7 +425,7 @@ static int dump_task(struct task_struct *p, void *arg)
  static void dump_tasks(struct oom_control *oc)
  {
         pr_info("Tasks state (memory values in pages):\n");
-       pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
+       pr_info("[  pid  ]   uid  tgid total_vm      rss rss_anon rss_file rss_shmem pgtables_bytes swapents oom_score_adj name\n");
  
         if (is_memcg_oom(oc))
                 mem_cgroup_scan_tasks(oc->memcg, dump_task, oc);
@@ -466,7 +439,7 @@ static void dump_tasks(struct oom_control *oc)
         }
  }
  
-static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
+static void dump_oom_victim(struct oom_control *oc, struct task_struct *victim)
  {
         /* one line summary of the oom killer context. */
         pr_info("oom-kill:constraint=%s,nodemask=%*pbl",
@@ -478,7 +451,7 @@ static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
                 from_kuid(&init_user_ns, task_uid(victim)));
  }
  
-static void dump_header(struct oom_control *oc, struct task_struct *p)
+static void dump_header(struct oom_control *oc)
  {
         pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
                 current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
@@ -490,14 +463,12 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
         if (is_memcg_oom(oc))
                 mem_cgroup_print_oom_meminfo(oc->memcg);
         else {
-               show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
+               __show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask, gfp_zone(oc->gfp_mask));
                 if (should_dump_unreclaim_slab())
                         dump_unreclaimable_slab();
         }
         if (sysctl_oom_dump_tasks)
                 dump_tasks(oc);
-       if (p)
-               dump_oom_summary(oc, p);
  }
  
  /*
@@ -508,8 +479,6 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
  
  static bool oom_killer_disabled __read_mostly;
  
-#define K(x) ((x) << (PAGE_SHIFT-10))
-
  /*
   * task->mm can be NULL if the task is the exited group leader.  So to
   * determine whether the task is using a particular mm, we examine all the
@@ -538,10 +507,11 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
  static struct task_struct *oom_reaper_list;
  static DEFINE_SPINLOCK(oom_reaper_lock);
  
-bool __oom_reap_task_mm(struct mm_struct *mm)
+static bool __oom_reap_task_mm(struct mm_struct *mm)
  {
         struct vm_area_struct *vma;
         bool ret = true;
+       VMA_ITERATOR(vmi, mm, 0);
  
         /*
          * Tell all users of get_user/copy_from_user etc... that the content
@@ -551,7 +521,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
          */
         set_bit(MMF_UNSTABLE, &mm->flags);
  
-       for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+       for_each_vma(vmi, vma) {
                 if (vma->vm_flags & (VM_HUGETLB|VM_PFNMAP))
                         continue;
  
@@ -570,7 +540,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
                         struct mmu_gather tlb;
  
                         mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0,
-                                               vma, mm, vma->vm_start,
+                                               mm, vma->vm_start,
                                                 vma->vm_end);
                         tlb_gather_mmu(&tlb, mm);
                         if (mmu_notifier_invalidate_range_start_nonblock(&range)) {
@@ -729,6 +699,35 @@ static void queue_oom_reaper(struct task_struct *tsk)
         add_timer(&tsk->oom_reaper_timer);
  }
  
+#ifdef CONFIG_SYSCTL
+static struct ctl_table vm_oom_kill_table[] = { /* sysctl entries backed by this file's sysctl_* OOM variables */
+       {
+               .procname       = "panic_on_oom",
+               .data           = &sysctl_panic_on_oom,
+               .maxlen         = sizeof(sysctl_panic_on_oom),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax, /* integer handler that honours the extra1/extra2 bounds */
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_TWO, /* 2 is what check_panic_on_oom() reports as "compulsory" */
+       },
+       {
+               .procname       = "oom_kill_allocating_task",
+               .data           = &sysctl_oom_kill_allocating_task,
+               .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "oom_dump_tasks",
+               .data           = &sysctl_oom_dump_tasks, /* when non-zero, dump_header() also calls dump_tasks() */
+               .maxlen         = sizeof(sysctl_oom_dump_tasks),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {} /* empty final entry */
+};
+#endif /* CONFIG_SYSCTL */
+
  static int __init oom_init(void)
  {
         oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
@@ -756,6 +755,7 @@ static inline void queue_oom_reaper(struct task_struct *tsk)
   */
  static void mark_oom_victim(struct task_struct *tsk)
  {
+       const struct cred *cred;
         struct mm_struct *mm = tsk->mm;
  
         WARN_ON(oom_killer_disabled);
@@ -764,10 +764,8 @@ static void mark_oom_victim(struct task_struct *tsk)
                 return;
  
         /* oom_mm is bound to the signal struct life time. */
-       if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
+       if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
                 mmgrab(tsk->signal->oom_mm);
-               set_bit(MMF_OOM_VICTIM, &mm->flags);
-       }
  
         /*
          * Make sure that the task is woken up from uninterruptible sleep
@@ -777,7 +775,9 @@ static void mark_oom_victim(struct task_struct *tsk)
          */
         __thaw_task(tsk);
         atomic_inc(&oom_victims);
-       trace_mark_victim(tsk->pid);
+       cred = get_task_cred(tsk);
+       trace_mark_victim(tsk, cred->uid.val);
+       put_cred(cred);
  }
  
  /**
@@ -995,7 +995,6 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
         mmdrop(mm);
         put_task_struct(victim);
  }
-#undef K
  
  /*
   * Kill provided task unless it's secured by setting
@@ -1033,8 +1032,10 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
         }
         task_unlock(victim);
  
-       if (__ratelimit(&oom_rs))
-               dump_header(oc, victim);
+       if (__ratelimit(&oom_rs)) {
+               dump_header(oc);
+               dump_oom_victim(oc, victim);
+       }
  
         /*
          * Do we need to kill the entire memory cgroup?
@@ -1076,7 +1077,7 @@ static void check_panic_on_oom(struct oom_control *oc)
         /* Do not panic for oom kills triggered by sysrq */
         if (is_sysrq_oom(oc))
                 return;
-       dump_header(oc, NULL);
+       dump_header(oc);
         panic("Out of memory: %s panic_on_oom is enabled\n",
                 sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
  }
@@ -1131,12 +1132,10 @@ bool out_of_memory(struct oom_control *oc)
  
         /*
          * The OOM killer does not compensate for IO-less reclaim.
-        * pagefault_out_of_memory lost its gfp context so we have to
-        * make sure exclude 0 mask - all other users should have at least
-        * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
-        * invoke the OOM killer even if it is a GFP_NOFS allocation.
+        * But mem_cgroup_oom() has to invoke the OOM killer even
+        * if it is a GFP_NOFS allocation.
          */
-       if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
+       if (!(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
                 return true;
  
         /*
@@ -1161,7 +1160,7 @@ bool out_of_memory(struct oom_control *oc)
         select_bad_process(oc);
         /* Found nothing?!?! */
         if (!oc->chosen) {
-               dump_header(oc, NULL);
+               dump_header(oc);
                 pr_warn("Out of memory and no killable processes...\n");
                 /*
                  * If we got here due to an actual allocation at the