Linux 6.9-rc1
[linux-2.6-microblaze.git] / mm / oom_kill.c
index 8a70bca..8d6a207 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/kthread.h>
 #include <linux/init.h>
 #include <linux/mmu_notifier.h>
+#include <linux/cred.h>
 
 #include <asm/tlb.h>
 #include "internal.h"
@@ -56,35 +57,6 @@ static int sysctl_panic_on_oom;
 static int sysctl_oom_kill_allocating_task;
 static int sysctl_oom_dump_tasks = 1;
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table vm_oom_kill_table[] = {
-       {
-               .procname       = "panic_on_oom",
-               .data           = &sysctl_panic_on_oom,
-               .maxlen         = sizeof(sysctl_panic_on_oom),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = SYSCTL_ZERO,
-               .extra2         = SYSCTL_TWO,
-       },
-       {
-               .procname       = "oom_kill_allocating_task",
-               .data           = &sysctl_oom_kill_allocating_task,
-               .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "oom_dump_tasks",
-               .data           = &sysctl_oom_dump_tasks,
-               .maxlen         = sizeof(sysctl_oom_dump_tasks),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {}
-};
-#endif
-
 /*
  * Serializes oom killer invocations (out_of_memory()) from all contexts to
  * prevent from over eager oom killing (e.g. when the oom killer is invoked
@@ -428,10 +400,11 @@ static int dump_task(struct task_struct *p, void *arg)
                return 0;
        }
 
-       pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu         %5hd %s\n",
+       pr_info("[%7d] %5d %5d %8lu %8lu %8lu %8lu %9lu %8ld %8lu         %5hd %s\n",
                task->pid, from_kuid(&init_user_ns, task_uid(task)),
                task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
-               mm_pgtables_bytes(task->mm),
+               get_mm_counter(task->mm, MM_ANONPAGES), get_mm_counter(task->mm, MM_FILEPAGES),
+               get_mm_counter(task->mm, MM_SHMEMPAGES), mm_pgtables_bytes(task->mm),
                get_mm_counter(task->mm, MM_SWAPENTS),
                task->signal->oom_score_adj, task->comm);
        task_unlock(task);
@@ -452,7 +425,7 @@ static int dump_task(struct task_struct *p, void *arg)
 static void dump_tasks(struct oom_control *oc)
 {
        pr_info("Tasks state (memory values in pages):\n");
-       pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
+       pr_info("[  pid  ]   uid  tgid total_vm      rss rss_anon rss_file rss_shmem pgtables_bytes swapents oom_score_adj name\n");
 
        if (is_memcg_oom(oc))
                mem_cgroup_scan_tasks(oc->memcg, dump_task, oc);
@@ -466,7 +439,7 @@ static void dump_tasks(struct oom_control *oc)
        }
 }
 
-static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
+static void dump_oom_victim(struct oom_control *oc, struct task_struct *victim)
 {
        /* one line summary of the oom killer context. */
        pr_info("oom-kill:constraint=%s,nodemask=%*pbl",
@@ -478,7 +451,7 @@ static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
                from_kuid(&init_user_ns, task_uid(victim)));
 }
 
-static void dump_header(struct oom_control *oc, struct task_struct *p)
+static void dump_header(struct oom_control *oc)
 {
        pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
                current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
@@ -490,14 +463,12 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
        if (is_memcg_oom(oc))
                mem_cgroup_print_oom_meminfo(oc->memcg);
        else {
-               show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
+               __show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask, gfp_zone(oc->gfp_mask));
                if (should_dump_unreclaim_slab())
                        dump_unreclaimable_slab();
        }
        if (sysctl_oom_dump_tasks)
                dump_tasks(oc);
-       if (p)
-               dump_oom_summary(oc, p);
 }
 
 /*
@@ -508,8 +479,6 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
 
 static bool oom_killer_disabled __read_mostly;
 
-#define K(x) ((x) << (PAGE_SHIFT-10))
-
 /*
  * task->mm can be NULL if the task is the exited group leader.  So to
  * determine whether the task is using a particular mm, we examine all the
@@ -538,10 +507,11 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
 static struct task_struct *oom_reaper_list;
 static DEFINE_SPINLOCK(oom_reaper_lock);
 
-bool __oom_reap_task_mm(struct mm_struct *mm)
+static bool __oom_reap_task_mm(struct mm_struct *mm)
 {
        struct vm_area_struct *vma;
        bool ret = true;
+       VMA_ITERATOR(vmi, mm, 0);
 
        /*
         * Tell all users of get_user/copy_from_user etc... that the content
@@ -551,7 +521,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
         */
        set_bit(MMF_UNSTABLE, &mm->flags);
 
-       for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+       for_each_vma(vmi, vma) {
                if (vma->vm_flags & (VM_HUGETLB|VM_PFNMAP))
                        continue;
 
@@ -570,7 +540,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
                        struct mmu_gather tlb;
 
                        mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0,
-                                               vma, mm, vma->vm_start,
+                                               mm, vma->vm_start,
                                                vma->vm_end);
                        tlb_gather_mmu(&tlb, mm);
                        if (mmu_notifier_invalidate_range_start_nonblock(&range)) {
@@ -729,6 +699,35 @@ static void queue_oom_reaper(struct task_struct *tsk)
        add_timer(&tsk->oom_reaper_timer);
 }
 
+#ifdef CONFIG_SYSCTL
+static struct ctl_table vm_oom_kill_table[] = { /* one entry per OOM-killer tunable; each .data points at a file-scope static int */
+       {
+               .procname       = "panic_on_oom",
+               .data           = &sysctl_panic_on_oom,
+               .maxlen         = sizeof(sysctl_panic_on_oom),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax, /* the only entry here that uses the min/max handler */
+               .extra1         = SYSCTL_ZERO, /* presumably the lower bound (0) -- confirm against the sysctl core */
+               .extra2         = SYSCTL_TWO, /* presumably the upper bound; the panic message reports 2 as "compulsory", anything else as "system-wide" */
+       },
+       {
+               .procname       = "oom_kill_allocating_task",
+               .data           = &sysctl_oom_kill_allocating_task,
+               .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec, /* no extra1/extra2 limits are set for this entry */
+       },
+       {
+               .procname       = "oom_dump_tasks",
+               .data           = &sysctl_oom_dump_tasks, /* backing int is initialised to 1; dump_header() calls dump_tasks() only when it is non-zero */
+               .maxlen         = sizeof(sysctl_oom_dump_tasks),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec, /* no extra1/extra2 limits are set for this entry */
+       },
+       {} /* empty final entry; presumably the end-of-table marker -- confirm */
+};
+#endif /* CONFIG_SYSCTL */
+
 static int __init oom_init(void)
 {
        oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
@@ -756,6 +755,7 @@ static inline void queue_oom_reaper(struct task_struct *tsk)
  */
 static void mark_oom_victim(struct task_struct *tsk)
 {
+       const struct cred *cred;
        struct mm_struct *mm = tsk->mm;
 
        WARN_ON(oom_killer_disabled);
@@ -764,10 +764,8 @@ static void mark_oom_victim(struct task_struct *tsk)
                return;
 
        /* oom_mm is bound to the signal struct life time. */
-       if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
+       if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
                mmgrab(tsk->signal->oom_mm);
-               set_bit(MMF_OOM_VICTIM, &mm->flags);
-       }
 
        /*
         * Make sure that the task is woken up from uninterruptible sleep
@@ -777,7 +775,9 @@ static void mark_oom_victim(struct task_struct *tsk)
         */
        __thaw_task(tsk);
        atomic_inc(&oom_victims);
-       trace_mark_victim(tsk->pid);
+       cred = get_task_cred(tsk);
+       trace_mark_victim(tsk, cred->uid.val);
+       put_cred(cred);
 }
 
 /**
@@ -995,7 +995,6 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
        mmdrop(mm);
        put_task_struct(victim);
 }
-#undef K
 
 /*
  * Kill provided task unless it's secured by setting
@@ -1033,8 +1032,10 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
        }
        task_unlock(victim);
 
-       if (__ratelimit(&oom_rs))
-               dump_header(oc, victim);
+       if (__ratelimit(&oom_rs)) {
+               dump_header(oc);
+               dump_oom_victim(oc, victim);
+       }
 
        /*
         * Do we need to kill the entire memory cgroup?
@@ -1076,7 +1077,7 @@ static void check_panic_on_oom(struct oom_control *oc)
        /* Do not panic for oom kills triggered by sysrq */
        if (is_sysrq_oom(oc))
                return;
-       dump_header(oc, NULL);
+       dump_header(oc);
        panic("Out of memory: %s panic_on_oom is enabled\n",
                sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
 }
@@ -1131,12 +1132,10 @@ bool out_of_memory(struct oom_control *oc)
 
        /*
         * The OOM killer does not compensate for IO-less reclaim.
-        * pagefault_out_of_memory lost its gfp context so we have to
-        * make sure exclude 0 mask - all other users should have at least
-        * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
-        * invoke the OOM killer even if it is a GFP_NOFS allocation.
+        * But mem_cgroup_oom() has to invoke the OOM killer even
+        * if it is a GFP_NOFS allocation.
         */
-       if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
+       if (!(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
                return true;
 
        /*
@@ -1161,7 +1160,7 @@ bool out_of_memory(struct oom_control *oc)
        select_bad_process(oc);
        /* Found nothing?!?! */
        if (!oc->chosen) {
-               dump_header(oc, NULL);
+               dump_header(oc);
                pr_warn("Out of memory and no killable processes...\n");
                /*
                 * If we got here due to an actual allocation at the