Linux 6.9-rc1
[linux-2.6-microblaze.git] / fs / coredump.c
index 9f4aae2..be6403b 100644 (file)
@@ -68,7 +68,10 @@ struct core_name {
 
 static int expand_corename(struct core_name *cn, int size)
 {
-       char *corename = krealloc(cn->corename, size, GFP_KERNEL);
+       char *corename;
+
+       size = kmalloc_size_roundup(size);
+       corename = krealloc(cn->corename, size, GFP_KERNEL);
 
        if (!corename)
                return -ENOMEM;
@@ -76,7 +79,7 @@ static int expand_corename(struct core_name *cn, int size)
        if (size > core_name_size) /* racy but harmless */
                core_name_size = size;
 
-       cn->size = ksize(corename);
+       cn->size = size;
        cn->corename = corename;
        return 0;
 }
@@ -325,6 +328,10 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
                                err = cn_printf(cn, "%lu",
                                              rlimit(RLIMIT_CORE));
                                break;
+                       /* CPU the task ran on */
+                       case 'C':
+                               err = cn_printf(cn, "%d", cprm->cpu);
+                               break;
                        default:
                                break;
                        }
@@ -354,7 +361,7 @@ static int zap_process(struct task_struct *start, int exit_code)
        struct task_struct *t;
        int nr = 0;
 
-       /* ignore all signals except SIGKILL, see prepare_signal() */
+       /* Allow SIGKILL, see prepare_signal() */
        start->signal->flags = SIGNAL_GROUP_EXIT;
        start->signal->group_exit_code = exit_code;
        start->signal->group_stop_count = 0;
@@ -364,7 +371,9 @@ static int zap_process(struct task_struct *start, int exit_code)
                if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
                        sigaddset(&t->pending.signal, SIGKILL);
                        signal_wake_up(t, 1);
-                       nr++;
+                       /* The vhost_worker does not participate in coredumps */
+                       if ((t->flags & (PF_USER_WORKER | PF_IO_WORKER)) != PF_USER_WORKER)
+                               nr++;
                }
        }
 
@@ -402,9 +411,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
        if (core_waiters > 0) {
                struct core_thread *ptr;
 
-               freezer_do_not_count();
-               wait_for_completion(&core_state->startup);
-               freezer_count();
+               wait_for_completion_state(&core_state->startup,
+                                         TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
                /*
                 * Wait for all the threads to become inactive, so that
                 * all the thread context (extended register state, like
@@ -412,7 +420,7 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
                 */
                ptr = core_state->dumper.next;
                while (ptr != NULL) {
-                       wait_task_inactive(ptr->task, 0);
+                       wait_task_inactive(ptr->task, TASK_ANY);
                        ptr = ptr->next;
                }
        }
@@ -526,7 +534,6 @@ void do_coredump(const kernel_siginfo_t *siginfo)
        static atomic_t core_dump_count = ATOMIC_INIT(0);
        struct coredump_params cprm = {
                .siginfo = siginfo,
-               .regs = signal_pt_regs(),
                .limit = rlimit(RLIMIT_CORE),
                /*
                 * We must use the same mm->flags while dumping core to avoid
@@ -535,6 +542,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
                 */
                .mm_flags = mm->flags,
                .vma_meta = NULL,
+               .cpu = raw_smp_processor_id(),
        };
 
        audit_core_dumps(siginfo->si_signo);
@@ -638,9 +646,9 @@ void do_coredump(const kernel_siginfo_t *siginfo)
                        goto close_fail;
                }
        } else {
-               struct user_namespace *mnt_userns;
+               struct mnt_idmap *idmap;
                struct inode *inode;
-               int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
+               int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW |
                                 O_LARGEFILE | O_EXCL;
 
                if (cprm.limit < binfmt->min_coredump)
@@ -716,9 +724,9 @@ void do_coredump(const kernel_siginfo_t *siginfo)
                 * a process dumps core while its cwd is e.g. on a vfat
                 * filesystem.
                 */
-               mnt_userns = file_mnt_user_ns(cprm.file);
-               if (!uid_eq(i_uid_into_mnt(mnt_userns, inode),
-                           current_fsuid())) {
+               idmap = file_mnt_idmap(cprm.file);
+               if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode),
+                                   current_fsuid())) {
                        pr_info_ratelimited("Core dump to %s aborted: cannot preserve file owner\n",
                                            cn.corename);
                        goto close_fail;
@@ -730,7 +738,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
                }
                if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
                        goto close_fail;
-               if (do_truncate(mnt_userns, cprm.file->f_path.dentry,
+               if (do_truncate(idmap, cprm.file->f_path.dentry,
                                0, 0, cprm.file))
                        goto close_fail;
        }
@@ -856,14 +864,80 @@ void dump_skip(struct coredump_params *cprm, size_t nr)
 EXPORT_SYMBOL(dump_skip);
 
 #ifdef CONFIG_ELF_CORE
+static int dump_emit_page(struct coredump_params *cprm, struct page *page)
+{
+       struct bio_vec bvec;
+       struct iov_iter iter;
+       struct file *file = cprm->file;
+       loff_t pos;
+       ssize_t n;
+
+       if (!page)
+               return 0;
+
+       if (cprm->to_skip) {
+               if (!__dump_skip(cprm, cprm->to_skip))
+                       return 0;
+               cprm->to_skip = 0;
+       }
+       if (cprm->written + PAGE_SIZE > cprm->limit)
+               return 0;
+       if (dump_interrupted())
+               return 0;
+       pos = file->f_pos;
+       bvec_set_page(&bvec, page, PAGE_SIZE, 0);
+       iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
+       n = __kernel_write_iter(cprm->file, &iter, &pos);
+       if (n != PAGE_SIZE)
+               return 0;
+       file->f_pos = pos;
+       cprm->written += PAGE_SIZE;
+       cprm->pos += PAGE_SIZE;
+
+       return 1;
+}
+
+/*
+ * If we might get machine checks from kernel accesses during the
+ * core dump, let's get those errors early rather than during the
+ * IO. This is not performance-critical enough to warrant having
+ * all the machine check logic in the iovec paths.
+ */
+#ifdef copy_mc_to_kernel
+
+#define dump_page_alloc() alloc_page(GFP_KERNEL)
+#define dump_page_free(x) __free_page(x)
+static struct page *dump_page_copy(struct page *src, struct page *dst)
+{
+       void *buf = kmap_local_page(src);
+       size_t left = copy_mc_to_kernel(page_address(dst), buf, PAGE_SIZE);
+       kunmap_local(buf);
+       return left ? NULL : dst;
+}
+
+#else
+
+/* We just want to return non-NULL; it's never used. */
+#define dump_page_alloc() ERR_PTR(-EINVAL)
+#define dump_page_free(x) ((void)(x))
+static inline struct page *dump_page_copy(struct page *src, struct page *dst)
+{
+       return src;
+}
+#endif
+
 int dump_user_range(struct coredump_params *cprm, unsigned long start,
                    unsigned long len)
 {
        unsigned long addr;
+       struct page *dump_page;
+
+       dump_page = dump_page_alloc();
+       if (!dump_page)
+               return 0;
 
        for (addr = start; addr < start + len; addr += PAGE_SIZE) {
                struct page *page;
-               int stop;
 
                /*
                 * To avoid having to allocate page tables for virtual address
@@ -874,17 +948,17 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
                 */
                page = get_dump_page(addr);
                if (page) {
-                       void *kaddr = kmap_local_page(page);
-
-                       stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
-                       kunmap_local(kaddr);
+                       int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page));
                        put_page(page);
-                       if (stop)
+                       if (stop) {
+                               dump_page_free(dump_page);
                                return 0;
+                       }
                } else {
                        dump_skip(cprm, PAGE_SIZE);
                }
        }
+       dump_page_free(dump_page);
        return 1;
 }
 #endif
@@ -946,7 +1020,6 @@ static struct ctl_table coredump_sysctls[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
-       { }
 };
 
 static int __init init_fs_coredump_sysctls(void)
@@ -1072,30 +1145,20 @@ whole:
        return vma->vm_end - vma->vm_start;
 }
 
-static struct vm_area_struct *first_vma(struct task_struct *tsk,
-                                       struct vm_area_struct *gate_vma)
-{
-       struct vm_area_struct *ret = tsk->mm->mmap;
-
-       if (ret)
-               return ret;
-       return gate_vma;
-}
-
 /*
  * Helper function for iterating across a vma list.  It ensures that the caller
  * will visit `gate_vma' prior to terminating the search.
  */
-static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
+static struct vm_area_struct *coredump_next_vma(struct vma_iterator *vmi,
+                                      struct vm_area_struct *vma,
                                       struct vm_area_struct *gate_vma)
 {
-       struct vm_area_struct *ret;
-
-       ret = this_vma->vm_next;
-       if (ret)
-               return ret;
-       if (this_vma == gate_vma)
+       if (gate_vma && (vma == gate_vma))
                return NULL;
+
+       vma = vma_next(vmi);
+       if (vma)
+               return vma;
        return gate_vma;
 }
 
@@ -1119,9 +1182,10 @@ static void free_vma_snapshot(struct coredump_params *cprm)
  */
 static bool dump_vma_snapshot(struct coredump_params *cprm)
 {
-       struct vm_area_struct *vma, *gate_vma;
+       struct vm_area_struct *gate_vma, *vma = NULL;
        struct mm_struct *mm = current->mm;
-       int i;
+       VMA_ITERATOR(vmi, mm, 0);
+       int i = 0;
 
        /*
         * Once the stack expansion code is fixed to not change VMA bounds
@@ -1141,8 +1205,7 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
                return false;
        }
 
-       for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
-                       vma = next_vma(vma, gate_vma), i++) {
+       while ((vma = coredump_next_vma(&vmi, vma, gate_vma)) != NULL) {
                struct core_vma_metadata *m = cprm->vma_meta + i;
 
                m->start = vma->vm_start;
@@ -1150,10 +1213,10 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
                m->flags = vma->vm_flags;
                m->dump_size = vma_dump_size(vma, cprm->mm_flags);
                m->pgoff = vma->vm_pgoff;
-
                m->file = vma->vm_file;
                if (m->file)
                        get_file(m->file);
+               i++;
        }
 
        mmap_write_unlock(mm);