Linux 6.9-rc1
[linux-2.6-microblaze.git] / kernel / kexec_file.c
index 1d546dc..2d1db05 100644 (file)
@@ -65,7 +65,7 @@ int kexec_image_probe_default(struct kimage *image, void *buf,
        return ret;
 }
 
-void *kexec_image_load_default(struct kimage *image)
+static void *kexec_image_load_default(struct kimage *image)
 {
        if (!image->fops || !image->fops->load)
                return ERR_PTR(-ENOEXEC);
@@ -123,6 +123,8 @@ void kimage_file_post_load_cleanup(struct kimage *image)
         */
        kfree(image->image_loader_data);
        image->image_loader_data = NULL;
+
+       kexec_file_dbg_print = false;
 }
 
 #ifdef CONFIG_KEXEC_SIG
@@ -202,6 +204,8 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
        if (ret < 0)
                return ret;
        image->kernel_buf_len = ret;
+       kexec_dprintk("kernel: %p kernel_size: %#lx\n",
+                     image->kernel_buf, image->kernel_buf_len);
 
        /* Call arch image probe handlers */
        ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
@@ -249,8 +253,8 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
        /* IMA needs to pass the measurement list to the next kernel. */
        ima_add_kexec_buffer(image);
 
-       /* Call arch image load handlers */
-       ldata = arch_kexec_kernel_image_load(image);
+       /* Call image load handler */
+       ldata = kexec_image_load_default(image);
 
        if (IS_ERR(ldata)) {
                ret = PTR_ERR(ldata);
@@ -278,13 +282,16 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
        if (!image)
                return -ENOMEM;
 
+       kexec_file_dbg_print = !!(flags & KEXEC_FILE_DEBUG);
        image->file_mode = 1;
 
+#ifdef CONFIG_CRASH_DUMP
        if (kexec_on_panic) {
                /* Enable special crash kernel control page alloc policy. */
                image->control_page = crashk_res.start;
                image->type = KEXEC_TYPE_CRASH;
        }
+#endif
 
        ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
                                           cmdline_ptr, cmdline_len, flags);
@@ -326,11 +333,13 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
                unsigned long, cmdline_len, const char __user *, cmdline_ptr,
                unsigned long, flags)
 {
-       int ret = 0, i;
+       int image_type = (flags & KEXEC_FILE_ON_CRASH) ?
+                        KEXEC_TYPE_CRASH : KEXEC_TYPE_DEFAULT;
        struct kimage **dest_image, *image;
+       int ret = 0, i;
 
        /* We only trust the superuser with rebooting the system. */
-       if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+       if (!kexec_load_permitted(image_type))
                return -EPERM;
 
        /* Make sure we have a legal set of flags */
@@ -339,15 +348,17 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 
        image = NULL;
 
-       if (!mutex_trylock(&kexec_mutex))
+       if (!kexec_trylock())
                return -EBUSY;
 
-       dest_image = &kexec_image;
-       if (flags & KEXEC_FILE_ON_CRASH) {
+#ifdef CONFIG_CRASH_DUMP
+       if (image_type == KEXEC_TYPE_CRASH) {
                dest_image = &kexec_crash_image;
                if (kexec_crash_image)
                        arch_kexec_unprotect_crashkres();
-       }
+       } else
+#endif
+               dest_image = &kexec_image;
 
        if (flags & KEXEC_FILE_UNLOAD)
                goto exchange;
@@ -381,13 +392,14 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
        if (ret)
                goto out;
 
+       kexec_dprintk("nr_segments = %lu\n", image->nr_segments);
        for (i = 0; i < image->nr_segments; i++) {
                struct kexec_segment *ksegment;
 
                ksegment = &image->segment[i];
-               pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
-                        i, ksegment->buf, ksegment->bufsz, ksegment->mem,
-                        ksegment->memsz);
+               kexec_dprintk("segment[%d]: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
+                             i, ksegment->buf, ksegment->bufsz, ksegment->mem,
+                             ksegment->memsz);
 
                ret = kimage_load_segment(image, &image->segment[i]);
                if (ret)
@@ -400,6 +412,8 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
        if (ret)
                goto out;
 
+       kexec_dprintk("kexec_file_load: type:%u, start:0x%lx head:0x%lx flags:0x%lx\n",
+                     image->type, image->start, image->head, flags);
        /*
         * Free up any temporary buffers allocated which are not needed
         * after image has been loaded
@@ -408,10 +422,12 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 exchange:
        image = xchg(dest_image, image);
 out:
+#ifdef CONFIG_CRASH_DUMP
        if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
                arch_kexec_protect_crashkres();
+#endif
 
-       mutex_unlock(&kexec_mutex);
+       kexec_unlock();
        kimage_free(image);
        return ret;
 }
@@ -423,11 +439,11 @@ static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
        unsigned long temp_start, temp_end;
 
        temp_end = min(end, kbuf->buf_max);
-       temp_start = temp_end - kbuf->memsz;
+       temp_start = temp_end - kbuf->memsz + 1;
 
        do {
                /* align down start */
-               temp_start = temp_start & (~(kbuf->buf_align - 1));
+               temp_start = ALIGN_DOWN(temp_start, kbuf->buf_align);
 
                if (temp_start < start || temp_start < kbuf->buf_min)
                        return 0;
@@ -524,8 +540,10 @@ static int kexec_walk_memblock(struct kexec_buf *kbuf,
        phys_addr_t mstart, mend;
        struct resource res = { };
 
+#ifdef CONFIG_CRASH_DUMP
        if (kbuf->image->type == KEXEC_TYPE_CRASH)
                return func(&crashk_res, kbuf);
+#endif
 
        /*
         * Using MEMBLOCK_NONE will properly skip MEMBLOCK_DRIVER_MANAGED. See
@@ -584,11 +602,15 @@ static int kexec_walk_memblock(struct kexec_buf *kbuf,
 static int kexec_walk_resources(struct kexec_buf *kbuf,
                                int (*func)(struct resource *, void *))
 {
+#ifdef CONFIG_CRASH_DUMP
        if (kbuf->image->type == KEXEC_TYPE_CRASH)
                return walk_iomem_res_desc(crashk_res.desc,
                                           IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
                                           crashk_res.start, crashk_res.end,
                                           kbuf, func);
+#endif
+       if (kbuf->top_down)
+               return walk_system_ram_res_rev(0, ULONG_MAX, kbuf, func);
        else
                return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
 }
@@ -621,7 +643,7 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
  * kexec_add_buffer - place a buffer in a kexec segment
  * @kbuf:      Buffer contents and memory parameters.
  *
- * This function assumes that kexec_mutex is held.
+ * This function assumes that kexec_lock is held.
  * On successful return, @kbuf->mem will have the physical address of
  * the buffer in memory.
  *
@@ -682,7 +704,7 @@ static int kexec_calculate_store_digests(struct kimage *image)
        struct kexec_sha_region *sha_regions;
        struct purgatory_info *pi = &image->purgatory_info;
 
-       if (!IS_ENABLED(CONFIG_ARCH_HAS_KEXEC_PURGATORY))
+       if (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY))
                return 0;
 
        zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
@@ -723,6 +745,12 @@ static int kexec_calculate_store_digests(struct kimage *image)
        for (j = i = 0; i < image->nr_segments; i++) {
                struct kexec_segment *ksegment;
 
+#ifdef CONFIG_CRASH_HOTPLUG
+               /* Exclude elfcorehdr (matched by input segment index i) to allow future changes via hotplug */
+               if (i == image->elfcorehdr_index)
+                       continue;
+#endif
+
                ksegment = &image->segment[i];
                /*
                 * Skip purgatory as it will be modified once we put digest
@@ -787,7 +815,7 @@ out:
        return ret;
 }
 
-#ifdef CONFIG_ARCH_HAS_KEXEC_PURGATORY
+#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
 /*
  * kexec_purgatory_setup_kbuf - prepare buffer to load purgatory.
  * @pi:                Purgatory to be loaded.
@@ -864,6 +892,7 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
 {
        unsigned long bss_addr;
        unsigned long offset;
+       size_t sechdrs_size;
        Elf_Shdr *sechdrs;
        int i;
 
@@ -871,11 +900,11 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
         * The section headers in kexec_purgatory are read-only. In order to
         * have them modifiable make a temporary copy.
         */
-       sechdrs = vzalloc(array_size(sizeof(Elf_Shdr), pi->ehdr->e_shnum));
+       sechdrs_size = array_size(sizeof(Elf_Shdr), pi->ehdr->e_shnum);
+       sechdrs = vzalloc(sechdrs_size);
        if (!sechdrs)
                return -ENOMEM;
-       memcpy(sechdrs, (void *)pi->ehdr + pi->ehdr->e_shoff,
-              pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+       memcpy(sechdrs, (void *)pi->ehdr + pi->ehdr->e_shoff, sechdrs_size);
        pi->sechdrs = sechdrs;
 
        offset = 0;
@@ -898,10 +927,22 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
                }
 
                offset = ALIGN(offset, align);
+
+               /*
+                * Check if the segment contains the entry point, if so,
+                * calculate the value of image->start based on it.
+                * If the compiler has produced more than one .text section
+                * (Eg: .text.hot), they are generally after the main .text
+                * section, and they shall not be used to calculate
+                * image->start. So do not re-calculate image->start if it
+                * is not set to the initial value, and warn the user so they
+                * have a chance to fix their purgatory's linker script.
+                */
                if (sechdrs[i].sh_flags & SHF_EXECINSTR &&
                    pi->ehdr->e_entry >= sechdrs[i].sh_addr &&
                    pi->ehdr->e_entry < (sechdrs[i].sh_addr
-                                        + sechdrs[i].sh_size)) {
+                                        + sechdrs[i].sh_size) &&
+                   !WARN_ON(kbuf->image->start != pi->ehdr->e_entry)) {
                        kbuf->image->start -= sechdrs[i].sh_addr;
                        kbuf->image->start += kbuf->mem + offset;
                }
@@ -1134,185 +1175,4 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
 
        return 0;
 }
-#endif /* CONFIG_ARCH_HAS_KEXEC_PURGATORY */
-
-int crash_exclude_mem_range(struct crash_mem *mem,
-                           unsigned long long mstart, unsigned long long mend)
-{
-       int i, j;
-       unsigned long long start, end, p_start, p_end;
-       struct crash_mem_range temp_range = {0, 0};
-
-       for (i = 0; i < mem->nr_ranges; i++) {
-               start = mem->ranges[i].start;
-               end = mem->ranges[i].end;
-               p_start = mstart;
-               p_end = mend;
-
-               if (mstart > end || mend < start)
-                       continue;
-
-               /* Truncate any area outside of range */
-               if (mstart < start)
-                       p_start = start;
-               if (mend > end)
-                       p_end = end;
-
-               /* Found completely overlapping range */
-               if (p_start == start && p_end == end) {
-                       mem->ranges[i].start = 0;
-                       mem->ranges[i].end = 0;
-                       if (i < mem->nr_ranges - 1) {
-                               /* Shift rest of the ranges to left */
-                               for (j = i; j < mem->nr_ranges - 1; j++) {
-                                       mem->ranges[j].start =
-                                               mem->ranges[j+1].start;
-                                       mem->ranges[j].end =
-                                                       mem->ranges[j+1].end;
-                               }
-
-                               /*
-                                * Continue to check if there are another overlapping ranges
-                                * from the current position because of shifting the above
-                                * mem ranges.
-                                */
-                               i--;
-                               mem->nr_ranges--;
-                               continue;
-                       }
-                       mem->nr_ranges--;
-                       return 0;
-               }
-
-               if (p_start > start && p_end < end) {
-                       /* Split original range */
-                       mem->ranges[i].end = p_start - 1;
-                       temp_range.start = p_end + 1;
-                       temp_range.end = end;
-               } else if (p_start != start)
-                       mem->ranges[i].end = p_start - 1;
-               else
-                       mem->ranges[i].start = p_end + 1;
-               break;
-       }
-
-       /* If a split happened, add the split to array */
-       if (!temp_range.end)
-               return 0;
-
-       /* Split happened */
-       if (i == mem->max_nr_ranges - 1)
-               return -ENOMEM;
-
-       /* Location where new range should go */
-       j = i + 1;
-       if (j < mem->nr_ranges) {
-               /* Move over all ranges one slot towards the end */
-               for (i = mem->nr_ranges - 1; i >= j; i--)
-                       mem->ranges[i + 1] = mem->ranges[i];
-       }
-
-       mem->ranges[j].start = temp_range.start;
-       mem->ranges[j].end = temp_range.end;
-       mem->nr_ranges++;
-       return 0;
-}
-
-int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
-                         void **addr, unsigned long *sz)
-{
-       Elf64_Ehdr *ehdr;
-       Elf64_Phdr *phdr;
-       unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
-       unsigned char *buf;
-       unsigned int cpu, i;
-       unsigned long long notes_addr;
-       unsigned long mstart, mend;
-
-       /* extra phdr for vmcoreinfo ELF note */
-       nr_phdr = nr_cpus + 1;
-       nr_phdr += mem->nr_ranges;
-
-       /*
-        * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
-        * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64).
-        * I think this is required by tools like gdb. So same physical
-        * memory will be mapped in two ELF headers. One will contain kernel
-        * text virtual addresses and other will have __va(physical) addresses.
-        */
-
-       nr_phdr++;
-       elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
-       elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
-
-       buf = vzalloc(elf_sz);
-       if (!buf)
-               return -ENOMEM;
-
-       ehdr = (Elf64_Ehdr *)buf;
-       phdr = (Elf64_Phdr *)(ehdr + 1);
-       memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
-       ehdr->e_ident[EI_CLASS] = ELFCLASS64;
-       ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
-       ehdr->e_ident[EI_VERSION] = EV_CURRENT;
-       ehdr->e_ident[EI_OSABI] = ELF_OSABI;
-       memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
-       ehdr->e_type = ET_CORE;
-       ehdr->e_machine = ELF_ARCH;
-       ehdr->e_version = EV_CURRENT;
-       ehdr->e_phoff = sizeof(Elf64_Ehdr);
-       ehdr->e_ehsize = sizeof(Elf64_Ehdr);
-       ehdr->e_phentsize = sizeof(Elf64_Phdr);
-
-       /* Prepare one phdr of type PT_NOTE for each present CPU */
-       for_each_present_cpu(cpu) {
-               phdr->p_type = PT_NOTE;
-               notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
-               phdr->p_offset = phdr->p_paddr = notes_addr;
-               phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
-               (ehdr->e_phnum)++;
-               phdr++;
-       }
-
-       /* Prepare one PT_NOTE header for vmcoreinfo */
-       phdr->p_type = PT_NOTE;
-       phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
-       phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
-       (ehdr->e_phnum)++;
-       phdr++;
-
-       /* Prepare PT_LOAD type program header for kernel text region */
-       if (need_kernel_map) {
-               phdr->p_type = PT_LOAD;
-               phdr->p_flags = PF_R|PF_W|PF_X;
-               phdr->p_vaddr = (unsigned long) _text;
-               phdr->p_filesz = phdr->p_memsz = _end - _text;
-               phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
-               ehdr->e_phnum++;
-               phdr++;
-       }
-
-       /* Go through all the ranges in mem->ranges[] and prepare phdr */
-       for (i = 0; i < mem->nr_ranges; i++) {
-               mstart = mem->ranges[i].start;
-               mend = mem->ranges[i].end;
-
-               phdr->p_type = PT_LOAD;
-               phdr->p_flags = PF_R|PF_W|PF_X;
-               phdr->p_offset  = mstart;
-
-               phdr->p_paddr = mstart;
-               phdr->p_vaddr = (unsigned long) __va(mstart);
-               phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
-               phdr->p_align = 0;
-               ehdr->e_phnum++;
-               pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
-                       phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
-                       ehdr->e_phnum, phdr->p_offset);
-               phdr++;
-       }
-
-       *addr = buf;
-       *sz = elf_sz;
-       return 0;
-}
+#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */