Merge tag 'riscv-for-linus-5.15-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux
author    Linus Torvalds <torvalds@linux-foundation.org>
          Sun, 5 Sep 2021 18:31:23 +0000 (11:31 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Sun, 5 Sep 2021 18:31:23 +0000 (11:31 -0700)
Pull RISC-V updates from Palmer Dabbelt:

 - support for simulating PC-relative instructions (auipc and branches)
   in kprobes

 - support for forced IRQ threading (a brief usage sketch follows
   this list)

 - support for the hlt/nohlt kernel command line options, via the
   generic idle loop

 - show the edge/level triggered behavior of interrupts
   in /proc/interrupts

 - a handful of cleanups to our address mapping mechanisms

 - support for allocating gigantic hugepages via CMA

 - support for the undefined behavior sanitizer (UBSAN)

 - a handful of cleanups to the VDSO that allow the kernel to build with
   LLD

 - support for hugepage migration
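
As a companion to the forced-IRQ-threading item above, a minimal and
entirely hypothetical driver sketch showing what changes for existing
code: nothing in the handler needs rewriting.  With
CONFIG_IRQ_FORCED_THREADING selected and "threadirqs" on the kernel
command line, the genirq core runs ordinary handlers from a kernel
thread.  DEMO_IRQ and the "demo" name are placeholders for
illustration only.

    #include <linux/init.h>
    #include <linux/interrupt.h>

    #define DEMO_IRQ 17 /* hypothetical interrupt number */

    static irqreturn_t demo_handler(int irq, void *dev_id)
    {
            /*
             * Registered as a plain hard-IRQ handler.  When booted
             * with "threadirqs", the core invokes this from an
             * "irq/17-demo" kthread instead of hard-IRQ context, so
             * it may be preempted like any other thread.
             */
            return IRQ_HANDLED;
    }

    static int __init demo_init(void)
    {
            return request_irq(DEMO_IRQ, demo_handler, 0, "demo", NULL);
    }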

* tag 'riscv-for-linus-5.15-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (21 commits)
  riscv: add support for hugepage migration
  RISC-V: Fix VDSO build for !MMU
  riscv: use strscpy to replace strlcpy
  riscv: explicitly use symbol offsets for VDSO
  riscv: Enable Undefined Behavior Sanitizer UBSAN
  riscv: Keep the riscv Kconfig selects sorted
  riscv: Support allocating gigantic hugepages using CMA
  riscv: fix the global name pfn_base confliction error
  riscv: Move early fdt mapping creation in its own function
  riscv: Simplify BUILTIN_DTB device tree mapping handling
  riscv: Use __maybe_unused instead of #ifdefs around variable declarations
  riscv: Get rid of map_size parameter to create_kernel_page_table
  riscv: Introduce va_kernel_pa_offset for 32-bit kernel
  riscv: Optimize kernel virtual address conversion macro
  dt-bindings: riscv: add starfive jh7100 bindings
  riscv: Enable GENERIC_IRQ_SHOW_LEVEL
  riscv: Enable idle generic idle loop
  riscv: Allow forced irq threading
  riscv: Implement thread_struct whitelist for hardened usercopy
  riscv: kprobes: implement the branch instructions
  ...

13 files changed:
Documentation/devicetree/bindings/riscv/starfive.yaml [new file with mode: 0644]
arch/riscv/Kconfig
arch/riscv/Makefile
arch/riscv/include/asm/page.h
arch/riscv/include/asm/processor.h
arch/riscv/include/asm/vdso.h
arch/riscv/kernel/probes/decode-insn.c
arch/riscv/kernel/probes/simulate-insn.c
arch/riscv/kernel/setup.c
arch/riscv/kernel/vdso/Makefile
arch/riscv/kernel/vdso/gen_vdso_offsets.sh [new file with mode: 0755]
arch/riscv/kernel/vdso/so2s.sh [deleted file]
arch/riscv/mm/init.c

diff --git a/Documentation/devicetree/bindings/riscv/starfive.yaml b/Documentation/devicetree/bindings/riscv/starfive.yaml
new file mode 100644
index 0000000..5b36243
--- /dev/null
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/riscv/starfive.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive SoC-based boards
+
+maintainers:
+  - Michael Zhu <michael.zhu@starfivetech.com>
+  - Drew Fustini <drew@beagleboard.org>
+
+description:
+  StarFive SoC-based boards
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - items:
+          - const: beagle,beaglev-starlight-jh7100-r0
+          - const: starfive,jh7100
+
+additionalProperties: true
+
+...
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 1452584..baf60fc 100644
@@ -13,9 +13,7 @@ config 32BIT
 config RISCV
        def_bool y
        select ARCH_CLOCKSOURCE_INIT
-       select ARCH_SUPPORTS_ATOMIC_RMW
-       select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
-       select ARCH_STACKWALK
+       select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
        select ARCH_HAS_BINFMT_FLAT
        select ARCH_HAS_DEBUG_VM_PGTABLE
        select ARCH_HAS_DEBUG_VIRTUAL if MMU
@@ -31,8 +29,12 @@ config RISCV
        select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
        select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+       select ARCH_HAS_UBSAN_SANITIZE_ALL
        select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
        select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
+       select ARCH_STACKWALK
+       select ARCH_SUPPORTS_ATOMIC_RMW
+       select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
        select ARCH_SUPPORTS_HUGETLBFS if MMU
        select ARCH_USE_MEMTEST
        select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
@@ -48,9 +50,11 @@ config RISCV
        select GENERIC_CLOCKEVENTS_BROADCAST if SMP
        select GENERIC_EARLY_IOREMAP
        select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
+       select GENERIC_IDLE_POLL_SETUP
        select GENERIC_IOREMAP
        select GENERIC_IRQ_MULTI_HANDLER
        select GENERIC_IRQ_SHOW
+       select GENERIC_IRQ_SHOW_LEVEL
        select GENERIC_LIB_DEVMEM_IS_ALLOWED
        select GENERIC_PCI_IOMAP
        select GENERIC_PTDUMP if MMU
@@ -70,6 +74,7 @@ config RISCV
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
+       select HAVE_ARCH_THREAD_STRUCT_WHITELIST
        select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
        select HAVE_ASM_MODVERSIONS
        select HAVE_CONTEXT_TRACKING
@@ -95,6 +100,7 @@ config RISCV
        select HAVE_STACKPROTECTOR
        select HAVE_SYSCALL_TRACEPOINTS
        select IRQ_DOMAIN
+       select IRQ_FORCED_THREADING
        select MODULES_USE_ELF_RELA if MODULES
        select MODULE_SECTIONS if MODULES
        select OF
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index dcfbd2a..01906a9 100644
@@ -108,6 +108,12 @@ PHONY += vdso_install
 vdso_install:
        $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
 
+ifeq ($(CONFIG_MMU),y)
+prepare: vdso_prepare
+vdso_prepare: prepare0
+       $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h
+endif
+
 ifneq ($(CONFIG_XIP_KERNEL),y)
 ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy)
 KBUILD_IMAGE := $(boot)/loader.bin
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index b0ca505..109c97e 100644
@@ -79,8 +79,8 @@ typedef struct page *pgtable_t;
 #endif
 
 #ifdef CONFIG_MMU
-extern unsigned long pfn_base;
-#define ARCH_PFN_OFFSET                (pfn_base)
+extern unsigned long riscv_pfn_base;
+#define ARCH_PFN_OFFSET                (riscv_pfn_base)
 #else
 #define ARCH_PFN_OFFSET                (PAGE_OFFSET >> PAGE_SHIFT)
 #endif /* CONFIG_MMU */
@@ -91,10 +91,8 @@ struct kernel_mapping {
        uintptr_t size;
        /* Offset between linear mapping virtual address and kernel load address */
        unsigned long va_pa_offset;
-#ifdef CONFIG_64BIT
        /* Offset between kernel mapping virtual address and kernel load address */
        unsigned long va_kernel_pa_offset;
-#endif
        unsigned long va_kernel_xip_pa_offset;
 #ifdef CONFIG_XIP_KERNEL
        uintptr_t xiprom;
@@ -105,11 +103,11 @@ struct kernel_mapping {
 extern struct kernel_mapping kernel_map;
 extern phys_addr_t phys_ram_base;
 
-#ifdef CONFIG_64BIT
 #define is_kernel_mapping(x)   \
        ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
+
 #define is_linear_mapping(x)   \
-       ((x) >= PAGE_OFFSET && (x) < kernel_map.virt_addr)
+       ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr))
 
 #define linear_mapping_pa_to_va(x)     ((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
 #define kernel_mapping_pa_to_va(y)     ({                                              \
@@ -123,7 +121,7 @@ extern phys_addr_t phys_ram_base;
 #define linear_mapping_va_to_pa(x)     ((unsigned long)(x) - kernel_map.va_pa_offset)
 #define kernel_mapping_va_to_pa(y) ({                                          \
        unsigned long _y = y;                                                   \
-       (_y < kernel_map.virt_addr + XIP_OFFSET) ?                                      \
+       (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ?     \
                ((unsigned long)(_y) - kernel_map.va_kernel_xip_pa_offset) :            \
                ((unsigned long)(_y) - kernel_map.va_kernel_pa_offset - XIP_OFFSET);    \
        })
@@ -133,15 +131,6 @@ extern phys_addr_t phys_ram_base;
        is_linear_mapping(_x) ?                                                 \
                linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x);      \
        })
-#else
-#define is_kernel_mapping(x)   \
-       ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
-#define is_linear_mapping(x)   \
-       ((x) >= PAGE_OFFSET)
-
-#define __pa_to_va_nodebug(x)  ((void *)((unsigned long) (x) + kernel_map.va_pa_offset))
-#define __va_to_pa_nodebug(x)  ((unsigned long)(x) - kernel_map.va_pa_offset)
-#endif /* CONFIG_64BIT */
 
 #ifdef CONFIG_DEBUG_VIRTUAL
 extern phys_addr_t __virt_to_phys(unsigned long x);
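
The rewrite above folds the 32-bit/64-bit split into the expressions
themselves via IS_ENABLED() instead of keeping two #ifdef'd copies of
every macro.  A standalone sketch of the same pattern (the constants
and names below are illustrative, not the kernel's):

    #include <stdio.h>

    #define IS_ENABLED_64BIT 0              /* pretend: 32-bit build */
    #define PAGE_OFFSET      0xc0000000UL
    #define KERNEL_VIRT      0xc0400000UL   /* stand-in for kernel_map.virt_addr */

    /*
     * On 32-bit, (!IS_ENABLED_64BIT || ...) is constant-true, so the
     * compiler folds the check down to (x) >= PAGE_OFFSET -- exactly
     * what the deleted #else branch spelled out by hand.
     */
    #define is_linear_mapping(x) \
            ((x) >= PAGE_OFFSET && (!IS_ENABLED_64BIT || (x) < KERNEL_VIRT))

    int main(void)
    {
            printf("%d\n", is_linear_mapping(0xc0800000UL)); /* 1 on 32-bit */
            return 0;
    }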
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 021ed64..46b492c 100644
@@ -37,6 +37,14 @@ struct thread_struct {
        unsigned long bad_cause;
 };
 
+/* Whitelist the fstate from the task_struct for hardened usercopy */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+                                               unsigned long *size)
+{
+       *offset = offsetof(struct thread_struct, fstate);
+       *size = sizeof_field(struct thread_struct, fstate);
+}
+
 #define INIT_THREAD {                                  \
        .sp = sizeof(init_stack) + (long)&init_stack,   \
 }
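
For context on the whitelist hook added above: with
CONFIG_HARDENED_USERCOPY, copies between user space and a slab object
are validated against the usercopy window each cache declares, and
arch_thread_struct_whitelist() is how the architecture reports that
window for thread_struct.  Only fstate -- the FP state touched by
ptrace and signal handling -- is exposed; a copy overlapping sp or
bad_cause would now trip the check.  A simplified model of the bounds
test (illustrative, not the mm/usercopy.c code):

    #include <stdbool.h>

    /*
     * obj_off/len describe the copy within the slab object;
     * offset/size are what arch_thread_struct_whitelist() reported
     * when the cache was created.
     */
    static bool usercopy_window_ok(unsigned long obj_off, unsigned long len,
                                   unsigned long offset, unsigned long size)
    {
            /* the copy must fall entirely inside the whitelisted window */
            return obj_off >= offset && len <= size &&
                   obj_off - offset <= size - len;
    }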
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 1453a2f..893e471 100644
@@ -8,26 +8,25 @@
 #ifndef _ASM_RISCV_VDSO_H
 #define _ASM_RISCV_VDSO_H
 
+
+/*
+ * All systems with an MMU have a VDSO, but systems without an MMU don't
+ * support shared libraries and therefore don't have one.
+ */
+#ifdef CONFIG_MMU
+
 #include <linux/types.h>
+#include <generated/vdso-offsets.h>
 
 #ifndef CONFIG_GENERIC_TIME_VSYSCALL
 struct vdso_data {
 };
 #endif
 
-/*
- * The VDSO symbols are mapped into Linux so we can just use regular symbol
- * addressing to get their offsets in userspace.  The symbols are mapped at an
- * offset of 0, but since the linker must support setting weak undefined
- * symbols to the absolute address 0 it also happens to support other low
- * addresses even when the code model suggests those low addresses would not
- * otherwise be availiable.
- */
 #define VDSO_SYMBOL(base, name)                                                        \
-({                                                                             \
-       extern const char __vdso_##name[];                                      \
-       (void __user *)((unsigned long)(base) + __vdso_##name);                 \
-})
+       (void __user *)((unsigned long)(base) + __vdso_##name##_offset)
+
+#endif /* CONFIG_MMU */
 
 asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
 
diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c
index 0ed043a..64f6183 100644
@@ -38,11 +38,10 @@ riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api)
        RISCV_INSN_REJECTED(c_ebreak,           insn);
 #endif
 
-       RISCV_INSN_REJECTED(auipc,              insn);
-       RISCV_INSN_REJECTED(branch,             insn);
-
        RISCV_INSN_SET_SIMULATE(jal,            insn);
        RISCV_INSN_SET_SIMULATE(jalr,           insn);
+       RISCV_INSN_SET_SIMULATE(auipc,          insn);
+       RISCV_INSN_SET_SIMULATE(branch,         insn);
 
        return INSN_GOOD;
 }
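
The flip from REJECTED to SET_SIMULATE above is the heart of the
kprobes change: auipc and the conditional branches are PC-relative,
so they cannot be single-stepped out of line -- a copy executed from
the probe slot would compute against the slot's address rather than
the probed address.  A toy illustration of the discrepancy that
simulation avoids (both addresses and the immediate are made up):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t probed_pc = 0x80001000;        /* where auipc really lives */
            uint64_t slot_pc   = 0x80200000;        /* hypothetical probe slot */
            int64_t  imm       = 0x5000;            /* auipc's shifted immediate */

            printf("simulated (correct): 0x%llx\n",
                   (unsigned long long)(probed_pc + imm));
            printf("stepped from slot:   0x%llx\n",
                   (unsigned long long)(slot_pc + imm));
            return 0;
    }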
diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c
index 2519ce2..d73e96f 100644
@@ -83,3 +83,115 @@ bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *reg
 
        return ret;
 }
+
+#define auipc_rd_idx(opcode) \
+       ((opcode >> 7) & 0x1f)
+
+#define auipc_imm(opcode) \
+       ((((opcode) >> 12) & 0xfffff) << 12)
+
+#if __riscv_xlen == 64
+#define auipc_offset(opcode)   sign_extend64(auipc_imm(opcode), 31)
+#elif __riscv_xlen == 32
+#define auipc_offset(opcode)   auipc_imm(opcode)
+#else
+#error "Unexpected __riscv_xlen"
+#endif
+
+bool __kprobes simulate_auipc(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+       /*
+        * auipc instruction:
+        *  31        12 11 7 6      0
+        * | imm[31:12] | rd | opcode |
+        *        20       5     7
+        */
+
+       u32 rd_idx = auipc_rd_idx(opcode);
+       unsigned long rd_val = addr + auipc_offset(opcode);
+
+       if (!rv_insn_reg_set_val(regs, rd_idx, rd_val))
+               return false;
+
+       instruction_pointer_set(regs, addr + 4);
+
+       return true;
+}
+
+#define branch_rs1_idx(opcode) \
+       (((opcode) >> 15) & 0x1f)
+
+#define branch_rs2_idx(opcode) \
+       (((opcode) >> 20) & 0x1f)
+
+#define branch_funct3(opcode) \
+       (((opcode) >> 12) & 0x7)
+
+#define branch_imm(opcode) \
+       (((((opcode) >>  8) & 0xf ) <<  1) | \
+        ((((opcode) >> 25) & 0x3f) <<  5) | \
+        ((((opcode) >>  7) & 0x1 ) << 11) | \
+        ((((opcode) >> 31) & 0x1 ) << 12))
+
+#define branch_offset(opcode) \
+       sign_extend32((branch_imm(opcode)), 12)
+
+#define BRANCH_BEQ     0x0
+#define BRANCH_BNE     0x1
+#define BRANCH_BLT     0x4
+#define BRANCH_BGE     0x5
+#define BRANCH_BLTU    0x6
+#define BRANCH_BGEU    0x7
+
+bool __kprobes simulate_branch(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+       /*
+        * branch instructions:
+        *      31    30       25 24 20 19 15 14    12 11       8    7      6      0
+        * | imm[12] | imm[10:5] | rs2 | rs1 | funct3 | imm[4:1] | imm[11] | opcode |
+        *     1           6        5     5      3         4         1         7
+        *     imm[12|10:5]        rs2   rs1    000       imm[4:1|11]       1100011  BEQ
+        *     imm[12|10:5]        rs2   rs1    001       imm[4:1|11]       1100011  BNE
+        *     imm[12|10:5]        rs2   rs1    100       imm[4:1|11]       1100011  BLT
+        *     imm[12|10:5]        rs2   rs1    101       imm[4:1|11]       1100011  BGE
+        *     imm[12|10:5]        rs2   rs1    110       imm[4:1|11]       1100011  BLTU
+        *     imm[12|10:5]        rs2   rs1    111       imm[4:1|11]       1100011  BGEU
+        */
+
+       s32 offset;
+       s32 offset_tmp;
+       unsigned long rs1_val;
+       unsigned long rs2_val;
+
+       if (!rv_insn_reg_get_val(regs, branch_rs1_idx(opcode), &rs1_val) ||
+           !rv_insn_reg_get_val(regs, branch_rs2_idx(opcode), &rs2_val))
+               return false;
+
+       offset_tmp = branch_offset(opcode);
+       switch (branch_funct3(opcode)) {
+       case BRANCH_BEQ:
+               offset = (rs1_val == rs2_val) ? offset_tmp : 4;
+               break;
+       case BRANCH_BNE:
+               offset = (rs1_val != rs2_val) ? offset_tmp : 4;
+               break;
+       case BRANCH_BLT:
+               offset = ((long)rs1_val < (long)rs2_val) ? offset_tmp : 4;
+               break;
+       case BRANCH_BGE:
+               offset = ((long)rs1_val >= (long)rs2_val) ? offset_tmp : 4;
+               break;
+       case BRANCH_BLTU:
+               offset = (rs1_val < rs2_val) ? offset_tmp : 4;
+               break;
+       case BRANCH_BGEU:
+               offset = (rs1_val >= rs2_val) ? offset_tmp : 4;
+               break;
+       default:
+               return false;
+       }
+
+       instruction_pointer_set(regs, addr + offset);
+
+       return true;
+}
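
A worked example of the immediate reconstruction above, runnable as a
userspace harness (macros adapted from the patch; the opcode encodes
"auipc a0, 0x12345" and the PC value is arbitrary):

    #include <stdio.h>
    #include <stdint.h>

    #define auipc_rd_idx(opcode)    (((opcode) >> 7) & 0x1f)
    #define auipc_imm(opcode)       ((((opcode) >> 12) & 0xfffff) << 12)

    /* userspace stand-in for the kernel's sign_extend64() */
    static int64_t sign_extend64(uint64_t v, int sign_bit)
    {
            int shift = 63 - sign_bit;
            return (int64_t)(v << shift) >> shift;
    }

    int main(void)
    {
            uint32_t opcode = 0x12345517;   /* auipc a0, 0x12345 */
            uint64_t pc = 0x80000000;

            /* rd = x10 (a0); rd_val = pc + sign-extended (imm20 << 12) */
            printf("rd  = x%u\n", (unsigned)auipc_rd_idx(opcode));
            printf("val = 0x%llx\n", (unsigned long long)
                   (pc + sign_extend64(auipc_imm(opcode), 31)));
            return 0;
    }

Just as in simulate_auipc(), the simulated result is the probed
address plus the sign-extended upper immediate, after which the
instruction pointer advances by 4.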
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 120b2f6..b9620e5 100644
@@ -255,7 +255,7 @@ static void __init parse_dtb(void)
 
        pr_err("No DTB passed to the kernel\n");
 #ifdef CONFIG_CMDLINE_FORCE
-       strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+       strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
        pr_info("Forcing kernel command line to: %s\n", boot_command_line);
 #endif
 }
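
The strlcpy() -> strscpy() conversion above is more than style:
strlcpy() returns strlen(src), reading the entire source even past the
destination size, while strscpy() reads at most the destination size
and returns -E2BIG on truncation.  A userspace model of the semantics
(an approximation, not the kernel implementation):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    static long strscpy_model(char *dst, const char *src, size_t count)
    {
            size_t len = strnlen(src, count);       /* bounded read */

            if (len == count) {                     /* would truncate */
                    if (count) {
                            memcpy(dst, src, count - 1);
                            dst[count - 1] = '\0';
                    }
                    return -E2BIG;
            }
            memcpy(dst, src, len + 1);
            return (long)len;
    }

    int main(void)
    {
            char buf[8];

            /* strlcpy() would return 14 here, hiding the truncation */
            printf("%ld\n", strscpy_model(buf, "console=ttyS0 ", sizeof(buf)));
            printf("%s\n", buf);                    /* "console" */
            return 0;
    }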
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 24d936c..f2e0656 100644
@@ -23,10 +23,10 @@ ifneq ($(c-gettimeofday-y),)
 endif
 
 # Build rules
-targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-syms.S
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds
 obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
 
-obj-y += vdso.o vdso-syms.o
+obj-y += vdso.o
 CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
 
 # Disable -pg to prevent insert call site
@@ -36,6 +36,7 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
 GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
 
 # Force dependency
 $(obj)/vdso.o: $(obj)/vdso.so
@@ -43,20 +44,22 @@ $(obj)/vdso.o: $(obj)/vdso.so
 # link rule for the .so file, .lds has to be first
 $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
        $(call if_changed,vdsold)
-LDFLAGS_vdso.so.dbg = -shared -s -soname=linux-vdso.so.1 \
+LDFLAGS_vdso.so.dbg = -shared -S -soname=linux-vdso.so.1 \
        --build-id=sha1 --hash-style=both --eh-frame-hdr
 
-# We also create a special relocatable object that should mirror the symbol
-# table and layout of the linked DSO. With ld --just-symbols we can then
-# refer to these symbols in the kernel code rather than hand-coded addresses.
-$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE
-       $(call if_changed,so2s)
-
 # strip rule for the .so file
 $(obj)/%.so: OBJCOPYFLAGS := -S
 $(obj)/%.so: $(obj)/%.so.dbg FORCE
        $(call if_changed,objcopy)
 
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+       cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
+       $(call if_changed,vdsosym)
+
 # actual build commands
 # The DSO images are built using a special linker script
 # Make sure only to export the intended __vdso_xxx symbol offsets.
@@ -65,11 +68,6 @@ quiet_cmd_vdsold = VDSOLD  $@
                    $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
                    rm $@.tmp
 
-# Extracts symbol offsets from the VDSO, converting them into an assembly file
-# that contains the same symbols at the same offsets.
-quiet_cmd_so2s = SO2S    $@
-      cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@
-
 # install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
       cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
diff --git a/arch/riscv/kernel/vdso/gen_vdso_offsets.sh b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh
new file mode 100755
index 0000000..c2e5613
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+LC_ALL=C
+sed -n -e 's/^[0]\+\(0[0-9a-fA-F]*\) . \(__vdso_[a-zA-Z0-9_]*\)$/\#define \2_offset\t0x\1/p'
diff --git a/arch/riscv/kernel/vdso/so2s.sh b/arch/riscv/kernel/vdso/so2s.sh
deleted file mode 100755
index e64cb6d..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0+
-# Copyright 2020 Palmer Dabbelt <palmerdabbelt@google.com>
-
-sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_4.15\)*!.global \2\n.set \2,0x\1!' \
-| grep '^\.'
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index fc818c8..c0cddf0 100644
@@ -19,6 +19,7 @@
 #include <linux/set_memory.h>
 #include <linux/dma-map-ops.h>
 #include <linux/crash_dump.h>
+#include <linux/hugetlb.h>
 
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
@@ -222,6 +223,8 @@ static void __init setup_bootmem(void)
 
        early_init_fdt_scan_reserved_mem();
        dma_contiguous_reserve(dma32_phys_limit);
+       if (IS_ENABLED(CONFIG_64BIT))
+               hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
        memblock_allow_resize();
 }
 
@@ -234,14 +237,15 @@ static struct pt_alloc_ops _pt_ops __initdata;
 #define pt_ops _pt_ops
 #endif
 
-unsigned long pfn_base __ro_after_init;
-EXPORT_SYMBOL(pfn_base);
+unsigned long riscv_pfn_base __ro_after_init;
+EXPORT_SYMBOL(riscv_pfn_base);
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
 
 pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 
 #ifdef CONFIG_XIP_KERNEL
 #define trampoline_pg_dir      ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
@@ -322,7 +326,6 @@ static void __init create_pte_mapping(pte_t *ptep,
 static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
 static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
 static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
-static pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 
 #ifdef CONFIG_XIP_KERNEL
 #define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
@@ -408,6 +411,7 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)     \
        create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
 #define fixmap_pgd_next                fixmap_pte
+#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
 #endif
 
 void __init create_pgd_mapping(pgd_t *pgdp,
@@ -515,49 +519,80 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
 #endif
 
 #ifdef CONFIG_XIP_KERNEL
-static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size,
+static void __init create_kernel_page_table(pgd_t *pgdir,
                                            __always_unused bool early)
 {
        uintptr_t va, end_va;
 
        /* Map the flash resident part */
        end_va = kernel_map.virt_addr + kernel_map.xiprom_sz;
-       for (va = kernel_map.virt_addr; va < end_va; va += map_size)
+       for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
                create_pgd_mapping(pgdir, va,
                                   kernel_map.xiprom + (va - kernel_map.virt_addr),
-                                  map_size, PAGE_KERNEL_EXEC);
+                                  PMD_SIZE, PAGE_KERNEL_EXEC);
 
        /* Map the data in RAM */
        end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size;
-       for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += map_size)
+       for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE)
                create_pgd_mapping(pgdir, va,
                                   kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)),
-                                  map_size, PAGE_KERNEL);
+                                  PMD_SIZE, PAGE_KERNEL);
 }
 #else
-static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size,
-                                           bool early)
+static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
 {
        uintptr_t va, end_va;
 
        end_va = kernel_map.virt_addr + kernel_map.size;
-       for (va = kernel_map.virt_addr; va < end_va; va += map_size)
+       for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
                create_pgd_mapping(pgdir, va,
                                   kernel_map.phys_addr + (va - kernel_map.virt_addr),
-                                  map_size,
+                                  PMD_SIZE,
                                   early ?
                                        PAGE_KERNEL_EXEC : pgprot_from_va(va));
 }
 #endif
 
-asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+/*
+ * Setup a 4MB mapping that encompasses the device tree: for 64-bit kernel,
+ * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
+ * entry.
+ */
+static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
 {
-       uintptr_t __maybe_unused pa;
-       uintptr_t map_size;
-#ifndef __PAGETABLE_PMD_FOLDED
-       pmd_t fix_bmap_spmd, fix_bmap_epmd;
+#ifndef CONFIG_BUILTIN_DTB
+       uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
+
+       create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+                          IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa,
+                          PGDIR_SIZE,
+                          IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
+
+       if (IS_ENABLED(CONFIG_64BIT)) {
+               create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
+                                  pa, PMD_SIZE, PAGE_KERNEL);
+               create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
+                                  pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
+       }
+
+       dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+#else
+       /*
+        * For 64-bit kernel, __va can't be used since it would return a linear
+        * mapping address whereas dtb_early_va will be used before
+        * setup_vm_final installs the linear mapping. For 32-bit kernel, as the
+        * kernel is mapped in the linear mapping, that makes no difference.
+        */
+       dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
 #endif
 
+       dtb_early_pa = dtb_pa;
+}
+
+asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+{
+       pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
+
        kernel_map.virt_addr = KERNEL_LINK_ADDR;
 
 #ifdef CONFIG_XIP_KERNEL
@@ -573,23 +608,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
        kernel_map.phys_addr = (uintptr_t)(&_start);
        kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
 #endif
-
        kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
-#ifdef CONFIG_64BIT
        kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
-#endif
-
-       pfn_base = PFN_DOWN(kernel_map.phys_addr);
 
-       /*
-        * Enforce boot alignment requirements of RV32 and
-        * RV64 by only allowing PMD or PGD mappings.
-        */
-       map_size = PMD_SIZE;
+       riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
 
        /* Sanity check alignment and size */
        BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
-       BUG_ON((kernel_map.phys_addr % map_size) != 0);
+       BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);
 
 #ifdef CONFIG_64BIT
        /*
@@ -634,50 +660,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
         * us to reach paging_init(). We map all memory banks later
         * in setup_vm_final() below.
         */
-       create_kernel_page_table(early_pg_dir, map_size, true);
+       create_kernel_page_table(early_pg_dir, true);
 
-#ifndef __PAGETABLE_PMD_FOLDED
-       /* Setup early PMD for DTB */
-       create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
-                          (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE);
-#ifndef CONFIG_BUILTIN_DTB
-       /* Create two consecutive PMD mappings for FDT early scan */
-       pa = dtb_pa & ~(PMD_SIZE - 1);
-       create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
-                          pa, PMD_SIZE, PAGE_KERNEL);
-       create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
-                          pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
-       dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
-#else /* CONFIG_BUILTIN_DTB */
-#ifdef CONFIG_64BIT
-       /*
-        * __va can't be used since it would return a linear mapping address
-        * whereas dtb_early_va will be used before setup_vm_final installs
-        * the linear mapping.
-        */
-       dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
-#else
-       dtb_early_va = __va(dtb_pa);
-#endif /* CONFIG_64BIT */
-#endif /* CONFIG_BUILTIN_DTB */
-#else
-#ifndef CONFIG_BUILTIN_DTB
-       /* Create two consecutive PGD mappings for FDT early scan */
-       pa = dtb_pa & ~(PGDIR_SIZE - 1);
-       create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
-                          pa, PGDIR_SIZE, PAGE_KERNEL);
-       create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
-                          pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
-       dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
-#else /* CONFIG_BUILTIN_DTB */
-#ifdef CONFIG_64BIT
-       dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
-#else
-       dtb_early_va = __va(dtb_pa);
-#endif /* CONFIG_64BIT */
-#endif /* CONFIG_BUILTIN_DTB */
-#endif
-       dtb_early_pa = dtb_pa;
+       /* Setup early mapping for FDT early scan */
+       create_fdt_early_page_table(early_pg_dir, dtb_pa);
 
        /*
         * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
@@ -752,7 +738,7 @@ static void __init setup_vm_final(void)
 
 #ifdef CONFIG_64BIT
        /* Map the kernel */
-       create_kernel_page_table(swapper_pg_dir, PMD_SIZE, false);
+       create_kernel_page_table(swapper_pg_dir, false);
 #endif
 
        /* Clear fixmap PTE and PMD mappings */
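
For the hugetlb_cma_reserve() call wired into setup_bootmem() above:
PUD_SHIFT - PAGE_SHIFT reserves the CMA area in gigantic-page units
(1 GiB with Sv39's 4 KiB pages), sized by the hugetlb_cma= boot
parameter (e.g. hugetlb_cma=1G).  The reservation is drawn on when
gigantic pages are later added to the hugetlb pool, e.g. via
/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages.  A hedged
userspace sketch of mapping such a page once the pool is populated;
the MAP_HUGE_1GB fallback define matches the uapi encoding:

    #include <stdio.h>
    #include <sys/mman.h>

    #ifndef MAP_HUGE_1GB
    #define MAP_HUGE_1GB    (30 << 26)      /* log2(1G) << MAP_HUGE_SHIFT */
    #endif

    int main(void)
    {
            size_t sz = 1UL << 30;          /* one PUD-sized gigantic page */

            /*
             * Backed by the boot-time CMA reservation, growing the pool
             * can succeed even after physical memory has fragmented.
             */
            void *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_1GB,
                           -1, 0);
            if (p == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }
            munmap(p, sz);
            return 0;
    }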