Merge tag 'riscv-for-linus-6.8-mw4' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 20 Jan 2024 19:06:04 +0000 (11:06 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 20 Jan 2024 19:06:04 +0000 (11:06 -0800)
Pull more RISC-V updates from Palmer Dabbelt:

 - Support for tuning for systems with fast misaligned accesses.

 - Support for SBI-based suspend.

 - Support for the new SBI debug console extension.

 - The T-Head CMOs now use PA-based flushes.

 - Support for enabling the V extension in kernel code.

 - Optimized IP checksum routines.

 - Various ftrace improvements.

 - Support for archrandom, which depends on the Zkr extension.

 - The build is no longer broken under NET=n, KUNIT=y for ports that
   don't define their own ipv6 checksum.

* tag 'riscv-for-linus-6.8-mw4' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (56 commits)
  lib: checksum: Fix build with CONFIG_NET=n
  riscv: lib: Check if output in asm goto supported
  riscv: Fix build error on rv32 + XIP
  riscv: optimize ELF relocation function in riscv
  RISC-V: Implement archrandom when Zkr is available
  riscv: Optimize hweight API with Zbb extension
  riscv: add dependency among Image(.gz), loader(.bin), and vmlinuz.efi
  samples: ftrace: Add RISC-V support for SAMPLE_FTRACE_DIRECT[_MULTI]
  riscv: ftrace: Add DYNAMIC_FTRACE_WITH_DIRECT_CALLS support
  riscv: ftrace: Make function graph use ftrace directly
  riscv: select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
  lib/Kconfig.debug: Update AS_HAS_NON_CONST_LEB128 comment and name
  riscv: Restrict DWARF5 when building with LLVM to known working versions
  riscv: Hoist linker relaxation disabling logic into Kconfig
  kunit: Add tests for csum_ipv6_magic and ip_fast_csum
  riscv: Add checksum library
  riscv: Add checksum header
  riscv: Add static key for misaligned accesses
  asm-generic: Improve csum_fold
  RISC-V: selftests: cbo: Ensure asm operands match constraints
  ...

71 files changed:
Documentation/devicetree/bindings/riscv/cpus.yaml
Documentation/devicetree/bindings/riscv/extensions.yaml
Documentation/features/vm/TLB/arch-support.txt
arch/riscv/Kconfig
arch/riscv/Kconfig.errata
arch/riscv/Makefile
arch/riscv/configs/defconfig
arch/riscv/errata/thead/errata.c
arch/riscv/include/asm/arch_hweight.h [new file with mode: 0644]
arch/riscv/include/asm/archrandom.h [new file with mode: 0644]
arch/riscv/include/asm/asm-extable.h
arch/riscv/include/asm/asm-prototypes.h
arch/riscv/include/asm/bitops.h
arch/riscv/include/asm/checksum.h [new file with mode: 0644]
arch/riscv/include/asm/cpufeature.h
arch/riscv/include/asm/csr.h
arch/riscv/include/asm/entry-common.h
arch/riscv/include/asm/errata_list.h
arch/riscv/include/asm/ftrace.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/processor.h
arch/riscv/include/asm/sbi.h
arch/riscv/include/asm/simd.h [new file with mode: 0644]
arch/riscv/include/asm/switch_to.h
arch/riscv/include/asm/thread_info.h
arch/riscv/include/asm/tlbbatch.h [new file with mode: 0644]
arch/riscv/include/asm/tlbflush.h
arch/riscv/include/asm/vector.h
arch/riscv/include/asm/word-at-a-time.h
arch/riscv/include/asm/xor.h [new file with mode: 0644]
arch/riscv/kernel/Makefile
arch/riscv/kernel/cpufeature.c
arch/riscv/kernel/entry.S
arch/riscv/kernel/ftrace.c
arch/riscv/kernel/kernel_mode_vector.c [new file with mode: 0644]
arch/riscv/kernel/mcount-dyn.S
arch/riscv/kernel/module.c
arch/riscv/kernel/pi/cmdline_early.c
arch/riscv/kernel/process.c
arch/riscv/kernel/ptrace.c
arch/riscv/kernel/sbi.c
arch/riscv/kernel/signal.c
arch/riscv/kernel/suspend.c
arch/riscv/kernel/vector.c
arch/riscv/lib/Makefile
arch/riscv/lib/csum.c [new file with mode: 0644]
arch/riscv/lib/riscv_v_helpers.c [new file with mode: 0644]
arch/riscv/lib/uaccess.S
arch/riscv/lib/uaccess_vector.S [new file with mode: 0644]
arch/riscv/lib/xor.S [new file with mode: 0644]
arch/riscv/mm/extable.c
arch/riscv/mm/init.c
arch/riscv/mm/tlbflush.c
drivers/tty/hvc/Kconfig
drivers/tty/hvc/hvc_riscv_sbi.c
drivers/tty/serial/Kconfig
drivers/tty/serial/earlycon-riscv-sbi.c
include/asm-generic/checksum.h
lib/Kconfig.debug
lib/checksum_kunit.c
samples/ftrace/ftrace-direct-modify.c
samples/ftrace/ftrace-direct-multi-modify.c
samples/ftrace/ftrace-direct-multi.c
samples/ftrace/ftrace-direct-too.c
samples/ftrace/ftrace-direct.c
tools/testing/selftests/riscv/hwprobe/cbo.c
tools/testing/selftests/riscv/hwprobe/hwprobe.c
tools/testing/selftests/riscv/mm/mmap_test.h
tools/testing/selftests/riscv/vector/v_initval_nolibc.c
tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
tools/testing/selftests/riscv/vector/vstate_prctl.c

index 23646b6..9d8670c 100644 (file)
@@ -63,8 +63,8 @@ properties:
 
   mmu-type:
     description:
-      Identifies the MMU address translation mode used on this
-      hart.  These values originate from the RISC-V Privileged
+      Identifies the largest MMU address translation mode supported by
+      this hart.  These values originate from the RISC-V Privileged
       Specification document, available from
       https://riscv.org/specifications/
     $ref: /schemas/types.yaml#/definitions/string
@@ -80,6 +80,11 @@ properties:
     description:
       The blocksize in bytes for the Zicbom cache operations.
 
+  riscv,cbop-block-size:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      The blocksize in bytes for the Zicbop cache operations.
+
   riscv,cboz-block-size:
     $ref: /schemas/types.yaml#/definitions/uint32
     description:
index 27beedb..63d81dc 100644 (file)
@@ -48,7 +48,7 @@ properties:
       insensitive, letters in the riscv,isa string must be all
       lowercase.
     $ref: /schemas/types.yaml#/definitions/string
-    pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$
+    pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[0-9a-z])+)?(?:_[hsxz](?:[0-9a-z])+)*$
     deprecated: true
 
   riscv,isa-base:
index 8fd2207..d222bd3 100644 (file)
@@ -20,7 +20,7 @@
     |    openrisc: |  ..  |
     |      parisc: | TODO |
     |     powerpc: | TODO |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: | TODO |
index b549499..bffbd86 100644 (file)
@@ -53,6 +53,7 @@ config RISCV
        select ARCH_USE_MEMTEST
        select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_USES_CFI_TRAPS if CFI_CLANG
+       select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
        select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
        select ARCH_WANT_FRAME_POINTERS
        select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
@@ -66,9 +67,10 @@ config RISCV
        select CLINT_TIMER if !MMU
        select CLONE_BACKWARDS
        select COMMON_CLK
-       select CPU_PM if CPU_IDLE || HIBERNATION
+       select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND
        select EDAC_SUPPORT
        select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE)
+       select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE
        select GENERIC_ARCH_TOPOLOGY
        select GENERIC_ATOMIC64 if !64BIT
        select GENERIC_CLOCKEVENTS_BROADCAST if SMP
@@ -115,6 +117,7 @@ config RISCV
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DMA_CONTIGUOUS if MMU
        select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
+       select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
        select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
        select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
        select HAVE_FUNCTION_GRAPH_TRACER
@@ -142,6 +145,8 @@ config RISCV
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_RETHOOK if !XIP_KERNEL
        select HAVE_RSEQ
+       select HAVE_SAMPLE_FTRACE_DIRECT
+       select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
        select HAVE_STACKPROTECTOR
        select HAVE_SYSCALL_TRACEPOINTS
        select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
@@ -183,6 +188,20 @@ config HAVE_SHADOW_CALL_STACK
        # https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769
        depends on $(ld-option,--no-relax-gp)
 
+config RISCV_USE_LINKER_RELAXATION
+       def_bool y
+       # https://github.com/llvm/llvm-project/commit/6611d58f5bbcbec77262d392e2923e1d680f6985
+       depends on !LD_IS_LLD || LLD_VERSION >= 150000
+
+# https://github.com/llvm/llvm-project/commit/bbc0f99f3bc96f1db16f649fc21dd18e5b0918f6
+config ARCH_HAS_BROKEN_DWARF5
+       def_bool y
+       depends on RISCV_USE_LINKER_RELAXATION
+       # https://github.com/llvm/llvm-project/commit/1df5ea29b43690b6622db2cad7b745607ca4de6a
+       depends on AS_IS_LLVM && AS_VERSION < 180000
+       # https://github.com/llvm/llvm-project/commit/7ffabb61a5569444b5ac9322e22e5471cc5e4a77
+       depends on LD_IS_LLD && LLD_VERSION < 180000
+
 config ARCH_MMAP_RND_BITS_MIN
        default 18 if 64BIT
        default 8
@@ -529,6 +548,28 @@ config RISCV_ISA_V_DEFAULT_ENABLE
 
          If you don't know what to do here, say Y.
 
+config RISCV_ISA_V_UCOPY_THRESHOLD
+       int "Threshold size for vectorized user copies"
+       depends on RISCV_ISA_V
+       default 768
+       help
+         Prefer using vectorized copy_to_user()/copy_from_user() when the
+         workload size exceeds this value.
+
+config RISCV_ISA_V_PREEMPTIVE
+       bool "Run kernel-mode Vector with kernel preemption"
+       depends on PREEMPTION
+       depends on RISCV_ISA_V
+       default y
+       help
+         Usually, in-kernel SIMD routines are run with preemption disabled.
+         Functions which invoke long-running SIMD thus must yield the core's
+         vector unit to prevent blocking other tasks for too long.
+
+         This config allows the kernel to run SIMD without explicitly disabling
+         preemption. Enabling this config will result in higher memory
+         consumption due to the allocation of per-task kernel Vector context.
+
 config TOOLCHAIN_HAS_ZBB
        bool
        default y
@@ -655,6 +696,20 @@ config RISCV_MISALIGNED
          load/store for both kernel and userspace. When disabled, misaligned
          accesses will generate SIGBUS in userspace and panic in kernel.
 
+config RISCV_EFFICIENT_UNALIGNED_ACCESS
+       bool "Assume the CPU supports fast unaligned memory accesses"
+       depends on NONPORTABLE
+       select DCACHE_WORD_ACCESS if MMU
+       select HAVE_EFFICIENT_UNALIGNED_ACCESS
+       help
+         Say Y here if you want the kernel to assume that the CPU supports
+         efficient unaligned memory accesses.  When enabled, this option
+         improves the performance of the kernel on such CPUs.  However, the
+         kernel will run much more slowly, or will not be able to run at all,
+         on CPUs that do not support efficient unaligned memory accesses.
+
+         If unsure what to do here, say N.
+
 endmenu # "Platform type"
 
 menu "Kernel features"
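
The user-visible Kconfig options added above surface as ordinary config symbols; a purely illustrative .config fragment using the defaults stated in their help text (RISCV_EFFICIENT_UNALIGNED_ACCESS additionally depends on NONPORTABLE):

    CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD=768
    CONFIG_RISCV_ISA_V_PREEMPTIVE=y
    # CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS is not set
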
index f5c432b..910ba88 100644 (file)
@@ -98,6 +98,7 @@ config ERRATA_THEAD_CMO
        depends on ERRATA_THEAD && MMU
        select DMA_DIRECT_REMAP
        select RISCV_DMA_NONCOHERENT
+       select RISCV_NONSTANDARD_CACHE_OPS
        default y
        help
          This will apply the cache management errata to handle the
index a74be78..0b7d109 100644 (file)
@@ -43,8 +43,7 @@ else
        KBUILD_LDFLAGS += -melf32lriscv
 endif
 
-ifeq ($(CONFIG_LD_IS_LLD),y)
-ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 150000),y)
+ifndef CONFIG_RISCV_USE_LINKER_RELAXATION
        KBUILD_CFLAGS += -mno-relax
        KBUILD_AFLAGS += -mno-relax
 ifndef CONFIG_AS_IS_LLVM
@@ -52,7 +51,6 @@ ifndef CONFIG_AS_IS_LLVM
        KBUILD_AFLAGS += -Wa,-mno-relax
 endif
 endif
-endif
 
 ifeq ($(CONFIG_SHADOW_CALL_STACK),y)
        KBUILD_LDFLAGS += --no-relax-gp
@@ -108,7 +106,9 @@ KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
 # unaligned accesses.  While unaligned accesses are explicitly allowed in the
 # RISC-V ISA, they're emulated by machine mode traps on all extant
 # architectures.  It's faster to have GCC emit only aligned accesses.
+ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS),y)
 KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
+endif
 
 ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
 prepare: stack_protector_prepare
@@ -163,6 +163,8 @@ BOOT_TARGETS := Image Image.gz loader loader.bin xipImage vmlinuz.efi
 
 all:   $(notdir $(KBUILD_IMAGE))
 
+loader.bin: loader
+Image.gz loader vmlinuz.efi: Image
 $(BOOT_TARGETS): vmlinux
        $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
        @$(kecho) '  Kernel: $(boot)/$@ is ready'
index 9058812..eaf34e8 100644 (file)
@@ -149,6 +149,7 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_VIRTIO=y
index 0554ed4..b1c410b 100644 (file)
 #include <asm/alternative.h>
 #include <asm/cacheflush.h>
 #include <asm/cpufeature.h>
+#include <asm/dma-noncoherent.h>
 #include <asm/errata_list.h>
 #include <asm/hwprobe.h>
+#include <asm/io.h>
 #include <asm/patch.h>
 #include <asm/vendorid_list.h>
 
@@ -33,6 +35,69 @@ static bool errata_probe_pbmt(unsigned int stage,
        return false;
 }
 
+/*
+ * th.dcache.ipa rs1 (invalidate, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01010      rs1       000      00000  0001011
+ * th.dcache.iva rs1 (invalidate, virtual address)
+ *   0000001    00110      rs1       000      00000  0001011
+ *
+ * th.dcache.cpa rs1 (clean, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01001      rs1       000      00000  0001011
+ * th.dcache.cva rs1 (clean, virtual address)
+ *   0000001    00101      rs1       000      00000  0001011
+ *
+ * th.dcache.cipa rs1 (clean then invalidate, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01011      rs1       000      00000  0001011
+ * th.dcache.civa rs1 (clean then invalidate, virtual address)
+ *   0000001    00111      rs1       000      00000  0001011
+ *
+ * th.sync.s (make sure all cache operations finished)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000000    11001     00000      000      00000  0001011
+ */
+#define THEAD_INVAL_A0 ".long 0x02a5000b"
+#define THEAD_CLEAN_A0 ".long 0x0295000b"
+#define THEAD_FLUSH_A0 ".long 0x02b5000b"
+#define THEAD_SYNC_S   ".long 0x0190000b"
+
+#define THEAD_CMO_OP(_op, _start, _size, _cachesize)                   \
+asm volatile("mv a0, %1\n\t"                                           \
+            "j 2f\n\t"                                                 \
+            "3:\n\t"                                                   \
+            THEAD_##_op##_A0 "\n\t"                                    \
+            "add a0, a0, %0\n\t"                                       \
+            "2:\n\t"                                                   \
+            "bltu a0, %2, 3b\n\t"                                      \
+            THEAD_SYNC_S                                               \
+            : : "r"(_cachesize),                                       \
+                "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),  \
+                "r"((unsigned long)(_start) + (_size))                 \
+            : "a0")
+
+static void thead_errata_cache_inv(phys_addr_t paddr, size_t size)
+{
+       THEAD_CMO_OP(INVAL, paddr, size, riscv_cbom_block_size);
+}
+
+static void thead_errata_cache_wback(phys_addr_t paddr, size_t size)
+{
+       THEAD_CMO_OP(CLEAN, paddr, size, riscv_cbom_block_size);
+}
+
+static void thead_errata_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+       THEAD_CMO_OP(FLUSH, paddr, size, riscv_cbom_block_size);
+}
+
+static const struct riscv_nonstd_cache_ops thead_errata_cmo_ops = {
+       .wback = &thead_errata_cache_wback,
+       .inv = &thead_errata_cache_inv,
+       .wback_inv = &thead_errata_cache_wback_inv,
+};
+
 static bool errata_probe_cmo(unsigned int stage,
                             unsigned long arch_id, unsigned long impid)
 {
@@ -48,6 +113,7 @@ static bool errata_probe_cmo(unsigned int stage,
        if (stage == RISCV_ALTERNATIVES_BOOT) {
                riscv_cbom_block_size = L1_CACHE_BYTES;
                riscv_noncoherent_supported();
+               riscv_noncoherent_register_cache_ops(&thead_errata_cmo_ops);
        }
 
        return true;
@@ -77,8 +143,7 @@ static u32 thead_errata_probe(unsigned int stage,
        if (errata_probe_pbmt(stage, archid, impid))
                cpu_req_errata |= BIT(ERRATA_THEAD_PBMT);
 
-       if (errata_probe_cmo(stage, archid, impid))
-               cpu_req_errata |= BIT(ERRATA_THEAD_CMO);
+       errata_probe_cmo(stage, archid, impid);
 
        if (errata_probe_pmu(stage, archid, impid))
                cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
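
For reference, the .long encodings above follow directly from the field layout documented in the comment. A stand-alone sketch (user-space C, not part of the patch; the "funct5"/"funct7" field names are illustrative, and a0 is assumed to be x10 so rs1 = 10):

    #include <stdint.h>
    #include <stdio.h>

    /* Assemble a th.dcache.* instruction from the documented bit fields. */
    static uint32_t thead_dcache_insn(uint32_t funct5)
    {
            const uint32_t opcode = 0x0b;   /* bits 6-0:   0001011 (custom-0) */
            const uint32_t rd     = 0;      /* bits 11-7:  00000              */
            const uint32_t funct3 = 0;      /* bits 14-12: 000                */
            const uint32_t rs1    = 10;     /* bits 19-15: a0 is x10          */
            const uint32_t funct7 = 0x01;   /* bits 31-25: 0000001            */

            return (funct7 << 25) | (funct5 << 20) | (rs1 << 15) |
                   (funct3 << 12) | (rd << 7) | opcode;
    }

    int main(void)
    {
            printf("%#010x\n", (unsigned)thead_dcache_insn(0x0a)); /* ipa  -> 0x02a5000b */
            printf("%#010x\n", (unsigned)thead_dcache_insn(0x09)); /* cpa  -> 0x0295000b */
            printf("%#010x\n", (unsigned)thead_dcache_insn(0x0b)); /* cipa -> 0x02b5000b */
            return 0;
    }
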
diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h
new file mode 100644 (file)
index 0000000..c20236a
--- /dev/null
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Based on arch/x86/include/asm/arch_hweight.h
+ */
+
+#ifndef _ASM_RISCV_HWEIGHT_H
+#define _ASM_RISCV_HWEIGHT_H
+
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+#if (BITS_PER_LONG == 64)
+#define CPOPW  "cpopw "
+#elif (BITS_PER_LONG == 32)
+#define CPOPW  "cpop "
+#else
+#error "Unexpected BITS_PER_LONG"
+#endif
+
+static __always_inline unsigned int __arch_hweight32(unsigned int w)
+{
+#ifdef CONFIG_RISCV_ISA_ZBB
+       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+                                     RISCV_ISA_EXT_ZBB, 1)
+                         : : : : legacy);
+
+       asm (".option push\n"
+            ".option arch,+zbb\n"
+            CPOPW "%0, %0\n"
+            ".option pop\n"
+            : "+r" (w) : :);
+
+       return w;
+
+legacy:
+#endif
+       return __sw_hweight32(w);
+}
+
+static inline unsigned int __arch_hweight16(unsigned int w)
+{
+       return __arch_hweight32(w & 0xffff);
+}
+
+static inline unsigned int __arch_hweight8(unsigned int w)
+{
+       return __arch_hweight32(w & 0xff);
+}
+
+#if BITS_PER_LONG == 64
+static __always_inline unsigned long __arch_hweight64(__u64 w)
+{
+# ifdef CONFIG_RISCV_ISA_ZBB
+       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+                                     RISCV_ISA_EXT_ZBB, 1)
+                         : : : : legacy);
+
+       asm (".option push\n"
+            ".option arch,+zbb\n"
+            "cpop %0, %0\n"
+            ".option pop\n"
+            : "+r" (w) : :);
+
+       return w;
+
+legacy:
+# endif
+       return __sw_hweight64(w);
+}
+#else /* BITS_PER_LONG == 64 */
+static inline unsigned long __arch_hweight64(__u64 w)
+{
+       return  __arch_hweight32((u32)w) +
+               __arch_hweight32((u32)(w >> 32));
+}
+#endif /* !(BITS_PER_LONG == 64) */
+
+#endif /* _ASM_RISCV_HWEIGHT_H */
diff --git a/arch/riscv/include/asm/archrandom.h b/arch/riscv/include/asm/archrandom.h
new file mode 100644 (file)
index 0000000..5345360
--- /dev/null
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernel interface for the RISCV arch_random_* functions
+ *
+ * Copyright (c) 2023 Rivos Inc.
+ *
+ */
+
+#ifndef ASM_RISCV_ARCHRANDOM_H
+#define ASM_RISCV_ARCHRANDOM_H
+
+#include <asm/csr.h>
+#include <asm/processor.h>
+
+#define SEED_RETRY_LOOPS 100
+
+static inline bool __must_check csr_seed_long(unsigned long *v)
+{
+       unsigned int retry = SEED_RETRY_LOOPS, valid_seeds = 0;
+       const int needed_seeds = sizeof(long) / sizeof(u16);
+       u16 *entropy = (u16 *)v;
+
+       do {
+               /*
+                * The SEED CSR must be accessed with a read-write instruction.
+                */
+               unsigned long csr_seed = csr_swap(CSR_SEED, 0);
+               unsigned long opst = csr_seed & SEED_OPST_MASK;
+
+               switch (opst) {
+               case SEED_OPST_ES16:
+                       entropy[valid_seeds++] = csr_seed & SEED_ENTROPY_MASK;
+                       if (valid_seeds == needed_seeds)
+                               return true;
+                       break;
+
+               case SEED_OPST_DEAD:
+                       pr_err_once("archrandom: Unrecoverable error\n");
+                       return false;
+
+               case SEED_OPST_BIST:
+               case SEED_OPST_WAIT:
+               default:
+                       cpu_relax();
+                       continue;
+               }
+       } while (--retry);
+
+       return false;
+}
+
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
+{
+       return 0;
+}
+
+static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
+{
+       if (!max_longs)
+               return 0;
+
+       /*
+        * If Zkr is supported and csr_seed_long succeeds, we return one long
+        * worth of entropy.
+        */
+       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZKR) && csr_seed_long(v))
+               return 1;
+
+       return 0;
+}
+
+#endif /* ASM_RISCV_ARCHRANDOM_H */
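
For context, a minimal stand-alone sketch (not kernel code; the helper name is illustrative) of how a raw SEED CSR value decodes under the masks used by csr_seed_long() above: only an ES16 sample carries entropy, and only its low 16 bits are valid.

    #include <stdbool.h>
    #include <stdint.h>

    #define SEED_OPST_MASK    0xC0000000UL  /* top two bits encode the state */
    #define SEED_OPST_ES16    0x80000000UL  /* 16 valid entropy bits present */
    #define SEED_ENTROPY_MASK 0xFFFFUL

    /* BIST/WAIT/DEAD samples carry no entropy and must be retried or abandoned. */
    static bool seed_sample_valid(unsigned long csr_seed, uint16_t *out)
    {
            if ((csr_seed & SEED_OPST_MASK) != SEED_OPST_ES16)
                    return false;
            *out = csr_seed & SEED_ENTROPY_MASK;
            return true;
    }
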
index 00a96e7..0c8bfd5 100644 (file)
@@ -6,6 +6,7 @@
 #define EX_TYPE_FIXUP                  1
 #define EX_TYPE_BPF                    2
 #define EX_TYPE_UACCESS_ERR_ZERO       3
+#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
 
 #ifdef CONFIG_MMU
 
 #define EX_DATA_REG_ZERO_SHIFT 5
 #define EX_DATA_REG_ZERO       GENMASK(9, 5)
 
+#define EX_DATA_REG_DATA_SHIFT 0
+#define EX_DATA_REG_DATA       GENMASK(4, 0)
+#define EX_DATA_REG_ADDR_SHIFT 5
+#define EX_DATA_REG_ADDR       GENMASK(9, 5)
+
 #define EX_DATA_REG(reg, gpr)                                          \
        "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
 
 #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err)                     \
        _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
 
+#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr)           \
+       __DEFINE_ASM_GPR_NUMS                                                   \
+       __ASM_EXTABLE_RAW(#insn, #fixup,                                        \
+                         __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD),          \
+                         "("                                                   \
+                           EX_DATA_REG(DATA, data) " | "                       \
+                           EX_DATA_REG(ADDR, addr)                             \
+                         ")")
+
 #endif /* __ASSEMBLY__ */
 
 #else /* CONFIG_MMU */
index 36b955c..cd627ec 100644 (file)
@@ -9,6 +9,33 @@ long long __lshrti3(long long a, int b);
 long long __ashrti3(long long a, int b);
 long long __ashlti3(long long a, int b);
 
+#ifdef CONFIG_RISCV_ISA_V
+
+#ifdef CONFIG_MMU
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n);
+#endif /* CONFIG_MMU  */
+
+void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2);
+void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2,
+                const unsigned long *__restrict p3);
+void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2,
+                const unsigned long *__restrict p3,
+                const unsigned long *__restrict p4);
+void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2,
+                const unsigned long *__restrict p3,
+                const unsigned long *__restrict p4,
+                const unsigned long *__restrict p5);
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs);
+asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs);
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
+#endif /* CONFIG_RISCV_ISA_V */
 
 #define DECLARE_DO_ERROR_INFO(name)    asmlinkage void name(struct pt_regs *regs)
 
index 224b4dc..9ffc355 100644 (file)
@@ -271,7 +271,9 @@ legacy:
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/sched.h>
 
-#include <asm-generic/bitops/hweight.h>
+#include <asm/arch_hweight.h>
+
+#include <asm-generic/bitops/const_hweight.h>
 
 #if (BITS_PER_LONG == 64)
 #define __AMO(op)      "amo" #op ".d"
diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h
new file mode 100644 (file)
index 0000000..a5b60b5
--- /dev/null
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Checksum routines
+ *
+ * Copyright (C) 2023 Rivos Inc.
+ */
+#ifndef __ASM_RISCV_CHECKSUM_H
+#define __ASM_RISCV_CHECKSUM_H
+
+#include <linux/in6.h>
+#include <linux/uaccess.h>
+
+#define ip_fast_csum ip_fast_csum
+
+extern unsigned int do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+
+/* Default version is sufficient for 32 bit */
+#ifndef CONFIG_32BIT
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+                       const struct in6_addr *daddr,
+                       __u32 len, __u8 proto, __wsum sum);
+#endif
+
+/* Define riscv versions of functions before importing asm-generic/checksum.h */
+#include <asm-generic/checksum.h>
+
+/**
+ * Quickly compute an IP checksum with the assumption that IPv4 headers will
+ * always be in multiples of 32-bits, and have an ihl of at least 5.
+ *
+ * @ihl: the number of 32 bit segments and must be greater than or equal to 5.
+ * @iph: assumed to be word aligned given that NET_IP_ALIGN is set to 2 on
+ *  riscv, defining IP headers to be aligned.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+       unsigned long csum = 0;
+       int pos = 0;
+
+       do {
+               csum += ((const unsigned int *)iph)[pos];
+               if (IS_ENABLED(CONFIG_32BIT))
+                       csum += csum < ((const unsigned int *)iph)[pos];
+       } while (++pos < ihl);
+
+       /*
+        * ZBB only saves three instructions on 32-bit and five on 64-bit so not
+        * worth checking if supported without Alternatives.
+        */
+       if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+           IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+               unsigned long fold_temp;
+
+               asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+                                             RISCV_ISA_EXT_ZBB, 1)
+                   :
+                   :
+                   :
+                   : no_zbb);
+
+               if (IS_ENABLED(CONFIG_32BIT)) {
+                       asm(".option push                               \n\
+                       .option arch,+zbb                               \n\
+                               not     %[fold_temp], %[csum]           \n\
+                               rori    %[csum], %[csum], 16            \n\
+                               sub     %[csum], %[fold_temp], %[csum]  \n\
+                       .option pop"
+                       : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
+               } else {
+                       asm(".option push                               \n\
+                       .option arch,+zbb                               \n\
+                               rori    %[fold_temp], %[csum], 32       \n\
+                               add     %[csum], %[fold_temp], %[csum]  \n\
+                               srli    %[csum], %[csum], 32            \n\
+                               not     %[fold_temp], %[csum]           \n\
+                               roriw   %[csum], %[csum], 16            \n\
+                               subw    %[csum], %[fold_temp], %[csum]  \n\
+                       .option pop"
+                       : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
+               }
+               return (__force __sum16)(csum >> 16);
+       }
+no_zbb:
+#ifndef CONFIG_32BIT
+       csum += ror64(csum, 32);
+       csum >>= 32;
+#endif
+       return csum_fold((__force __wsum)csum);
+}
+
+#endif /* __ASM_RISCV_CHECKSUM_H */
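
The tail of ip_fast_csum() is the standard one's-complement fold; a stand-alone sketch (user-space C, illustrative only) of the same computation done generically, for comparison with the Zbb fast path above:

    #include <stddef.h>
    #include <stdint.h>

    /* Fold a 32-bit one's-complement sum down to the final 16-bit checksum. */
    static uint16_t fold32(uint32_t sum)
    {
            sum = (sum & 0xffff) + (sum >> 16);     /* add carries back in */
            sum = (sum & 0xffff) + (sum >> 16);     /* at most one more carry */
            return (uint16_t)~sum;
    }

    /* Generic IPv4 header checksum: ihl counts 32-bit words, i.e. 2*ihl u16s. */
    static uint16_t ip_checksum_generic(const void *iph, unsigned int ihl)
    {
            const uint16_t *p = iph;
            uint32_t sum = 0;

            for (size_t i = 0; i < ihl * 2; i++)
                    sum += p[i];
            return fold32(sum);
    }
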
index fbdde8b..5a626ed 100644 (file)
@@ -135,4 +135,6 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
        return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
 }
 
+DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
+
 #endif
index 306a19a..5100140 100644 (file)
 #define CSR_VTYPE              0xc21
 #define CSR_VLENB              0xc22
 
+/* Scalar Crypto Extension - Entropy */
+#define CSR_SEED               0x015
+#define SEED_OPST_MASK         _AC(0xC0000000, UL)
+#define SEED_OPST_BIST         _AC(0x00000000, UL)
+#define SEED_OPST_WAIT         _AC(0x40000000, UL)
+#define SEED_OPST_ES16         _AC(0x80000000, UL)
+#define SEED_OPST_DEAD         _AC(0xC0000000, UL)
+#define SEED_ENTROPY_MASK      _AC(0xFFFF, UL)
+
 #ifdef CONFIG_RISCV_M_MODE
 # define CSR_STATUS    CSR_MSTATUS
 # define CSR_IE                CSR_MIE
index 7ab5e34..2293e53 100644 (file)
@@ -4,6 +4,23 @@
 #define _ASM_RISCV_ENTRY_COMMON_H
 
 #include <asm/stacktrace.h>
+#include <asm/thread_info.h>
+#include <asm/vector.h>
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+                                                 unsigned long ti_work)
+{
+       if (ti_work & _TIF_RISCV_V_DEFER_RESTORE) {
+               clear_thread_flag(TIF_RISCV_V_DEFER_RESTORE);
+               /*
+                * We are already called with irq disabled, so go without
+                * keeping track of riscv_v_flags.
+                */
+               riscv_v_vstate_restore(&current->thread.vstate, regs);
+       }
+}
+
+#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
 
 void handle_page_fault(struct pt_regs *regs);
 void handle_break(struct pt_regs *regs);
index 83ed25e..ea33288 100644 (file)
@@ -24,9 +24,8 @@
 
 #ifdef CONFIG_ERRATA_THEAD
 #define        ERRATA_THEAD_PBMT 0
-#define        ERRATA_THEAD_CMO 1
-#define        ERRATA_THEAD_PMU 2
-#define        ERRATA_THEAD_NUMBER 3
+#define        ERRATA_THEAD_PMU 1
+#define        ERRATA_THEAD_NUMBER 2
 #endif
 
 #ifdef __ASSEMBLY__
@@ -94,54 +93,17 @@ asm volatile(ALTERNATIVE(                                           \
 #define ALT_THEAD_PMA(_val)
 #endif
 
-/*
- * th.dcache.ipa rs1 (invalidate, physical address)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- *   0000001    01010      rs1       000      00000  0001011
- * th.dache.iva rs1 (invalida, virtual address)
- *   0000001    00110      rs1       000      00000  0001011
- *
- * th.dcache.cpa rs1 (clean, physical address)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- *   0000001    01001      rs1       000      00000  0001011
- * th.dcache.cva rs1 (clean, virtual address)
- *   0000001    00101      rs1       000      00000  0001011
- *
- * th.dcache.cipa rs1 (clean then invalidate, physical address)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- *   0000001    01011      rs1       000      00000  0001011
- * th.dcache.civa rs1 (... virtual address)
- *   0000001    00111      rs1       000      00000  0001011
- *
- * th.sync.s (make sure all cache operations finished)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- *   0000000    11001     00000      000      00000  0001011
- */
-#define THEAD_INVAL_A0 ".long 0x0265000b"
-#define THEAD_CLEAN_A0 ".long 0x0255000b"
-#define THEAD_FLUSH_A0 ".long 0x0275000b"
-#define THEAD_SYNC_S   ".long 0x0190000b"
-
 #define ALT_CMO_OP(_op, _start, _size, _cachesize)                     \
-asm volatile(ALTERNATIVE_2(                                            \
-       __nops(6),                                                      \
+asm volatile(ALTERNATIVE(                                              \
+       __nops(5),                                                      \
        "mv a0, %1\n\t"                                                 \
        "j 2f\n\t"                                                      \
        "3:\n\t"                                                        \
        CBO_##_op(a0)                                                   \
        "add a0, a0, %0\n\t"                                            \
        "2:\n\t"                                                        \
-       "bltu a0, %2, 3b\n\t"                                           \
-       "nop", 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,        \
-       "mv a0, %1\n\t"                                                 \
-       "j 2f\n\t"                                                      \
-       "3:\n\t"                                                        \
-       THEAD_##_op##_A0 "\n\t"                                         \
-       "add a0, a0, %0\n\t"                                            \
-       "2:\n\t"                                                        \
-       "bltu a0, %2, 3b\n\t"                                           \
-       THEAD_SYNC_S, THEAD_VENDOR_ID,                                  \
-                       ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)      \
+       "bltu a0, %2, 3b\n\t",                                          \
+       0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM)               \
        : : "r"(_cachesize),                                            \
            "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),       \
            "r"((unsigned long)(_start) + (_size))                      \
index 2b2f5df..3291721 100644 (file)
@@ -128,7 +128,23 @@ do {                                                                       \
 struct dyn_ftrace;
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
 #define ftrace_init_nop ftrace_init_nop
-#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+struct ftrace_ops;
+struct ftrace_regs;
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+                      struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
+
+static inline void __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+{
+               regs->t1 = addr;
+}
+#define arch_ftrace_set_direct_caller(fregs, addr) \
+       __arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+#endif /* __ASSEMBLY__ */
 
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
index e3ffef1..0c94260 100644 (file)
@@ -865,7 +865,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
 #define TASK_SIZE_MIN  (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
 
 #ifdef CONFIG_COMPAT
-#define TASK_SIZE_32   (_AC(0x80000000, UL) - PAGE_SIZE)
+#define TASK_SIZE_32   (_AC(0x80000000, UL))
 #define TASK_SIZE      (test_thread_flag(TIF_32BIT) ? \
                         TASK_SIZE_32 : TASK_SIZE_64)
 #else
index f19f861..a8509cc 100644 (file)
@@ -16,7 +16,7 @@
 
 #ifdef CONFIG_64BIT
 #define DEFAULT_MAP_WINDOW     (UL(1) << (MMAP_VA_BITS - 1))
-#define STACK_TOP_MAX          TASK_SIZE_64
+#define STACK_TOP_MAX          TASK_SIZE
 
 #define arch_get_mmap_end(addr, len, flags)                    \
 ({                                                             \
 struct task_struct;
 struct pt_regs;
 
+/*
+ * We use a flag to track in-kernel Vector context. Currently the flag has the
+ * following meaning:
+ *
+ *  - bit 0: indicates whether the in-kernel Vector context is active. The
+ *    activation of this state disables preemption. On a non-RT kernel, it
+ *    also disables bh.
+ *  - bit 8: is used for tracking preemptible kernel-mode Vector, when
+ *    RISCV_ISA_V_PREEMPTIVE is enabled. Calling kernel_vector_begin() does not
+ *    disable preemption if the thread's kernel_vstate.datap is allocated.
+ *    Instead, the kernel sets this bit field. Then the trap entry/exit code
+ *    knows if we are entering/exiting the context that owns preempt_v.
+ *     - 0: the task is not using preempt_v
+ *     - 1: the task is actively using preempt_v. Whether the task owns the
+ *          preempt_v context is decided by bits in RISCV_V_CTX_DEPTH_MASK.
+ *  - bits 16-23 are RISCV_V_CTX_DEPTH_MASK, used by the context tracking
+ *     routine when preempt_v starts:
+ *     - 0: the task is actively using, and owns, the preempt_v context.
+ *     - non-zero: the task was using preempt_v, but then took a trap within.
+ *       Thus, the task does not own preempt_v. Any use of Vector will have to
+ *       save preempt_v, if dirty, and fall back to non-preemptible kernel-mode
+ *       Vector.
+ *  - bit 30: The in-kernel preempt_v context is saved and needs to be
+ *    restored when returning to the context that owns the preempt_v.
+ *  - bit 31: The in-kernel preempt_v context is dirty, as signaled by the
+ *    trap entry code. Any context switch away from the current task needs to
+ *    save it to the task's in-kernel V context. Also, any trap nesting on top
+ *    of preempt_v that requests to use V needs a save.
+ */
+#define RISCV_V_CTX_DEPTH_MASK         0x00ff0000
+
+#define RISCV_V_CTX_UNIT_DEPTH         0x00010000
+#define RISCV_KERNEL_MODE_V            0x00000001
+#define RISCV_PREEMPT_V                        0x00000100
+#define RISCV_PREEMPT_V_DIRTY          0x80000000
+#define RISCV_PREEMPT_V_NEED_RESTORE   0x40000000
+
 /* CPU-specific state of a task */
 struct thread_struct {
        /* Callee-saved registers */
@@ -81,9 +118,11 @@ struct thread_struct {
        unsigned long s[12];    /* s[0]: frame pointer */
        struct __riscv_d_ext_state fstate;
        unsigned long bad_cause;
-       unsigned long vstate_ctrl;
+       u32 riscv_v_flags;
+       u32 vstate_ctrl;
        struct __riscv_v_ext_state vstate;
        unsigned long align_ctl;
+       struct __riscv_v_ext_state kernel_vstate;
 };
 
 /* Whitelist the fstate from the task_struct for hardened usercopy */
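
One consequence of the flag layout documented above, written as a predicate (illustrative helper, not part of the patch): a task owns the preempt_v context only while RISCV_PREEMPT_V is set and the depth field is still zero.

    static inline bool riscv_v_owns_preempt_v(u32 flags)
    {
            /* Owned: preempt_v started and no trap has nested on top of it. */
            return (flags & RISCV_PREEMPT_V) &&
                   !(flags & RISCV_V_CTX_DEPTH_MASK);
    }
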
index b6f898c..6e68f8d 100644 (file)
@@ -29,6 +29,7 @@ enum sbi_ext_id {
        SBI_EXT_RFENCE = 0x52464E43,
        SBI_EXT_HSM = 0x48534D,
        SBI_EXT_SRST = 0x53525354,
+       SBI_EXT_SUSP = 0x53555350,
        SBI_EXT_PMU = 0x504D55,
        SBI_EXT_DBCN = 0x4442434E,
        SBI_EXT_STA = 0x535441,
@@ -115,6 +116,14 @@ enum sbi_srst_reset_reason {
        SBI_SRST_RESET_REASON_SYS_FAILURE,
 };
 
+enum sbi_ext_susp_fid {
+       SBI_EXT_SUSP_SYSTEM_SUSPEND = 0,
+};
+
+enum sbi_ext_susp_sleep_type {
+       SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM = 0,
+};
+
 enum sbi_ext_pmu_fid {
        SBI_EXT_PMU_NUM_COUNTERS = 0,
        SBI_EXT_PMU_COUNTER_GET_INFO,
@@ -288,8 +297,13 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
                        unsigned long arg3, unsigned long arg4,
                        unsigned long arg5);
 
+#ifdef CONFIG_RISCV_SBI_V01
 void sbi_console_putchar(int ch);
 int sbi_console_getchar(void);
+#else
+static inline void sbi_console_putchar(int ch) { }
+static inline int sbi_console_getchar(void) { return -ENOENT; }
+#endif
 long sbi_get_mvendorid(void);
 long sbi_get_marchid(void);
 long sbi_get_mimpid(void);
@@ -346,6 +360,11 @@ static inline unsigned long sbi_mk_version(unsigned long major,
 }
 
 int sbi_err_map_linux_errno(int err);
+
+extern bool sbi_debug_console_available;
+int sbi_debug_console_write(const char *bytes, unsigned int num_bytes);
+int sbi_debug_console_read(char *bytes, unsigned int num_bytes);
+
 #else /* CONFIG_RISCV_SBI */
 static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; }
 static inline void sbi_init(void) {}
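
Incidentally, the multi-letter SBI extension IDs above (SUSP, SRST, DBCN, ...) are just the ASCII bytes of their names packed big-endian; a stand-alone sketch (illustrative, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Pack an extension name into its SBI extension ID, e.g. "SUSP" -> 0x53555350. */
    static uint32_t sbi_ext_id(const char *name)
    {
            uint32_t id = 0;

            while (*name)
                    id = (id << 8) | (uint8_t)*name++;
            return id;
    }

    int main(void)
    {
            printf("%#x\n", (unsigned)sbi_ext_id("SUSP"));  /* 0x53555350, SBI_EXT_SUSP */
            printf("%#x\n", (unsigned)sbi_ext_id("DBCN"));  /* 0x4442434e, SBI_EXT_DBCN */
            return 0;
    }
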
diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
new file mode 100644 (file)
index 0000000..54efbf5
--- /dev/null
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef __ASM_SIMD_H
+#define __ASM_SIMD_H
+
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+
+#include <asm/vector.h>
+
+#ifdef CONFIG_RISCV_ISA_V
+/*
+ * may_use_simd - whether it is allowable at this time to issue vector
+ *                instructions or access the vector register file
+ *
+ * Callers must not assume that the result remains true beyond the next
+ * preempt_enable() or return from softirq context.
+ */
+static __must_check inline bool may_use_simd(void)
+{
+       /*
+        * RISCV_KERNEL_MODE_V is only set while preemption is disabled,
+        * and is clear whenever preemption is enabled.
+        */
+       if (in_hardirq() || in_nmi())
+               return false;
+
+       /*
+        * Nesting is achieved in preempt_v by spreading the control for
+        * preemptible and non-preemptible kernel-mode Vector into two fields.
+        * Always try to match with preempt_v if a kernel V-context exists. Then,
+        * fall back to checking non-preempt_v if nesting happens, or if the config
+        * is not set.
+        */
+       if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) {
+               if (!riscv_preempt_v_started(current))
+                       return true;
+       }
+       /*
+        * Non-preemptible kernel-mode Vector temporarily disables bh. So we
+        * must not return true when irqs_disabled(). Otherwise we would fail
+        * the lockdep check when calling local_bh_enable().
+        */
+       return !irqs_disabled() && !(riscv_v_flags() & RISCV_KERNEL_MODE_V);
+}
+
+#else /* ! CONFIG_RISCV_ISA_V */
+
+static __must_check inline bool may_use_simd(void)
+{
+       return false;
+}
+
+#endif /* ! CONFIG_RISCV_ISA_V */
+
+#endif
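
A typical caller pattern for this API, following the usual kernel-mode SIMD conventions (sketch only; vector_routine() and scalar_routine() are hypothetical):

    if (has_vector() && may_use_simd()) {
            kernel_vector_begin();
            vector_routine(dst, src, len);          /* hypothetical vector path */
            kernel_vector_end();
    } else {
            scalar_routine(dst, src, len);          /* hypothetical fallback */
    }
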
index f90d8e4..7efdb05 100644 (file)
@@ -53,8 +53,7 @@ static inline void __switch_to_fpu(struct task_struct *prev,
        struct pt_regs *regs;
 
        regs = task_pt_regs(prev);
-       if (unlikely(regs->status & SR_SD))
-               fstate_save(prev, regs);
+       fstate_save(prev, regs);
        fstate_restore(next, task_pt_regs(next));
 }
 
index 4856697..5d47334 100644 (file)
@@ -102,12 +102,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #define TIF_NOTIFY_SIGNAL      9       /* signal notifications exist */
 #define TIF_UPROBE             10      /* uprobe breakpoint or singlestep */
 #define TIF_32BIT              11      /* compat-mode 32bit process */
+#define TIF_RISCV_V_DEFER_RESTORE      12 /* restore Vector before returning to user */
 
 #define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_SIGNAL     (1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_UPROBE            (1 << TIF_UPROBE)
+#define _TIF_RISCV_V_DEFER_RESTORE     (1 << TIF_RISCV_V_DEFER_RESTORE)
 
 #define _TIF_WORK_MASK \
        (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
diff --git a/arch/riscv/include/asm/tlbbatch.h b/arch/riscv/include/asm/tlbbatch.h
new file mode 100644 (file)
index 0000000..46014f7
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+#ifndef _ASM_RISCV_TLBBATCH_H
+#define _ASM_RISCV_TLBBATCH_H
+
+#include <linux/cpumask.h>
+
+struct arch_tlbflush_unmap_batch {
+       struct cpumask cpumask;
+};
+
+#endif /* _ASM_RISCV_TLBBATCH_H */
index a60416b..928f096 100644 (file)
@@ -47,6 +47,14 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
                        unsigned long end);
 #endif
+
+bool arch_tlbbatch_should_defer(struct mm_struct *mm);
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+                              struct mm_struct *mm,
+                              unsigned long uaddr);
+void arch_flush_tlb_batched_pending(struct mm_struct *mm);
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+
 #else /* CONFIG_SMP && CONFIG_MMU */
 
 #define flush_tlb_all() local_flush_tlb_all()
index 87aaef6..0cd6f0a 100644 (file)
 extern unsigned long riscv_v_vsize;
 int riscv_v_setup_vsize(void);
 bool riscv_v_first_use_handler(struct pt_regs *regs);
+void kernel_vector_begin(void);
+void kernel_vector_end(void);
+void get_cpu_vector_context(void);
+void put_cpu_vector_context(void);
+void riscv_v_thread_free(struct task_struct *tsk);
+void __init riscv_v_setup_ctx_cache(void);
+void riscv_v_thread_alloc(struct task_struct *tsk);
+
+static inline u32 riscv_v_flags(void)
+{
+       return READ_ONCE(current->thread.riscv_v_flags);
+}
 
 static __always_inline bool has_vector(void)
 {
@@ -162,36 +174,89 @@ static inline void riscv_v_vstate_discard(struct pt_regs *regs)
        __riscv_v_vstate_dirty(regs);
 }
 
-static inline void riscv_v_vstate_save(struct task_struct *task,
+static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
                                       struct pt_regs *regs)
 {
        if ((regs->status & SR_VS) == SR_VS_DIRTY) {
-               struct __riscv_v_ext_state *vstate = &task->thread.vstate;
-
                __riscv_v_vstate_save(vstate, vstate->datap);
                __riscv_v_vstate_clean(regs);
        }
 }
 
-static inline void riscv_v_vstate_restore(struct task_struct *task,
+static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
                                          struct pt_regs *regs)
 {
        if ((regs->status & SR_VS) != SR_VS_OFF) {
-               struct __riscv_v_ext_state *vstate = &task->thread.vstate;
-
                __riscv_v_vstate_restore(vstate, vstate->datap);
                __riscv_v_vstate_clean(regs);
        }
 }
 
+static inline void riscv_v_vstate_set_restore(struct task_struct *task,
+                                             struct pt_regs *regs)
+{
+       if ((regs->status & SR_VS) != SR_VS_OFF) {
+               set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
+               riscv_v_vstate_on(regs);
+       }
+}
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static inline bool riscv_preempt_v_dirty(struct task_struct *task)
+{
+       return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_DIRTY);
+}
+
+static inline bool riscv_preempt_v_restore(struct task_struct *task)
+{
+       return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_preempt_v_clear_dirty(struct task_struct *task)
+{
+       barrier();
+       task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_set_restore(struct task_struct *task)
+{
+       barrier();
+       task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE;
+}
+
+static inline bool riscv_preempt_v_started(struct task_struct *task)
+{
+       return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V);
+}
+
+#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */
+static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; }
+#define riscv_preempt_v_clear_dirty(tsk)       do {} while (0)
+#define riscv_preempt_v_set_restore(tsk)       do {} while (0)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
 static inline void __switch_to_vector(struct task_struct *prev,
                                      struct task_struct *next)
 {
        struct pt_regs *regs;
 
-       regs = task_pt_regs(prev);
-       riscv_v_vstate_save(prev, regs);
-       riscv_v_vstate_restore(next, task_pt_regs(next));
+       if (riscv_preempt_v_started(prev)) {
+               if (riscv_preempt_v_dirty(prev)) {
+                       __riscv_v_vstate_save(&prev->thread.kernel_vstate,
+                                             prev->thread.kernel_vstate.datap);
+                       riscv_preempt_v_clear_dirty(prev);
+               }
+       } else {
+               regs = task_pt_regs(prev);
+               riscv_v_vstate_save(&prev->thread.vstate, regs);
+       }
+
+       if (riscv_preempt_v_started(next))
+               riscv_preempt_v_set_restore(next);
+       else
+               riscv_v_vstate_set_restore(next, task_pt_regs(next));
 }
 
 void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
@@ -208,11 +273,14 @@ static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
 #define riscv_v_vsize (0)
 #define riscv_v_vstate_discard(regs)           do {} while (0)
-#define riscv_v_vstate_save(task, regs)                do {} while (0)
-#define riscv_v_vstate_restore(task, regs)     do {} while (0)
+#define riscv_v_vstate_save(vstate, regs)      do {} while (0)
+#define riscv_v_vstate_restore(vstate, regs)   do {} while (0)
 #define __switch_to_vector(__prev, __next)     do {} while (0)
 #define riscv_v_vstate_off(regs)               do {} while (0)
 #define riscv_v_vstate_on(regs)                        do {} while (0)
+#define riscv_v_thread_free(tsk)               do {} while (0)
+#define  riscv_v_setup_ctx_cache()             do {} while (0)
+#define riscv_v_thread_alloc(tsk)              do {} while (0)
 
 #endif /* CONFIG_RISCV_ISA_V */
 
index 7c086ac..f3f031e 100644 (file)
@@ -9,6 +9,7 @@
 #define _ASM_RISCV_WORD_AT_A_TIME_H
 
 
+#include <asm/asm-extable.h>
 #include <linux/kernel.h>
 
 struct word_at_a_time {
@@ -45,4 +46,30 @@ static inline unsigned long find_zero(unsigned long mask)
 /* The mask we created is directly usable as a bytemask */
 #define zero_bytemask(mask) (mask)
 
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+       unsigned long ret;
+
+       /* Load word from unaligned pointer addr */
+       asm(
+       "1:     " REG_L " %0, %2\n"
+       "2:\n"
+       _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
+       : "=&r" (ret)
+       : "r" (addr), "m" (*(unsigned long *)addr));
+
+       return ret;
+}
+
+#endif /* CONFIG_DCACHE_WORD_ACCESS */
+
 #endif /* _ASM_RISCV_WORD_AT_A_TIME_H */
diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644 (file)
index 0000000..9601186
--- /dev/null
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_RISCV_ISA_V
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+#include <asm/asm-prototypes.h>
+
+static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2)
+{
+       kernel_vector_begin();
+       xor_regs_2_(bytes, p1, p2);
+       kernel_vector_end();
+}
+
+static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2,
+                        const unsigned long *__restrict p3)
+{
+       kernel_vector_begin();
+       xor_regs_3_(bytes, p1, p2, p3);
+       kernel_vector_end();
+}
+
+static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2,
+                        const unsigned long *__restrict p3,
+                        const unsigned long *__restrict p4)
+{
+       kernel_vector_begin();
+       xor_regs_4_(bytes, p1, p2, p3, p4);
+       kernel_vector_end();
+}
+
+static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2,
+                        const unsigned long *__restrict p3,
+                        const unsigned long *__restrict p4,
+                        const unsigned long *__restrict p5)
+{
+       kernel_vector_begin();
+       xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+       kernel_vector_end();
+}
+
+static struct xor_block_template xor_block_rvv = {
+       .name = "rvv",
+       .do_2 = xor_vector_2,
+       .do_3 = xor_vector_3,
+       .do_4 = xor_vector_4,
+       .do_5 = xor_vector_5
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES           \
+       do {        \
+               xor_speed(&xor_block_8regs);    \
+               xor_speed(&xor_block_32regs);    \
+               if (has_vector()) { \
+                       xor_speed(&xor_block_rvv);\
+               } \
+       } while (0)
+#endif
index c92c623..f719107 100644 (file)
@@ -64,6 +64,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
 obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
 obj-$(CONFIG_FPU)              += fpu.o
 obj-$(CONFIG_RISCV_ISA_V)      += vector.o
+obj-$(CONFIG_RISCV_ISA_V)      += kernel_mode_vector.o
 obj-$(CONFIG_SMP)              += smpboot.o
 obj-$(CONFIG_SMP)              += smp.o
 obj-$(CONFIG_SMP)              += cpu_ops.o
index e32591e..89920f8 100644 (file)
@@ -8,8 +8,10 @@
 
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
+#include <linux/cpu.h>
 #include <linux/cpuhotplug.h>
 #include <linux/ctype.h>
+#include <linux/jump_label.h>
 #include <linux/log2.h>
 #include <linux/memory.h>
 #include <linux/module.h>
@@ -44,6 +46,8 @@ struct riscv_isainfo hart_isa[NR_CPUS];
 /* Performance information */
 DEFINE_PER_CPU(long, misaligned_access_speed);
 
+static cpumask_t fast_misaligned_access;
+
 /**
  * riscv_isa_extension_base() - Get base extension word
  *
@@ -784,6 +788,16 @@ static int check_unaligned_access(void *param)
                (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
 
        per_cpu(misaligned_access_speed, cpu) = speed;
+
+       /*
+        * Record this CPU's result in the fast_misaligned_access cpumask. The
+        * cpumask operations are atomic, so this avoids race conditions.
+        */
+       if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
+               cpumask_set_cpu(cpu, &fast_misaligned_access);
+       else
+               cpumask_clear_cpu(cpu, &fast_misaligned_access);
+
        return 0;
 }
 
@@ -796,13 +810,69 @@ static void check_unaligned_access_nonboot_cpu(void *param)
                check_unaligned_access(pages[cpu]);
 }
 
+DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
+
+static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
+{
+       if (cpumask_weight(mask) == weight)
+               static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
+       else
+               static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
+}
+
+static void set_unaligned_access_static_branches_except_cpu(int cpu)
+{
+       /*
+        * Same as set_unaligned_access_static_branches, except excludes the
+        * given CPU from the result. When a CPU is hotplugged into an offline
+        * state, this function is called before the CPU is set to offline in
+        * the cpumask, and thus the CPU needs to be explicitly excluded.
+        */
+
+       cpumask_t fast_except_me;
+
+       cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
+       cpumask_clear_cpu(cpu, &fast_except_me);
+
+       modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
+}
+
+static void set_unaligned_access_static_branches(void)
+{
+       /*
+        * This will be called after check_unaligned_access_all_cpus so the
+        * result of unaligned access speed for all CPUs will be available.
+        *
+        * To avoid the number of online cpus changing between reading
+        * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
+        * held before calling this function.
+        */
+
+       cpumask_t fast_and_online;
+
+       cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
+
+       modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
+}
+
+static int lock_and_set_unaligned_access_static_branch(void)
+{
+       cpus_read_lock();
+       set_unaligned_access_static_branches();
+       cpus_read_unlock();
+
+       return 0;
+}
+
+arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
+
 static int riscv_online_cpu(unsigned int cpu)
 {
        static struct page *buf;
 
        /* We are already set since the last check */
        if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
-               return 0;
+               goto exit;
 
        buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
        if (!buf) {
@@ -812,6 +882,17 @@ static int riscv_online_cpu(unsigned int cpu)
 
        check_unaligned_access(buf);
        __free_pages(buf, MISALIGNED_BUFFER_ORDER);
+
+exit:
+       set_unaligned_access_static_branches();
+
+       return 0;
+}
+
+static int riscv_offline_cpu(unsigned int cpu)
+{
+       set_unaligned_access_static_branches_except_cpu(cpu);
+
        return 0;
 }
 
@@ -846,9 +927,12 @@ static int check_unaligned_access_all_cpus(void)
        /* Check core 0. */
        smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
 
-       /* Setup hotplug callback for any new CPUs that come online. */
+       /*
+        * Set up hotplug callbacks for any new CPUs that come online or go
+        * offline.
+        */
        cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
-                                 riscv_online_cpu, NULL);
+                                 riscv_online_cpu, riscv_offline_cpu);
 
 out:
        unaligned_emulation_finish();
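
For context, a minimal sketch of how later code is expected to consume this static key; the wrapper name is hypothetical, while static_branch_likely() is the standard jump-label API and the key is the one defined above:

    #include <linux/jump_label.h>

    DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);

    static inline bool all_cpus_have_fast_misaligned_access(void)
    {
            /* Patched to a plain fall-through once every online CPU reports "fast". */
            return static_branch_likely(&fast_misaligned_access_speed_key);
    }
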
index 54ca456..9d1a305 100644 (file)
@@ -83,6 +83,10 @@ SYM_CODE_START(handle_exception)
        /* Load the kernel shadow call stack pointer if coming from userspace */
        scs_load_current_if_task_changed s5
 
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+       move a0, sp
+       call riscv_v_context_nesting_start
+#endif
        move a0, sp /* pt_regs */
        la ra, ret_from_exception
 
@@ -138,6 +142,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
         */
        csrw CSR_SCRATCH, tp
 1:
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+       move a0, sp
+       call riscv_v_context_nesting_end
+#endif
        REG_L a0, PT_STATUS(sp)
        /*
         * The current load reservation is effectively part of the processor's
index 03a6434..f5aa24d 100644 (file)
@@ -178,32 +178,28 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+                      struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+       struct pt_regs *regs = arch_ftrace_get_regs(fregs);
+       unsigned long *parent = (unsigned long *)&regs->ra;
+
+       prepare_ftrace_return(parent, ip, frame_pointer(regs));
+}
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 extern void ftrace_graph_call(void);
-extern void ftrace_graph_regs_call(void);
 int ftrace_enable_ftrace_graph_caller(void)
 {
-       int ret;
-
-       ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
-                                   (unsigned long)&prepare_ftrace_return, true, true);
-       if (ret)
-               return ret;
-
-       return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+       return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
                                    (unsigned long)&prepare_ftrace_return, true, true);
 }
 
 int ftrace_disable_ftrace_graph_caller(void)
 {
-       int ret;
-
-       ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
-                                   (unsigned long)&prepare_ftrace_return, false, true);
-       if (ret)
-               return ret;
-
-       return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+       return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
                                    (unsigned long)&prepare_ftrace_return, false, true);
 }
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
new file mode 100644 (file)
index 0000000..6afe80c
--- /dev/null
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+#include <asm/simd.h>
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+#include <asm/asm-prototypes.h>
+#endif
+
+static inline void riscv_v_flags_set(u32 flags)
+{
+       WRITE_ONCE(current->thread.riscv_v_flags, flags);
+}
+
+static inline void riscv_v_start(u32 flags)
+{
+       int orig;
+
+       orig = riscv_v_flags();
+       BUG_ON((orig & flags) != 0);
+       riscv_v_flags_set(orig | flags);
+       barrier();
+}
+
+static inline void riscv_v_stop(u32 flags)
+{
+       int orig;
+
+       barrier();
+       orig = riscv_v_flags();
+       BUG_ON((orig & flags) == 0);
+       riscv_v_flags_set(orig & ~flags);
+}
+
+/*
+ * Claim ownership of the CPU vector context for use by the calling context.
+ *
+ * The caller may freely manipulate the vector context metadata until
+ * put_cpu_vector_context() is called.
+ */
+void get_cpu_vector_context(void)
+{
+       /*
+        * Disable softirqs so that softirqs cannot nest inside
+        * get_cpu_vector_context() while the kernel is actively using Vector.
+        */
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_bh_disable();
+       else
+               preempt_disable();
+
+       riscv_v_start(RISCV_KERNEL_MODE_V);
+}
+
+/*
+ * Release the CPU vector context.
+ *
+ * Must be called from a context in which get_cpu_vector_context() was
+ * previously called, with no call to put_cpu_vector_context() in the
+ * meantime.
+ */
+void put_cpu_vector_context(void)
+{
+       riscv_v_stop(RISCV_KERNEL_MODE_V);
+
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_bh_enable();
+       else
+               preempt_enable();
+}
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static __always_inline u32 *riscv_v_flags_ptr(void)
+{
+       return &current->thread.riscv_v_flags;
+}
+
+static inline void riscv_preempt_v_set_dirty(void)
+{
+       *riscv_v_flags_ptr() |= RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_reset_flags(void)
+{
+       *riscv_v_flags_ptr() &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_v_ctx_depth_inc(void)
+{
+       *riscv_v_flags_ptr() += RISCV_V_CTX_UNIT_DEPTH;
+}
+
+static inline void riscv_v_ctx_depth_dec(void)
+{
+       *riscv_v_flags_ptr() -= RISCV_V_CTX_UNIT_DEPTH;
+}
+
+static inline u32 riscv_v_ctx_get_depth(void)
+{
+       return *riscv_v_flags_ptr() & RISCV_V_CTX_DEPTH_MASK;
+}
+
+static int riscv_v_stop_kernel_context(void)
+{
+       if (riscv_v_ctx_get_depth() != 0 || !riscv_preempt_v_started(current))
+               return 1;
+
+       riscv_preempt_v_clear_dirty(current);
+       riscv_v_stop(RISCV_PREEMPT_V);
+       return 0;
+}
+
+static int riscv_v_start_kernel_context(bool *is_nested)
+{
+       struct __riscv_v_ext_state *kvstate, *uvstate;
+
+       kvstate = &current->thread.kernel_vstate;
+       if (!kvstate->datap)
+               return -ENOENT;
+
+       if (riscv_preempt_v_started(current)) {
+               WARN_ON(riscv_v_ctx_get_depth() == 0);
+               *is_nested = true;
+               get_cpu_vector_context();
+               if (riscv_preempt_v_dirty(current)) {
+                       __riscv_v_vstate_save(kvstate, kvstate->datap);
+                       riscv_preempt_v_clear_dirty(current);
+               }
+               riscv_preempt_v_set_restore(current);
+               return 0;
+       }
+
+       /* Transfer the ownership of V from user to kernel, then save */
+       riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY);
+       if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) {
+               uvstate = &current->thread.vstate;
+               __riscv_v_vstate_save(uvstate, uvstate->datap);
+       }
+       riscv_preempt_v_clear_dirty(current);
+       return 0;
+}
+
+/* low-level V context handling code, called with irq disabled */
+asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs)
+{
+       int depth;
+
+       if (!riscv_preempt_v_started(current))
+               return;
+
+       depth = riscv_v_ctx_get_depth();
+       if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY)
+               riscv_preempt_v_set_dirty();
+
+       riscv_v_ctx_depth_inc();
+}
+
+asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs)
+{
+       struct __riscv_v_ext_state *vstate = &current->thread.kernel_vstate;
+       u32 depth;
+
+       WARN_ON(!irqs_disabled());
+
+       if (!riscv_preempt_v_started(current))
+               return;
+
+       riscv_v_ctx_depth_dec();
+       depth = riscv_v_ctx_get_depth();
+       if (depth == 0) {
+               if (riscv_preempt_v_restore(current)) {
+                       __riscv_v_vstate_restore(vstate, vstate->datap);
+                       __riscv_v_vstate_clean(regs);
+                       riscv_preempt_v_reset_flags();
+               }
+       }
+}
+#else
+#define riscv_v_start_kernel_context(nested)   (-ENOENT)
+#define riscv_v_stop_kernel_context()          (-ENOENT)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
+/*
+ * kernel_vector_begin(): obtain the CPU vector registers for use by the calling
+ * context
+ *
+ * Must not be called unless may_use_simd() returns true.
+ * Task context in the vector registers is saved back to memory as necessary.
+ *
+ * A matching call to kernel_vector_end() must be made before returning from the
+ * calling context.
+ *
+ * The caller may freely use the vector registers until kernel_vector_end() is
+ * called.
+ */
+void kernel_vector_begin(void)
+{
+       bool nested = false;
+
+       if (WARN_ON(!has_vector()))
+               return;
+
+       BUG_ON(!may_use_simd());
+
+       if (riscv_v_start_kernel_context(&nested)) {
+               get_cpu_vector_context();
+               riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+       }
+
+       if (!nested)
+               riscv_v_vstate_set_restore(current, task_pt_regs(current));
+
+       riscv_v_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_vector_begin);
+
+/*
+ * kernel_vector_end(): give the CPU vector registers back to the current task
+ *
+ * Must be called from a context in which kernel_vector_begin() was previously
+ * called, with no call to kernel_vector_end() in the meantime.
+ *
+ * The caller must not use the vector registers after this function is called,
+ * unless kernel_vector_begin() is called again in the meantime.
+ */
+void kernel_vector_end(void)
+{
+       if (WARN_ON(!has_vector()))
+               return;
+
+       riscv_v_disable();
+
+       if (riscv_v_stop_kernel_context())
+               put_cpu_vector_context();
+}
+EXPORT_SYMBOL_GPL(kernel_vector_end);
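
A hedged sketch of the calling pattern the API above is meant for; kernel_vector_begin()/kernel_vector_end(), has_vector() and may_use_simd() are the entry points introduced here, while the worker routine is hypothetical:

    #include <linux/types.h>
    #include <asm/simd.h>
    #include <asm/vector.h>

    static void vector_or_scalar_xor(u8 *dst, const u8 *src, size_t len)
    {
            if (!has_vector() || !may_use_simd()) {
                    while (len--)                   /* scalar fallback */
                            *dst++ ^= *src++;
                    return;
            }

            kernel_vector_begin();
            /* Vector registers may be used freely between begin and end;
             * my_vle_xor_loop() stands in for a real assembly body. */
            my_vle_xor_loop(dst, src, len);         /* hypothetical helper */
            kernel_vector_end();
    }
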
index 79dc812..b756128 100644 (file)
        .endm
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-       .macro SAVE_ALL
+
+/**
+* SAVE_ABI_REGS - save regs against the pt_regs struct
+*
+* @all: whether to save all the regs
+*
+* If @all is set, all the regs are saved; otherwise only the ABI-related
+* regs (a0-a7, epc, ra and, optionally, s0) are saved.
+*
+* After the stack is established:
+*
+* 0(sp) stores the PC of the traced function, which can be accessed
+* via &(fregs)->regs->epc in the tracing function. Note that the real
+* function entry address should be computed with -FENTRY_RA_OFFSET.
+*
+* 8(sp) stores the function return address (i.e. parent IP), which
+* can be accessed via &(fregs)->regs->ra in the tracing function.
+*
+* The other regs are saved at their respective locations and accessed
+* through the corresponding pt_regs members.
+*
+* Here is the stack layout for reference.
+*
+* PT_SIZE_ON_STACK  ->  +++++++++
+*                       + ..... +
+*                       + t3-t6 +
+*                       + s2-s11+
+*                       + a0-a7 + --++++-> ftrace_caller saved
+*                       + s1    +   +
+*                       + s0    + --+
+*                       + t0-t2 +   +
+*                       + tp    +   +
+*                       + gp    +   +
+*                       + sp    +   +
+*                       + ra    + --+ // parent IP
+*               sp  ->  + epc   + --+ // PC
+*                       +++++++++
+**/
+       .macro SAVE_ABI_REGS, all=0
        addi    sp, sp, -PT_SIZE_ON_STACK
 
-       REG_S t0,  PT_EPC(sp)
-       REG_S x1,  PT_RA(sp)
-       REG_S x2,  PT_SP(sp)
-       REG_S x3,  PT_GP(sp)
-       REG_S x4,  PT_TP(sp)
-       REG_S x5,  PT_T0(sp)
-       save_from_x6_to_x31
+       REG_S   t0,  PT_EPC(sp)
+       REG_S   x1,  PT_RA(sp)
+
+       // save the ABI regs
+
+       REG_S   x10, PT_A0(sp)
+       REG_S   x11, PT_A1(sp)
+       REG_S   x12, PT_A2(sp)
+       REG_S   x13, PT_A3(sp)
+       REG_S   x14, PT_A4(sp)
+       REG_S   x15, PT_A5(sp)
+       REG_S   x16, PT_A6(sp)
+       REG_S   x17, PT_A7(sp)
+
+       // save the leftover regs
+
+       .if \all == 1
+       REG_S   x2, PT_SP(sp)
+       REG_S   x3, PT_GP(sp)
+       REG_S   x4, PT_TP(sp)
+       REG_S   x5, PT_T0(sp)
+       REG_S   x6, PT_T1(sp)
+       REG_S   x7, PT_T2(sp)
+       REG_S   x8, PT_S0(sp)
+       REG_S   x9, PT_S1(sp)
+       REG_S   x18, PT_S2(sp)
+       REG_S   x19, PT_S3(sp)
+       REG_S   x20, PT_S4(sp)
+       REG_S   x21, PT_S5(sp)
+       REG_S   x22, PT_S6(sp)
+       REG_S   x23, PT_S7(sp)
+       REG_S   x24, PT_S8(sp)
+       REG_S   x25, PT_S9(sp)
+       REG_S   x26, PT_S10(sp)
+       REG_S   x27, PT_S11(sp)
+       REG_S   x28, PT_T3(sp)
+       REG_S   x29, PT_T4(sp)
+       REG_S   x30, PT_T5(sp)
+       REG_S   x31, PT_T6(sp)
+
+       // save s0 only if HAVE_FUNCTION_GRAPH_FP_TEST is defined
+
+       .else
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+       REG_S   x8, PT_S0(sp)
+#endif
+       .endif
        .endm
 
-       .macro RESTORE_ALL
-       REG_L x1,  PT_RA(sp)
-       REG_L x2,  PT_SP(sp)
-       REG_L x3,  PT_GP(sp)
-       REG_L x4,  PT_TP(sp)
-       /* Restore t0 with PT_EPC */
-       REG_L x5,  PT_EPC(sp)
-       restore_from_x6_to_x31
+       .macro RESTORE_ABI_REGS, all=0
+       REG_L   t0, PT_EPC(sp)
+       REG_L   x1, PT_RA(sp)
+       REG_L   x10, PT_A0(sp)
+       REG_L   x11, PT_A1(sp)
+       REG_L   x12, PT_A2(sp)
+       REG_L   x13, PT_A3(sp)
+       REG_L   x14, PT_A4(sp)
+       REG_L   x15, PT_A5(sp)
+       REG_L   x16, PT_A6(sp)
+       REG_L   x17, PT_A7(sp)
 
+       .if \all == 1
+       REG_L   x2, PT_SP(sp)
+       REG_L   x3, PT_GP(sp)
+       REG_L   x4, PT_TP(sp)
+       REG_L   x6, PT_T1(sp)
+       REG_L   x7, PT_T2(sp)
+       REG_L   x8, PT_S0(sp)
+       REG_L   x9, PT_S1(sp)
+       REG_L   x18, PT_S2(sp)
+       REG_L   x19, PT_S3(sp)
+       REG_L   x20, PT_S4(sp)
+       REG_L   x21, PT_S5(sp)
+       REG_L   x22, PT_S6(sp)
+       REG_L   x23, PT_S7(sp)
+       REG_L   x24, PT_S8(sp)
+       REG_L   x25, PT_S9(sp)
+       REG_L   x26, PT_S10(sp)
+       REG_L   x27, PT_S11(sp)
+       REG_L   x28, PT_T3(sp)
+       REG_L   x29, PT_T4(sp)
+       REG_L   x30, PT_T5(sp)
+       REG_L   x31, PT_T6(sp)
+
+       .else
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+       REG_L   x8, PT_S0(sp)
+#endif
+       .endif
        addi    sp, sp, PT_SIZE_ON_STACK
        .endm
+
+       .macro PREPARE_ARGS
+       addi    a0, t0, -FENTRY_RA_OFFSET
+       la      a1, function_trace_op
+       REG_L   a2, 0(a1)
+       mv      a1, ra
+       mv      a3, sp
+       .endm
+
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 
+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 SYM_FUNC_START(ftrace_caller)
        SAVE_ABI
 
@@ -105,34 +224,39 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
        call    ftrace_stub
 #endif
        RESTORE_ABI
-       jr t0
+       jr      t0
 SYM_FUNC_END(ftrace_caller)
 
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 SYM_FUNC_START(ftrace_regs_caller)
-       SAVE_ALL
-
-       addi    a0, t0, -FENTRY_RA_OFFSET
-       la      a1, function_trace_op
-       REG_L   a2, 0(a1)
-       mv      a1, ra
-       mv      a3, sp
+       mv      t1, zero
+       SAVE_ABI_REGS 1
+       PREPARE_ARGS
 
 SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
        call    ftrace_stub
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       addi    a0, sp, PT_RA
-       REG_L   a1, PT_EPC(sp)
-       addi    a1, a1, -FENTRY_RA_OFFSET
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-       mv      a2, s0
-#endif
-SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL)
+       RESTORE_ABI_REGS 1
+       bnez    t1, .Ldirect
+       jr      t0
+.Ldirect:
+       jr      t1
+SYM_FUNC_END(ftrace_regs_caller)
+
+SYM_FUNC_START(ftrace_caller)
+       SAVE_ABI_REGS 0
+       PREPARE_ARGS
+
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
        call    ftrace_stub
-#endif
 
-       RESTORE_ALL
-       jr t0
-SYM_FUNC_END(ftrace_regs_caller)
+       RESTORE_ABI_REGS 0
+       jr      t0
+SYM_FUNC_END(ftrace_caller)
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+SYM_CODE_START(ftrace_stub_direct_tramp)
+       jr      t0
+SYM_CODE_END(ftrace_stub_direct_tramp)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
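
For reference, PREPARE_ARGS above lines the argument registers up with the generic ftrace callback prototype from linux/ftrace.h; the mapping, spelled out (not part of the patch itself):

    /*
     * void callback(unsigned long ip, unsigned long parent_ip,
     *               struct ftrace_ops *op, struct ftrace_regs *fregs);
     *
     *   a0 = t0 - FENTRY_RA_OFFSET    ip:        entry of the traced function
     *   a1 = ra                       parent_ip: return address in the caller
     *   a2 = *function_trace_op       op:        the active ftrace_ops
     *   a3 = sp                       fregs:     the pt_regs area saved above
     */
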
index 862834b..5e5a826 100644 (file)
@@ -723,8 +723,8 @@ static int add_relocation_to_accumulate(struct module *me, int type,
 
                        if (!bucket) {
                                kfree(entry);
-                               kfree(rel_head);
                                kfree(rel_head->rel_entry);
+                               kfree(rel_head);
                                return -ENOMEM;
                        }
 
@@ -747,6 +747,10 @@ initialize_relocation_hashtable(unsigned int num_relocations,
 {
        /* Can safely assume that bits is not greater than sizeof(long) */
        unsigned long hashtable_size = roundup_pow_of_two(num_relocations);
+       /*
+        * When hashtable_size == 1, hashtable_bits == 0.
+        * This is valid because the hashing algorithm returns 0 in this case.
+        */
        unsigned int hashtable_bits = ilog2(hashtable_size);
 
        /*
@@ -760,10 +764,10 @@ initialize_relocation_hashtable(unsigned int num_relocations,
        hashtable_size <<= should_double_size;
 
        *relocation_hashtable = kmalloc_array(hashtable_size,
-                                             sizeof(*relocation_hashtable),
+                                             sizeof(**relocation_hashtable),
                                              GFP_KERNEL);
        if (!*relocation_hashtable)
-               return -ENOMEM;
+               return 0;
 
        __hash_init(*relocation_hashtable, hashtable_size);
 
@@ -779,6 +783,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
        Elf_Sym *sym;
        void *location;
        unsigned int i, type;
+       unsigned int j_idx = 0;
        Elf_Addr v;
        int res;
        unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel);
@@ -789,8 +794,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
        hashtable_bits = initialize_relocation_hashtable(num_relocations,
                                                         &relocation_hashtable);
 
-       if (hashtable_bits < 0)
-               return hashtable_bits;
+       if (!relocation_hashtable)
+               return -ENOMEM;
 
        INIT_LIST_HEAD(&used_buckets_list);
 
@@ -829,9 +834,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
                v = sym->st_value + rel[i].r_addend;
 
                if (type == R_RISCV_PCREL_LO12_I || type == R_RISCV_PCREL_LO12_S) {
-                       unsigned int j;
+                       unsigned int j = j_idx;
+                       bool found = false;
 
-                       for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) {
+                       do {
                                unsigned long hi20_loc =
                                        sechdrs[sechdrs[relsec].sh_info].sh_addr
                                        + rel[j].r_offset;
@@ -860,16 +866,26 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
                                        hi20 = (offset + 0x800) & 0xfffff000;
                                        lo12 = offset - hi20;
                                        v = lo12;
+                                       found = true;
 
                                        break;
                                }
-                       }
-                       if (j == sechdrs[relsec].sh_size / sizeof(*rel)) {
+
+                               j++;
+                               if (j > sechdrs[relsec].sh_size / sizeof(*rel))
+                                       j = 0;
+
+                       } while (j_idx != j);
+
+                       if (!found) {
                                pr_err(
                                  "%s: Can not find HI20 relocation information\n",
                                  me->name);
                                return -EINVAL;
                        }
+
+                       /* Remember where this j-loop ended so the next search resumes there */
+                       j_idx = j;
                }
 
                if (reloc_handlers[type].accumulate_handler)
index 68e786c..f6d4ded 100644 (file)
@@ -38,8 +38,7 @@ static char *get_early_cmdline(uintptr_t dtb_pa)
        if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
            IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
            fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) {
-               strncat(early_cmdline, CONFIG_CMDLINE,
-                       COMMAND_LINE_SIZE - fdt_cmdline_size);
+               strlcat(early_cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
        }
 
        return early_cmdline;
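
The strncat()-to-strlcat() switch matters because the two bound different things; a small illustration (buffer name hypothetical):

    char cmdline[COMMAND_LINE_SIZE];

    /*
     * strncat()'s third argument limits how many bytes may be appended (plus a
     * terminating NUL on top), not the total size of the destination, so the
     * caller must account for what is already in the buffer.  strlcat()
     * bounds the total destination size and always NUL-terminates:
     */
    strlcat(cmdline, CONFIG_CMDLINE, sizeof(cmdline));
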
index 4f21d97..92922db 100644 (file)
@@ -171,6 +171,7 @@ void flush_thread(void)
        riscv_v_vstate_off(task_pt_regs(current));
        kfree(current->thread.vstate.datap);
        memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+       clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE);
 #endif
 }
 
@@ -178,7 +179,7 @@ void arch_release_task_struct(struct task_struct *tsk)
 {
        /* Free the vector context of datap. */
        if (has_vector())
-               kfree(tsk->thread.vstate.datap);
+               riscv_v_thread_free(tsk);
 }
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
@@ -187,6 +188,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
        *dst = *src;
        /* clear entire V context, including datap for a new task */
        memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+       memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state));
+       clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE);
 
        return 0;
 }
@@ -221,7 +224,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
                childregs->a0 = 0; /* Return value of fork() */
                p->thread.s[0] = 0;
        }
+       p->thread.riscv_v_flags = 0;
+       if (has_vector())
+               riscv_v_thread_alloc(p);
        p->thread.ra = (unsigned long)ret_from_fork;
        p->thread.sp = (unsigned long)childregs; /* kernel sp */
        return 0;
 }
+
+void __init arch_task_cache_init(void)
+{
+       riscv_v_setup_ctx_cache();
+}
index 2afe460..e8515aa 100644 (file)
@@ -99,8 +99,11 @@ static int riscv_vr_get(struct task_struct *target,
         * Ensure the vector registers have been saved to the memory before
         * copying them to membuf.
         */
-       if (target == current)
-               riscv_v_vstate_save(current, task_pt_regs(current));
+       if (target == current) {
+               get_cpu_vector_context();
+               riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+               put_cpu_vector_context();
+       }
 
        ptrace_vstate.vstart = vstate->vstart;
        ptrace_vstate.vl = vstate->vl;
index 5a62ed1..e66e099 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/bits.h>
 #include <linux/init.h>
+#include <linux/mm.h>
 #include <linux/pm.h>
 #include <linux/reboot.h>
 #include <asm/sbi.h>
@@ -571,6 +572,66 @@ long sbi_get_mimpid(void)
 }
 EXPORT_SYMBOL_GPL(sbi_get_mimpid);
 
+bool sbi_debug_console_available;
+
+int sbi_debug_console_write(const char *bytes, unsigned int num_bytes)
+{
+       phys_addr_t base_addr;
+       struct sbiret ret;
+
+       if (!sbi_debug_console_available)
+               return -EOPNOTSUPP;
+
+       if (is_vmalloc_addr(bytes))
+               base_addr = page_to_phys(vmalloc_to_page(bytes)) +
+                           offset_in_page(bytes);
+       else
+               base_addr = __pa(bytes);
+       if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes))
+               num_bytes = PAGE_SIZE - offset_in_page(bytes);
+
+       if (IS_ENABLED(CONFIG_32BIT))
+               ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
+                               num_bytes, lower_32_bits(base_addr),
+                               upper_32_bits(base_addr), 0, 0, 0);
+       else
+               ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
+                               num_bytes, base_addr, 0, 0, 0, 0);
+
+       if (ret.error == SBI_ERR_FAILURE)
+               return -EIO;
+       return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value;
+}
+
+int sbi_debug_console_read(char *bytes, unsigned int num_bytes)
+{
+       phys_addr_t base_addr;
+       struct sbiret ret;
+
+       if (!sbi_debug_console_available)
+               return -EOPNOTSUPP;
+
+       if (is_vmalloc_addr(bytes))
+               base_addr = page_to_phys(vmalloc_to_page(bytes)) +
+                           offset_in_page(bytes);
+       else
+               base_addr = __pa(bytes);
+       if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes))
+               num_bytes = PAGE_SIZE - offset_in_page(bytes);
+
+       if (IS_ENABLED(CONFIG_32BIT))
+               ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
+                               num_bytes, lower_32_bits(base_addr),
+                               upper_32_bits(base_addr), 0, 0, 0);
+       else
+               ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
+                               num_bytes, base_addr, 0, 0, 0, 0);
+
+       if (ret.error == SBI_ERR_FAILURE)
+               return -EIO;
+       return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value;
+}
+
 void __init sbi_init(void)
 {
        int ret;
@@ -612,6 +673,11 @@ void __init sbi_init(void)
                        sbi_srst_reboot_nb.priority = 192;
                        register_restart_handler(&sbi_srst_reboot_nb);
                }
+               if ((sbi_spec_version >= sbi_mk_version(2, 0)) &&
+                   (sbi_probe_extension(SBI_EXT_DBCN) > 0)) {
+                       pr_info("SBI DBCN extension detected\n");
+                       sbi_debug_console_available = true;
+               }
        } else {
                __sbi_set_timer = __sbi_set_timer_v01;
                __sbi_send_ipi  = __sbi_send_ipi_v01;
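
A rough sketch of how a console driver might sit on top of the helper above; the function and its registration are hypothetical, while sbi_debug_console_write() is the API added here (returning the number of bytes accepted or a negative errno):

    #include <linux/console.h>
    #include <asm/sbi.h>

    static void dbcn_console_write(struct console *con, const char *s, unsigned int n)
    {
            int ret;

            while (n) {
                    ret = sbi_debug_console_write(s, n);
                    if (ret <= 0)           /* -EOPNOTSUPP, -EIO, or nothing accepted */
                            break;
                    s += ret;
                    n -= ret;
            }
    }
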
index 33dfb50..501e66d 100644 (file)
@@ -86,7 +86,10 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
        /* datap is designed to be 16 byte aligned for better performance */
        WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16)));
 
-       riscv_v_vstate_save(current, regs);
+       get_cpu_vector_context();
+       riscv_v_vstate_save(&current->thread.vstate, regs);
+       put_cpu_vector_context();
+
        /* Copy everything of vstate but datap. */
        err = __copy_to_user(&state->v_state, &current->thread.vstate,
                             offsetof(struct __riscv_v_ext_state, datap));
@@ -134,7 +137,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
        if (unlikely(err))
                return err;
 
-       riscv_v_vstate_restore(current, regs);
+       riscv_v_vstate_set_restore(current, regs);
 
        return err;
 }
index 3c89b8e..2395093 100644 (file)
@@ -4,8 +4,12 @@
  * Copyright (c) 2022 Ventana Micro Systems Inc.
  */
 
+#define pr_fmt(fmt) "suspend: " fmt
+
 #include <linux/ftrace.h>
+#include <linux/suspend.h>
 #include <asm/csr.h>
+#include <asm/sbi.h>
 #include <asm/suspend.h>
 
 void suspend_save_csrs(struct suspend_context *context)
@@ -85,3 +89,43 @@ int cpu_suspend(unsigned long arg,
 
        return rc;
 }
+
+#ifdef CONFIG_RISCV_SBI
+static int sbi_system_suspend(unsigned long sleep_type,
+                             unsigned long resume_addr,
+                             unsigned long opaque)
+{
+       struct sbiret ret;
+
+       ret = sbi_ecall(SBI_EXT_SUSP, SBI_EXT_SUSP_SYSTEM_SUSPEND,
+                       sleep_type, resume_addr, opaque, 0, 0, 0);
+       if (ret.error)
+               return sbi_err_map_linux_errno(ret.error);
+
+       return ret.value;
+}
+
+static int sbi_system_suspend_enter(suspend_state_t state)
+{
+       return cpu_suspend(SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM, sbi_system_suspend);
+}
+
+static const struct platform_suspend_ops sbi_system_suspend_ops = {
+       .valid = suspend_valid_only_mem,
+       .enter = sbi_system_suspend_enter,
+};
+
+static int __init sbi_system_suspend_init(void)
+{
+       if (sbi_spec_version >= sbi_mk_version(2, 0) &&
+           sbi_probe_extension(SBI_EXT_SUSP) > 0) {
+               pr_info("SBI SUSP extension detected\n");
+               if (IS_ENABLED(CONFIG_SUSPEND))
+                       suspend_set_ops(&sbi_system_suspend_ops);
+       }
+
+       return 0;
+}
+
+arch_initcall(sbi_system_suspend_init);
+#endif /* CONFIG_RISCV_SBI */
index 578b629..6727d1d 100644 (file)
 #include <asm/bug.h>
 
 static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE);
+static struct kmem_cache *riscv_v_user_cachep;
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static struct kmem_cache *riscv_v_kernel_cachep;
+#endif
 
 unsigned long riscv_v_vsize __read_mostly;
 EXPORT_SYMBOL_GPL(riscv_v_vsize);
@@ -47,6 +51,21 @@ int riscv_v_setup_vsize(void)
        return 0;
 }
 
+void __init riscv_v_setup_ctx_cache(void)
+{
+       if (!has_vector())
+               return;
+
+       riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx",
+                                                        riscv_v_vsize, 16, SLAB_PANIC,
+                                                        0, riscv_v_vsize, NULL);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+       riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx",
+                                                 riscv_v_vsize, 16,
+                                                 SLAB_PANIC, NULL);
+#endif
+}
+
 static bool insn_is_vector(u32 insn_buf)
 {
        u32 opcode = insn_buf & __INSN_OPCODE_MASK;
@@ -80,20 +99,37 @@ static bool insn_is_vector(u32 insn_buf)
        return false;
 }
 
-static int riscv_v_thread_zalloc(void)
+static int riscv_v_thread_zalloc(struct kmem_cache *cache,
+                                struct __riscv_v_ext_state *ctx)
 {
        void *datap;
 
-       datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
+       datap = kmem_cache_zalloc(cache, GFP_KERNEL);
        if (!datap)
                return -ENOMEM;
 
-       current->thread.vstate.datap = datap;
-       memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_ext_state,
-                                                   datap));
+       ctx->datap = datap;
+       memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap));
        return 0;
 }
 
+void riscv_v_thread_alloc(struct task_struct *tsk)
+{
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+       riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate);
+#endif
+}
+
+void riscv_v_thread_free(struct task_struct *tsk)
+{
+       if (tsk->thread.vstate.datap)
+               kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+       if (tsk->thread.kernel_vstate.datap)
+               kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap);
+#endif
+}
+
 #define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)
 #define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2)
 #define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK)
@@ -122,7 +158,8 @@ static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt,
        ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt);
        if (inherit)
                ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
-       tsk->thread.vstate_ctrl = ctrl;
+       tsk->thread.vstate_ctrl &= ~PR_RISCV_V_VSTATE_CTRL_MASK;
+       tsk->thread.vstate_ctrl |= ctrl;
 }
 
 bool riscv_v_vstate_ctrl_user_allowed(void)
@@ -162,12 +199,12 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
         * context where VS has been off. So, try to allocate the user's V
         * context and resume execution.
         */
-       if (riscv_v_thread_zalloc()) {
+       if (riscv_v_thread_zalloc(riscv_v_user_cachep, &current->thread.vstate)) {
                force_sig(SIGBUS);
                return true;
        }
        riscv_v_vstate_on(regs);
-       riscv_v_vstate_restore(current, regs);
+       riscv_v_vstate_set_restore(current, regs);
        return true;
 }
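
The vstate_ctrl bits adjusted above back the userspace prctl() interface; a hedged userspace-side sketch (constants come from linux/prctl.h and were introduced together with the V uapi):

    #include <sys/prctl.h>

    /* Ask the kernel to enable V for the calling thread. */
    static int enable_vector(void)
    {
            return prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
    }
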
 
index 26cb250..bd6e6c1 100644 (file)
@@ -6,8 +6,14 @@ lib-y                  += memmove.o
 lib-y                  += strcmp.o
 lib-y                  += strlen.o
 lib-y                  += strncmp.o
+lib-y                  += csum.o
+ifeq ($(CONFIG_MMU), y)
+lib-$(CONFIG_RISCV_ISA_V)      += uaccess_vector.o
+endif
 lib-$(CONFIG_MMU)      += uaccess.o
 lib-$(CONFIG_64BIT)    += tishift.o
 lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_RISCV_ISA_V)      += xor.o
+lib-$(CONFIG_RISCV_ISA_V)      += riscv_v_helpers.o
diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c
new file mode 100644 (file)
index 0000000..af3df52
--- /dev/null
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Checksum library
+ *
+ * Influenced by arch/arm64/lib/csum.c
+ * Copyright (C) 2023 Rivos Inc.
+ */
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/jump_label.h>
+#include <linux/kasan-checks.h>
+#include <linux/kernel.h>
+
+#include <asm/cpufeature.h>
+
+#include <net/checksum.h>
+
+/* Default version is sufficient for 32 bit */
+#ifndef CONFIG_32BIT
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+                       const struct in6_addr *daddr,
+                       __u32 len, __u8 proto, __wsum csum)
+{
+       unsigned int ulen, uproto;
+       unsigned long sum = (__force unsigned long)csum;
+
+       sum += (__force unsigned long)saddr->s6_addr32[0];
+       sum += (__force unsigned long)saddr->s6_addr32[1];
+       sum += (__force unsigned long)saddr->s6_addr32[2];
+       sum += (__force unsigned long)saddr->s6_addr32[3];
+
+       sum += (__force unsigned long)daddr->s6_addr32[0];
+       sum += (__force unsigned long)daddr->s6_addr32[1];
+       sum += (__force unsigned long)daddr->s6_addr32[2];
+       sum += (__force unsigned long)daddr->s6_addr32[3];
+
+       ulen = (__force unsigned int)htonl((unsigned int)len);
+       sum += ulen;
+
+       uproto = (__force unsigned int)htonl(proto);
+       sum += uproto;
+
+       /*
+        * Zbb support saves 4 instructions, so checking for it is only
+        * worthwhile when alternatives are available to patch the check away.
+        */
+       if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+           IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+               unsigned long fold_temp;
+
+               /*
+                * Zbb is likely available when the kernel is compiled with Zbb
+                * support, so nop when Zbb is available and jump when Zbb is
+                * not available.
+                */
+               asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+                                             RISCV_ISA_EXT_ZBB, 1)
+                                 :
+                                 :
+                                 :
+                                 : no_zbb);
+               asm(".option push                                       \n\
+               .option arch,+zbb                                       \n\
+                       rori    %[fold_temp], %[sum], 32                \n\
+                       add     %[sum], %[fold_temp], %[sum]            \n\
+                       srli    %[sum], %[sum], 32                      \n\
+                       not     %[fold_temp], %[sum]                    \n\
+                       roriw   %[sum], %[sum], 16                      \n\
+                       subw    %[sum], %[fold_temp], %[sum]            \n\
+               .option pop"
+               : [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp));
+               return (__force __sum16)(sum >> 16);
+       }
+no_zbb:
+       sum += ror64(sum, 32);
+       sum >>= 32;
+       return csum_fold((__force __wsum)sum);
+}
+EXPORT_SYMBOL(csum_ipv6_magic);
+#endif /* !CONFIG_32BIT */
+
+#ifdef CONFIG_32BIT
+#define OFFSET_MASK 3
+#elif CONFIG_64BIT
+#define OFFSET_MASK 7
+#endif
+
+static inline __no_sanitize_address unsigned long
+do_csum_common(const unsigned long *ptr, const unsigned long *end,
+              unsigned long data)
+{
+       unsigned int shift;
+       unsigned long csum = 0, carry = 0;
+
+       /*
+        * Do 32-bit reads on RV32 and 64-bit reads otherwise. This should be
+        * faster than doing 32-bit reads on architectures that support larger
+        * reads.
+        */
+       while (ptr < end) {
+               csum += data;
+               carry += csum < data;
+               data = *(ptr++);
+       }
+
+       /*
+        * Handle the tail: if any bytes are left over, the final word was
+        * over-read, so mask off the bytes past the end before adding it in.
+        */
+       shift = ((long)ptr - (long)end) * 8;
+#ifdef __LITTLE_ENDIAN
+       data = (data << shift) >> shift;
+#else
+       data = (data >> shift) << shift;
+#endif
+       csum += data;
+       carry += csum < data;
+       csum += carry;
+       csum += csum < carry;
+
+       return csum;
+}
+
+/*
+ * The algorithm accounts for buff being misaligned.
+ * If buff is not aligned, it will over-read bytes but never use the bytes
+ * it shouldn't. The same applies to the tail end of the read.
+ */
+static inline __no_sanitize_address unsigned int
+do_csum_with_alignment(const unsigned char *buff, int len)
+{
+       unsigned int offset, shift;
+       unsigned long csum, data;
+       const unsigned long *ptr, *end;
+
+       /*
+        * Align address to closest word (double word on rv64) that comes before
+        * buff. This should always be in the same page and cache line.
+        * Directly call KASAN with the alignment we will be using.
+        */
+       offset = (unsigned long)buff & OFFSET_MASK;
+       kasan_check_read(buff, len);
+       ptr = (const unsigned long *)(buff - offset);
+
+       /*
+        * Clear the most significant bytes that were over-read if buff was not
+        * aligned.
+        */
+       shift = offset * 8;
+       data = *(ptr++);
+#ifdef __LITTLE_ENDIAN
+       data = (data >> shift) << shift;
+#else
+       data = (data << shift) >> shift;
+#endif
+       end = (const unsigned long *)(buff + len);
+       csum = do_csum_common(ptr, end, data);
+
+#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
+       /*
+        * Zbb support saves 6 instructions, so checking for it is only
+        * worthwhile when alternatives are available to patch the check away.
+        */
+       if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+           IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+               unsigned long fold_temp;
+
+               /*
+                * Zbb is likely available when the kernel is compiled with Zbb
+                * support, so nop when Zbb is available and jump when Zbb is
+                * not available.
+                */
+               asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+                                             RISCV_ISA_EXT_ZBB, 1)
+                                 :
+                                 :
+                                 :
+                                 : no_zbb);
+
+#ifdef CONFIG_32BIT
+               asm_volatile_goto(".option push                 \n\
+               .option arch,+zbb                               \n\
+                       rori    %[fold_temp], %[csum], 16       \n\
+                       andi    %[offset], %[offset], 1         \n\
+                       add     %[csum], %[fold_temp], %[csum]  \n\
+                       beq     %[offset], zero, %l[end]        \n\
+                       rev8    %[csum], %[csum]                \n\
+               .option pop"
+                       : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+                       : [offset] "r" (offset)
+                       :
+                       : end);
+
+               return (unsigned short)csum;
+#else /* !CONFIG_32BIT */
+               asm_volatile_goto(".option push                 \n\
+               .option arch,+zbb                               \n\
+                       rori    %[fold_temp], %[csum], 32       \n\
+                       add     %[csum], %[fold_temp], %[csum]  \n\
+                       srli    %[csum], %[csum], 32            \n\
+                       roriw   %[fold_temp], %[csum], 16       \n\
+                       addw    %[csum], %[fold_temp], %[csum]  \n\
+                       andi    %[offset], %[offset], 1         \n\
+                       beq     %[offset], zero, %l[end]        \n\
+                       rev8    %[csum], %[csum]                \n\
+               .option pop"
+                       : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+                       : [offset] "r" (offset)
+                       :
+                       : end);
+
+               return (csum << 16) >> 48;
+#endif /* !CONFIG_32BIT */
+end:
+               return csum >> 16;
+       }
+no_zbb:
+#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */
+#ifndef CONFIG_32BIT
+       csum += ror64(csum, 32);
+       csum >>= 32;
+#endif
+       csum = (u32)csum + ror32((u32)csum, 16);
+       if (offset & 1)
+               return (u16)swab32(csum);
+       return csum >> 16;
+}
+
+/*
+ * Does not perform alignment; should only be used if the machine has fast
+ * misaligned accesses, or when buff is known to be aligned.
+ */
+static inline __no_sanitize_address unsigned int
+do_csum_no_alignment(const unsigned char *buff, int len)
+{
+       unsigned long csum, data;
+       const unsigned long *ptr, *end;
+
+       ptr = (const unsigned long *)(buff);
+       data = *(ptr++);
+
+       kasan_check_read(buff, len);
+
+       end = (const unsigned long *)(buff + len);
+       csum = do_csum_common(ptr, end, data);
+
+       /*
+        * Zbb support saves 6 instructions, so checking for it is only
+        * worthwhile when alternatives are available to patch the check away.
+        */
+       if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+           IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+               unsigned long fold_temp;
+
+               /*
+                * Zbb is likely available when the kernel is compiled with Zbb
+                * support, so nop when Zbb is available and jump when Zbb is
+                * not available.
+                */
+               asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+                                             RISCV_ISA_EXT_ZBB, 1)
+                                 :
+                                 :
+                                 :
+                                 : no_zbb);
+
+#ifdef CONFIG_32BIT
+               asm (".option push                              \n\
+               .option arch,+zbb                               \n\
+                       rori    %[fold_temp], %[csum], 16       \n\
+                       add     %[csum], %[fold_temp], %[csum]  \n\
+               .option pop"
+                       : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+                       :
+                       : );
+
+#else /* !CONFIG_32BIT */
+               asm (".option push                              \n\
+               .option arch,+zbb                               \n\
+                       rori    %[fold_temp], %[csum], 32       \n\
+                       add     %[csum], %[fold_temp], %[csum]  \n\
+                       srli    %[csum], %[csum], 32            \n\
+                       roriw   %[fold_temp], %[csum], 16       \n\
+                       addw    %[csum], %[fold_temp], %[csum]  \n\
+               .option pop"
+                       : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+                       :
+                       : );
+#endif /* !CONFIG_32BIT */
+               return csum >> 16;
+       }
+no_zbb:
+#ifndef CONFIG_32BIT
+       csum += ror64(csum, 32);
+       csum >>= 32;
+#endif
+       csum = (u32)csum + ror32((u32)csum, 16);
+       return csum >> 16;
+}
+
+/*
+ * Perform a checksum over an arbitrary memory buffer.
+ * Does a lightweight address alignment if buff is misaligned, unless the
+ * CPU supports fast misaligned accesses.
+ */
+unsigned int do_csum(const unsigned char *buff, int len)
+{
+       if (unlikely(len <= 0))
+               return 0;
+
+       /*
+        * Significant performance gains can be seen by not doing alignment
+        * on machines with fast misaligned accesses.
+        *
+        * There is some duplicate code between the "with_alignment" and
+        * "no_alignment" implementations, but the overlap is too awkward to be
+        * able to fit in one function without introducing multiple static
+        * branches. The largest chunk of overlap was delegated into the
+        * do_csum_common function.
+        */
+       if (static_branch_likely(&fast_misaligned_access_speed_key))
+               return do_csum_no_alignment(buff, len);
+
+       if (((unsigned long)buff & OFFSET_MASK) == 0)
+               return do_csum_no_alignment(buff, len);
+
+       return do_csum_with_alignment(buff, len);
+}
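
The Zbb assembly above and the plain-C fallback implement the same fold; written out as ordinary C (mirroring the rv64 fallback path, without the final one's-complement negation that csum_fold() applies):

    #include <linux/bitops.h>

    static inline unsigned int fold64_to_16(unsigned long sum)
    {
            sum += ror64(sum, 32);                  /* add the two 32-bit halves */
            sum >>= 32;                             /* keep the 32-bit partial sum */
            sum = (u32)sum + ror32((u32)sum, 16);   /* add the two 16-bit halves */
            return sum >> 16;                       /* folded 16-bit result */
    }
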
diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
new file mode 100644 (file)
index 0000000..be38a93
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 SiFive
+ * Author: Andy Chiu <andy.chiu@sifive.com>
+ */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#include <asm/vector.h>
+#include <asm/simd.h>
+
+#ifdef CONFIG_MMU
+#include <asm/asm-prototypes.h>
+#endif
+
+#ifdef CONFIG_MMU
+size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD;
+int __asm_vector_usercopy(void *dst, void *src, size_t n);
+int fallback_scalar_usercopy(void *dst, void *src, size_t n);
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
+{
+       size_t remain, copied;
+
+       /* Skip the has_vector() check; the asm entry point has already done it. */
+       if (!may_use_simd())
+               goto fallback;
+
+       kernel_vector_begin();
+       remain = __asm_vector_usercopy(dst, src, n);
+       kernel_vector_end();
+
+       if (remain) {
+               copied = n - remain;
+               dst += copied;
+               src += copied;
+               n = remain;
+               goto fallback;
+       }
+
+       return remain;
+
+fallback:
+       return fallback_scalar_usercopy(dst, src, n);
+}
+#endif
index a9d356d..bc22c07 100644 (file)
@@ -3,6 +3,8 @@
 #include <asm/asm.h>
 #include <asm/asm-extable.h>
 #include <asm/csr.h>
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
 
        .macro fixup op reg addr lbl
 100:
        .endm
 
 SYM_FUNC_START(__asm_copy_to_user)
+#ifdef CONFIG_RISCV_ISA_V
+       ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V)
+       REG_L   t0, riscv_v_usercopy_threshold
+       bltu    a2, t0, fallback_scalar_usercopy
+       tail enter_vector_usercopy
+#endif
+SYM_FUNC_START(fallback_scalar_usercopy)
 
        /* Enable access to user memory */
        li t6, SR_SUM
@@ -181,6 +190,7 @@ SYM_FUNC_START(__asm_copy_to_user)
        sub a0, t5, a0
        ret
 SYM_FUNC_END(__asm_copy_to_user)
+SYM_FUNC_END(fallback_scalar_usercopy)
 EXPORT_SYMBOL(__asm_copy_to_user)
 SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
 EXPORT_SYMBOL(__asm_copy_from_user)
diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
new file mode 100644 (file)
index 0000000..51ab558
--- /dev/null
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+#include <asm/asm-extable.h>
+#include <asm/csr.h>
+
+#define pDst a0
+#define pSrc a1
+#define iNum a2
+
+#define iVL a3
+
+#define ELEM_LMUL_SETTING m8
+#define vData v0
+
+       .macro fixup op reg addr lbl
+100:
+       \op \reg, \addr
+       _asm_extable    100b, \lbl
+       .endm
+
+SYM_FUNC_START(__asm_vector_usercopy)
+       /* Enable access to user memory */
+       li      t6, SR_SUM
+       csrs    CSR_STATUS, t6
+
+loop:
+       vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+       fixup vle8.v vData, (pSrc), 10f
+       sub iNum, iNum, iVL
+       add pSrc, pSrc, iVL
+       fixup vse8.v vData, (pDst), 11f
+       add pDst, pDst, iVL
+       bnez iNum, loop
+
+       /* Exception fixup for vector load is shared with normal exit */
+10:
+       /* Disable access to user memory */
+       csrc    CSR_STATUS, t6
+       mv      a0, iNum
+       ret
+
+       /* Exception fixup code for vector store. */
+11:
+       /* Undo the subtraction after vle8.v */
+       add     iNum, iNum, iVL
+       /* Make sure the scalar fallback skips the already-processed bytes */
+       csrr    t2, CSR_VSTART
+       sub     iNum, iNum, t2
+       j       10b
+SYM_FUNC_END(__asm_vector_usercopy)
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644 (file)
index 0000000..b28f243
--- /dev/null
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/linkage.h>
+#include <linux/export.h>
+#include <asm/asm.h>
+
+SYM_FUNC_START(xor_regs_2_)
+       vsetvli a3, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a3
+       vxor.vv v16, v0, v8
+       add a2, a2, a3
+       vse8.v v16, (a1)
+       add a1, a1, a3
+       bnez a0, xor_regs_2_
+       ret
+SYM_FUNC_END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+SYM_FUNC_START(xor_regs_3_)
+       vsetvli a4, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a4
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a4
+       vxor.vv v16, v0, v16
+       add a3, a3, a4
+       vse8.v v16, (a1)
+       add a1, a1, a4
+       bnez a0, xor_regs_3_
+       ret
+SYM_FUNC_END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+SYM_FUNC_START(xor_regs_4_)
+       vsetvli a5, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a5
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a5
+       vxor.vv v0, v0, v16
+       vle8.v v24, (a4)
+       add a3, a3, a5
+       vxor.vv v16, v0, v24
+       add a4, a4, a5
+       vse8.v v16, (a1)
+       add a1, a1, a5
+       bnez a0, xor_regs_4_
+       ret
+SYM_FUNC_END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+SYM_FUNC_START(xor_regs_5_)
+       vsetvli a6, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a6
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a6
+       vxor.vv v0, v0, v16
+       vle8.v v24, (a4)
+       add a3, a3, a6
+       vxor.vv v0, v0, v24
+       vle8.v v8, (a5)
+       add a4, a4, a6
+       vxor.vv v16, v0, v8
+       add a5, a5, a6
+       vse8.v v16, (a1)
+       add a1, a1, a6
+       bnez a0, xor_regs_5_
+       ret
+SYM_FUNC_END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
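
These routines clobber vector state, so callers are expected to bracket them with the kernel-mode vector API; a hedged sketch (wrapper name and prototype assumed, argument order taken from the assembly above: byte count, destination, source):

    #include <asm/vector.h>

    extern void xor_regs_2_(unsigned long bytes, unsigned long *p1,
                            const unsigned long *p2);

    static void xor_vector_2(unsigned long bytes, unsigned long *p1,
                             const unsigned long *p2)
    {
            kernel_vector_begin();
            xor_regs_2_(bytes, p1, p2);
            kernel_vector_end();
    }
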
index 35484d8..dd1530a 100644 (file)
@@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex,
        return true;
 }
 
+static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset)
+{
+       if (unlikely(!offset || offset > MAX_REG_OFFSET))
+               return 0;
+
+       return *(unsigned long *)((unsigned long)regs + offset);
+}
+
 static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
                                unsigned long val)
 {
@@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
        return true;
 }
 
+static bool
+ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
+                                 struct pt_regs *regs)
+{
+       int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
+       int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+       unsigned long data, addr, offset;
+
+       addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long));
+
+       offset = addr & 0x7UL;
+       addr &= ~0x7UL;
+
+       data = *(unsigned long *)addr >> (offset * 8);
+
+       regs_set_gpr(regs, reg_data * sizeof(unsigned long), data);
+
+       regs->epc = get_ex_fixup(ex);
+       return true;
+}
+
 bool fixup_exception(struct pt_regs *regs)
 {
        const struct exception_table_entry *ex;
@@ -65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs)
                return ex_handler_bpf(ex, regs);
        case EX_TYPE_UACCESS_ERR_ZERO:
                return ex_handler_uaccess_err_zero(ex, regs);
+       case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
+               return ex_handler_load_unaligned_zeropad(ex, regs);
        }
 
        BUG();
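
For context, this handler backs load_unaligned_zeropad(); a hedged sketch of the caller pattern it protects (word-at-a-time string code, the wrapper is hypothetical):

    #include <asm/word-at-a-time.h>

    static unsigned long peek_word(const char *s)
    {
            /*
             * The unaligned load may cross into the next page; if that page is
             * unmapped, the fixup above replays the load from the aligned
             * address inside the mapped page and the missing bytes read back
             * as zero.
             */
            return load_unaligned_zeropad(s);
    }
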
index a659373..32cad6a 100644 (file)
@@ -1060,7 +1060,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
        kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset;
 
 #ifdef CONFIG_XIP_KERNEL
+#ifdef CONFIG_64BIT
        kernel_map.page_offset = PAGE_OFFSET_L3;
+#else
+       kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
+#endif
        kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
        kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
 
@@ -1387,10 +1391,29 @@ void __init misc_mem_init(void)
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+                              unsigned long addr, unsigned long next)
+{
+       pmd_set_huge(pmd, virt_to_phys(p), PAGE_KERNEL);
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
+                               unsigned long addr, unsigned long next)
+{
+       vmemmap_verify((pte_t *)pmdp, node, addr, next);
+       return 1;
+}
+
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                               struct vmem_altmap *altmap)
 {
-       return vmemmap_populate_basepages(start, end, node, NULL);
+       /*
+        * Note that SPARSEMEM_VMEMMAP is only selected for rv64 and that we
+        * can't use hugepage mappings with a 2-level page table: in case of
+        * memory hotplug, we would not be able to update all the page tables
+        * with the new PMDs.
+        */
+       return vmemmap_populate_hugepages(start, end, node, NULL);
 }
 #endif
 
index 8aadc5f..8d12b26 100644 (file)
@@ -98,29 +98,23 @@ static void __ipi_flush_tlb_range_asid(void *info)
        local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
 }
 
-static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
-                             unsigned long size, unsigned long stride)
+static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid,
+                             unsigned long start, unsigned long size,
+                             unsigned long stride)
 {
        struct flush_tlb_range_data ftd;
-       const struct cpumask *cmask;
-       unsigned long asid = FLUSH_TLB_NO_ASID;
        bool broadcast;
 
-       if (mm) {
-               unsigned int cpuid;
+       if (cpumask_empty(cmask))
+               return;
 
-               cmask = mm_cpumask(mm);
-               if (cpumask_empty(cmask))
-                       return;
+       if (cmask != cpu_online_mask) {
+               unsigned int cpuid;
 
                cpuid = get_cpu();
                /* check if the tlbflush needs to be sent to other CPUs */
                broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
-
-               if (static_branch_unlikely(&use_asid_allocator))
-                       asid = atomic_long_read(&mm->context.id) & asid_mask;
        } else {
-               cmask = cpu_online_mask;
                broadcast = true;
        }
 
@@ -140,25 +134,34 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
                local_flush_tlb_range_asid(start, size, stride, asid);
        }
 
-       if (mm)
+       if (cmask != cpu_online_mask)
                put_cpu();
 }
 
+static inline unsigned long get_mm_asid(struct mm_struct *mm)
+{
+       return static_branch_unlikely(&use_asid_allocator) ?
+                       atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
+}
+
 void flush_tlb_mm(struct mm_struct *mm)
 {
-       __flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+       __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm),
+                         0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
 }
 
 void flush_tlb_mm_range(struct mm_struct *mm,
                        unsigned long start, unsigned long end,
                        unsigned int page_size)
 {
-       __flush_tlb_range(mm, start, end - start, page_size);
+       __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm),
+                         start, end - start, page_size);
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 {
-       __flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE);
+       __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+                         addr, PAGE_SIZE, PAGE_SIZE);
 }
 
 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
@@ -190,18 +193,44 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                }
        }
 
-       __flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
+       __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+                         start, end - start, stride_size);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-       __flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
+       __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID,
+                         start, end - start, PAGE_SIZE);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
                        unsigned long end)
 {
-       __flush_tlb_range(vma->vm_mm, start, end - start, PMD_SIZE);
+       __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+                         start, end - start, PMD_SIZE);
 }
 #endif
+
+bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+       return true;
+}
+
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+                              struct mm_struct *mm,
+                              unsigned long uaddr)
+{
+       cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+}
+
+void arch_flush_tlb_batched_pending(struct mm_struct *mm)
+{
+       flush_tlb_mm(mm);
+}
+
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+       __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
+                         FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+}
index 4f9264d..6e05c5c 100644 (file)
@@ -108,7 +108,7 @@ config HVC_DCC_SERIALIZE_SMP
 
 config HVC_RISCV_SBI
        bool "RISC-V SBI console support"
-       depends on RISCV_SBI_V01
+       depends on RISCV_SBI
        select HVC_DRIVER
        help
          This enables support for console output via RISC-V SBI calls, which
index a725912..cede8a5 100644 (file)
@@ -40,21 +40,44 @@ static ssize_t hvc_sbi_tty_get(uint32_t vtermno, u8 *buf, size_t count)
        return i;
 }
 
-static const struct hv_ops hvc_sbi_ops = {
+static const struct hv_ops hvc_sbi_v01_ops = {
        .get_chars = hvc_sbi_tty_get,
        .put_chars = hvc_sbi_tty_put,
 };
 
-static int __init hvc_sbi_init(void)
+static ssize_t hvc_sbi_dbcn_tty_put(uint32_t vtermno, const u8 *buf, size_t count)
 {
-       return PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_ops, 16));
+       return sbi_debug_console_write(buf, count);
 }
-device_initcall(hvc_sbi_init);
 
-static int __init hvc_sbi_console_init(void)
+static ssize_t hvc_sbi_dbcn_tty_get(uint32_t vtermno, u8 *buf, size_t count)
 {
-       hvc_instantiate(0, 0, &hvc_sbi_ops);
+       return sbi_debug_console_read(buf, count);
+}
+
+static const struct hv_ops hvc_sbi_dbcn_ops = {
+       .put_chars = hvc_sbi_dbcn_tty_put,
+       .get_chars = hvc_sbi_dbcn_tty_get,
+};
+
+static int __init hvc_sbi_init(void)
+{
+       int err;
+
+       if (sbi_debug_console_available) {
+               err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_dbcn_ops, 256));
+               if (err)
+                       return err;
+               hvc_instantiate(0, 0, &hvc_sbi_dbcn_ops);
+       } else if (IS_ENABLED(CONFIG_RISCV_SBI_V01)) {
+               err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_v01_ops, 256));
+               if (err)
+                       return err;
+               hvc_instantiate(0, 0, &hvc_sbi_v01_ops);
+       } else {
+               return -ENODEV;
+       }
 
        return 0;
 }
-console_initcall(hvc_sbi_console_init);
+device_initcall(hvc_sbi_init);
index 8b1f575..ffcf488 100644 (file)
@@ -87,7 +87,7 @@ config SERIAL_EARLYCON_SEMIHOST
 
 config SERIAL_EARLYCON_RISCV_SBI
        bool "Early console using RISC-V SBI"
-       depends on RISCV_SBI_V01
+       depends on RISCV_SBI
        select SERIAL_CORE
        select SERIAL_CORE_CONSOLE
        select SERIAL_EARLYCON
index 27afb0b..0162155 100644 (file)
@@ -15,17 +15,38 @@ static void sbi_putc(struct uart_port *port, unsigned char c)
        sbi_console_putchar(c);
 }
 
-static void sbi_console_write(struct console *con,
-                             const char *s, unsigned n)
+static void sbi_0_1_console_write(struct console *con,
+                                 const char *s, unsigned int n)
 {
        struct earlycon_device *dev = con->data;
        uart_console_write(&dev->port, s, n, sbi_putc);
 }
 
+static void sbi_dbcn_console_write(struct console *con,
+                                  const char *s, unsigned int n)
+{
+       int ret;
+
+       while (n) {
+               ret = sbi_debug_console_write(s, n);
+               if (ret < 0)
+                       break;
+
+               s += ret;
+               n -= ret;
+       }
+}
+
 static int __init early_sbi_setup(struct earlycon_device *device,
                                  const char *opt)
 {
-       device->con->write = sbi_console_write;
+       if (sbi_debug_console_available)
+               device->con->write = sbi_dbcn_console_write;
+       else if (IS_ENABLED(CONFIG_RISCV_SBI_V01))
+               device->con->write = sbi_0_1_console_write;
+       else
+               return -ENODEV;
+
        return 0;
 }
 EARLYCON_DECLARE(sbi, early_sbi_setup);
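Note on sbi_dbcn_console_write() above: an SBI debug-console write can be
partial, with the return value giving the number of bytes actually consumed, so
the console write loops until the buffer is drained or an error comes back. The
same pattern in plain user-space C, purely as an illustration and not part of
the patch:

	#include <stddef.h>
	#include <unistd.h>

	/* Keep writing until all n bytes are out, mirroring the console loop. */
	static int write_all(int fd, const char *s, size_t n)
	{
		while (n) {
			ssize_t ret = write(fd, s, n);	/* may consume fewer than n bytes */

			if (ret <= 0)
				return -1;		/* error or no progress: give up */
			s += ret;
			n -= ret;
		}
		return 0;
	}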
index 43e18db..ad928cc 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef __ASM_GENERIC_CHECKSUM_H
 #define __ASM_GENERIC_CHECKSUM_H
 
+#include <linux/bitops.h>
+
 /*
  * computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit)
@@ -31,9 +33,7 @@ extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
 static inline __sum16 csum_fold(__wsum csum)
 {
        u32 sum = (__force u32)csum;
-       sum = (sum & 0xffff) + (sum >> 16);
-       sum = (sum & 0xffff) + (sum >> 16);
-       return (__force __sum16)~sum;
+       return (__force __sum16)((~sum - ror32(sum, 16)) >> 16);
 }
 #endif
 
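The rewritten csum_fold() above replaces the two explicit folds with a single
subtraction: with sum split into 16-bit halves H and L, ~sum - ror32(sum, 16)
leaves the ones-complement of H + L (carry already folded back in) in the top
16 bits. A quick user-space check of that equivalence, using nothing beyond
standard C and not part of the patch:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t ror32(uint32_t x, unsigned int n)
	{
		return (x >> n) | (x << (32 - n));
	}

	/* The old csum_fold(): fold twice, then complement. */
	static uint16_t fold_old(uint32_t sum)
	{
		sum = (sum & 0xffff) + (sum >> 16);
		sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)~sum;
	}

	/* The new csum_fold(): one subtraction, result taken from the high half. */
	static uint16_t fold_new(uint32_t sum)
	{
		return (uint16_t)((~sum - ror32(sum, 16)) >> 16);
	}

	int main(void)
	{
		static const uint32_t samples[] = {
			0x0, 0x1, 0xffff, 0x10000, 0x1ffff,
			0xfffeffff, 0xffff0000, 0xffffffff,
		};

		for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
			if (fold_old(samples[i]) != fold_new(samples[i]))
				printf("mismatch at %#x\n", samples[i]);
		return 0;
	}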
index ba25129..975a07f 100644 (file)
@@ -231,9 +231,10 @@ config DEBUG_INFO
          in the "Debug information" choice below, indicating that debug
          information will be generated for build targets.
 
-# Clang is known to generate .{s,u}leb128 with symbol deltas with DWARF5, which
-# some targets may not support: https://sourceware.org/bugzilla/show_bug.cgi?id=27215
-config AS_HAS_NON_CONST_LEB128
+# Clang generates .uleb128 with label differences for DWARF v5, a feature that
+# older binutils ports do not support when utilizing RISC-V style linker
+# relaxation: https://sourceware.org/bugzilla/show_bug.cgi?id=27215
+config AS_HAS_NON_CONST_ULEB128
        def_bool $(as-instr,.uleb128 .Lexpr_end4 - .Lexpr_start3\n.Lexpr_start3:\n.Lexpr_end4:)
 
 choice
@@ -258,7 +259,7 @@ config DEBUG_INFO_NONE
 config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT
        bool "Rely on the toolchain's implicit default DWARF version"
        select DEBUG_INFO
-       depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128)
+       depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128)
        help
          The implicit default version of DWARF debug info produced by a
          toolchain changes over time.
@@ -282,7 +283,8 @@ config DEBUG_INFO_DWARF4
 config DEBUG_INFO_DWARF5
        bool "Generate DWARF Version 5 debuginfo"
        select DEBUG_INFO
-       depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128)
+       depends on !ARCH_HAS_BROKEN_DWARF5
+       depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128)
        help
          Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc
          5.0+ accepts the -gdwarf-5 flag but only had partial support for some
index 0eed92b..225bb77 100644 (file)
@@ -1,15 +1,21 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- * Test cases csum_partial and csum_fold
+ * Test cases csum_partial, csum_fold, ip_fast_csum, csum_ipv6_magic
  */
 
 #include <kunit/test.h>
 #include <asm/checksum.h>
+#include <net/ip6_checksum.h>
 
 #define MAX_LEN 512
 #define MAX_ALIGN 64
 #define TEST_BUFLEN (MAX_LEN + MAX_ALIGN)
 
+#define IPv4_MIN_WORDS 5
+#define IPv4_MAX_WORDS 15
+#define NUM_IPv6_TESTS 200
+#define NUM_IP_FAST_CSUM_TESTS 181
+
 /* Values for a little endian CPU. Byte swap each half on big endian CPU. */
 static const u32 random_init_sum = 0x2847aab;
 static const u8 random_buf[] = {
@@ -209,6 +215,237 @@ static const u32 init_sums_no_overflow[] = {
        0xffff0000, 0xfffffffb,
 };
 
+static const __sum16 expected_csum_ipv6_magic[] = {
+       0x18d4, 0x3085, 0x2e4b, 0xd9f4, 0xbdc8, 0x78f,  0x1034, 0x8422, 0x6fc0,
+       0xd2f6, 0xbeb5, 0x9d3,  0x7e2a, 0x312e, 0x778e, 0xc1bb, 0x7cf2, 0x9d1e,
+       0xca21, 0xf3ff, 0x7569, 0xb02e, 0xca86, 0x7e76, 0x4539, 0x45e3, 0xf28d,
+       0xdf81, 0x8fd5, 0x3b5d, 0x8324, 0xf471, 0x83be, 0x1daf, 0x8c46, 0xe682,
+       0xd1fb, 0x6b2e, 0xe687, 0x2a33, 0x4833, 0x2d67, 0x660f, 0x2e79, 0xd65e,
+       0x6b62, 0x6672, 0x5dbd, 0x8680, 0xbaa5, 0x2229, 0x2125, 0x2d01, 0x1cc0,
+       0x6d36, 0x33c0, 0xee36, 0xd832, 0x9820, 0x8a31, 0x53c5, 0x2e2,  0xdb0e,
+       0x49ed, 0x17a7, 0x77a0, 0xd72e, 0x3d72, 0x7dc8, 0x5b17, 0xf55d, 0xa4d9,
+       0x1446, 0x5d56, 0x6b2e, 0x69a5, 0xadb6, 0xff2a, 0x92e,  0xe044, 0x3402,
+       0xbb60, 0xec7f, 0xe7e6, 0x1986, 0x32f4, 0x8f8,  0x5e00, 0x47c6, 0x3059,
+       0x3969, 0xe957, 0x4388, 0x2854, 0x3334, 0xea71, 0xa6de, 0x33f9, 0x83fc,
+       0x37b4, 0x5531, 0x3404, 0x1010, 0xed30, 0x610a, 0xc95,  0x9aed, 0x6ff,
+       0x5136, 0x2741, 0x660e, 0x8b80, 0xf71,  0xa263, 0x88af, 0x7a73, 0x3c37,
+       0x1908, 0x6db5, 0x2e92, 0x1cd2, 0x70c8, 0xee16, 0xe80,  0xcd55, 0x6e6,
+       0x6434, 0x127,  0x655d, 0x2ea0, 0xb4f4, 0xdc20, 0x5671, 0xe462, 0xe52b,
+       0xdb44, 0x3589, 0xc48f, 0xe60b, 0xd2d2, 0x66ad, 0x498,  0x436,  0xb917,
+       0xf0ca, 0x1a6e, 0x1cb7, 0xbf61, 0x2870, 0xc7e8, 0x5b30, 0xe4a5, 0x168,
+       0xadfc, 0xd035, 0xe690, 0xe283, 0xfb27, 0xe4ad, 0xb1a5, 0xf2d5, 0xc4b6,
+       0x8a30, 0xd7d5, 0x7df9, 0x91d5, 0x63ed, 0x2d21, 0x312b, 0xab19, 0xa632,
+       0x8d2e, 0xef06, 0x57b9, 0xc373, 0xbd1f, 0xa41f, 0x8444, 0x9975, 0x90cb,
+       0xc49c, 0xe965, 0x4eff, 0x5a,   0xef6d, 0xe81a, 0xe260, 0x853a, 0xff7a,
+       0x99aa, 0xb06b, 0xee19, 0xcc2c, 0xf34c, 0x7c49, 0xdac3, 0xa71e, 0xc988,
+       0x3845, 0x1014
+};
+
+static const __sum16 expected_fast_csum[] = {
+       0xda83, 0x45da, 0x4f46, 0x4e4f, 0x34e,  0xe902, 0xa5e9, 0x87a5, 0x7187,
+       0x5671, 0xf556, 0x6df5, 0x816d, 0x8f81, 0xbb8f, 0xfbba, 0x5afb, 0xbe5a,
+       0xedbe, 0xabee, 0x6aac, 0xe6b,  0xea0d, 0x67ea, 0x7e68, 0x8a7e, 0x6f8a,
+       0x3a70, 0x9f3a, 0xe89e, 0x75e8, 0x7976, 0xfa79, 0x2cfa, 0x3c2c, 0x463c,
+       0x7146, 0x7a71, 0x547a, 0xfd53, 0x99fc, 0xb699, 0x92b6, 0xdb91, 0xe8da,
+       0x5fe9, 0x1e60, 0xae1d, 0x39ae, 0xf439, 0xa1f4, 0xdda1, 0xede,  0x790f,
+       0x579,  0x1206, 0x9012, 0x2490, 0xd224, 0x5cd2, 0xa65d, 0xca7,  0x220d,
+       0xf922, 0xbf9,  0x920b, 0x1b92, 0x361c, 0x2e36, 0x4d2e, 0x24d,  0x2,
+       0xcfff, 0x90cf, 0xa591, 0x93a5, 0x7993, 0x9579, 0xc894, 0x50c8, 0x5f50,
+       0xd55e, 0xcad5, 0xf3c9, 0x8f4,  0x4409, 0x5043, 0x5b50, 0x55b,  0x2205,
+       0x1e22, 0x801e, 0x3780, 0xe137, 0x7ee0, 0xf67d, 0x3cf6, 0xa53c, 0x2ea5,
+       0x472e, 0x5147, 0xcf51, 0x1bcf, 0x951c, 0x1e95, 0xc71e, 0xe4c7, 0xc3e4,
+       0x3dc3, 0xee3d, 0xa4ed, 0xf9a4, 0xcbf8, 0x75cb, 0xb375, 0x50b4, 0x3551,
+       0xf835, 0x19f8, 0x8c1a, 0x538c, 0xad52, 0xa3ac, 0xb0a3, 0x5cb0, 0x6c5c,
+       0x5b6c, 0xc05a, 0x92c0, 0x4792, 0xbe47, 0x53be, 0x1554, 0x5715, 0x4b57,
+       0xe54a, 0x20e5, 0x21,   0xd500, 0xa1d4, 0xa8a1, 0x57a9, 0xca57, 0x5ca,
+       0x1c06, 0x4f1c, 0xe24e, 0xd9e2, 0xf0d9, 0x4af1, 0x474b, 0x8146, 0xe81,
+       0xfd0e, 0x84fd, 0x7c85, 0xba7c, 0x17ba, 0x4a17, 0x964a, 0xf595, 0xff5,
+       0x5310, 0x3253, 0x6432, 0x4263, 0x2242, 0xe121, 0x32e1, 0xf632, 0xc5f5,
+       0x21c6, 0x7d22, 0x8e7c, 0x418e, 0x5641, 0x3156, 0x7c31, 0x737c, 0x373,
+       0x2503, 0xc22a, 0x3c2,  0x4a04, 0x8549, 0x5285, 0xa352, 0xe8a3, 0x6fe8,
+       0x1a6f, 0x211a, 0xe021, 0x38e0, 0x7638, 0xf575, 0x9df5, 0x169e, 0xf116,
+       0x23f1, 0xcd23, 0xece,  0x660f, 0x4866, 0x6a48, 0x716a, 0xee71, 0xa2ee,
+       0xb8a2, 0x61b9, 0xa361, 0xf7a2, 0x26f7, 0x1127, 0x6611, 0xe065, 0x36e0,
+       0x1837, 0x3018, 0x1c30, 0x721b, 0x3e71, 0xe43d, 0x99e4, 0x9e9a, 0xb79d,
+       0xa9b7, 0xcaa,  0xeb0c, 0x4eb,  0x1305, 0x8813, 0xb687, 0xa9b6, 0xfba9,
+       0xd7fb, 0xccd8, 0x2ecd, 0x652f, 0xae65, 0x3fae, 0x3a40, 0x563a, 0x7556,
+       0x2776, 0x1228, 0xef12, 0xf9ee, 0xcef9, 0x56cf, 0xa956, 0x24a9, 0xba24,
+       0x5fba, 0x665f, 0xf465, 0x8ff4, 0x6d8f, 0x346d, 0x5f34, 0x385f, 0xd137,
+       0xb8d0, 0xacb8, 0x55ac, 0x7455, 0xe874, 0x89e8, 0xd189, 0xa0d1, 0xb2a0,
+       0xb8b2, 0x36b8, 0x5636, 0xd355, 0x8d3,  0x1908, 0x2118, 0xc21,  0x990c,
+       0x8b99, 0x158c, 0x7815, 0x9e78, 0x6f9e, 0x4470, 0x1d44, 0x341d, 0x2634,
+       0x3f26, 0x793e, 0xc79,  0xcc0b, 0x26cc, 0xd126, 0x1fd1, 0xb41f, 0xb6b4,
+       0x22b7, 0xa122, 0xa1,   0x7f01, 0x837e, 0x3b83, 0xaf3b, 0x6fae, 0x916f,
+       0xb490, 0xffb3, 0xceff, 0x50cf, 0x7550, 0x7275, 0x1272, 0x2613, 0xaa26,
+       0xd5aa, 0x7d5,  0x9607, 0x96,   0xb100, 0xf8b0, 0x4bf8, 0xdd4c, 0xeddd,
+       0x98ed, 0x2599, 0x9325, 0xeb92, 0x8feb, 0xcc8f, 0x2acd, 0x392b, 0x3b39,
+       0xcb3b, 0x6acb, 0xd46a, 0xb8d4, 0x6ab8, 0x106a, 0x2f10, 0x892f, 0x789,
+       0xc806, 0x45c8, 0x7445, 0x3c74, 0x3a3c, 0xcf39, 0xd7ce, 0x58d8, 0x6e58,
+       0x336e, 0x1034, 0xee10, 0xe9ed, 0xc2e9, 0x3fc2, 0xd53e, 0xd2d4, 0xead2,
+       0x8fea, 0x2190, 0x1162, 0xbe11, 0x8cbe, 0x6d8c, 0xfb6c, 0x6dfb, 0xd36e,
+       0x3ad3, 0xf3a,  0x870e, 0xc287, 0x53c3, 0xc54,  0x5b0c, 0x7d5a, 0x797d,
+       0xec79, 0x5dec, 0x4d5e, 0x184e, 0xd618, 0x60d6, 0xb360, 0x98b3, 0xf298,
+       0xb1f2, 0x69b1, 0xf969, 0xef9,  0xab0e, 0x21ab, 0xe321, 0x24e3, 0x8224,
+       0x5481, 0x5954, 0x7a59, 0xff7a, 0x7dff, 0x1a7d, 0xa51a, 0x46a5, 0x6b47,
+       0xe6b,  0x830e, 0xa083, 0xff9f, 0xd0ff, 0xffd0, 0xe6ff, 0x7de7, 0xc67d,
+       0xd0c6, 0x61d1, 0x3a62, 0xc3b,  0x150c, 0x1715, 0x4517, 0x5345, 0x3954,
+       0xdd39, 0xdadd, 0x32db, 0x6a33, 0xd169, 0x86d1, 0xb687, 0x3fb6, 0x883f,
+       0xa487, 0x39a4, 0x2139, 0xbe20, 0xffbe, 0xedfe, 0x8ded, 0x368e, 0xc335,
+       0x51c3, 0x9851, 0xf297, 0xd6f2, 0xb9d6, 0x95ba, 0x2096, 0xea1f, 0x76e9,
+       0x4e76, 0xe04d, 0xd0df, 0x80d0, 0xa280, 0xfca2, 0x75fc, 0xef75, 0x32ef,
+       0x6833, 0xdf68, 0xc4df, 0x76c4, 0xb77,  0xb10a, 0xbfb1, 0x58bf, 0x5258,
+       0x4d52, 0x6c4d, 0x7e6c, 0xb67e, 0xccb5, 0x8ccc, 0xbe8c, 0xc8bd, 0x9ac8,
+       0xa99b, 0x52a9, 0x2f53, 0xc30,  0x3e0c, 0xb83d, 0x83b7, 0x5383, 0x7e53,
+       0x4f7e, 0xe24e, 0xb3e1, 0x8db3, 0x618e, 0xc861, 0xfcc8, 0x34fc, 0x9b35,
+       0xaa9b, 0xb1aa, 0x5eb1, 0x395e, 0x8639, 0xd486, 0x8bd4, 0x558b, 0x2156,
+       0xf721, 0x4ef6, 0x14f,  0x7301, 0xdd72, 0x49de, 0x894a, 0x9889, 0x8898,
+       0x7788, 0x7b77, 0x637b, 0xb963, 0xabb9, 0x7cab, 0xc87b, 0x21c8, 0xcb21,
+       0xdfca, 0xbfdf, 0xf2bf, 0x6af2, 0x626b, 0xb261, 0x3cb2, 0xc63c, 0xc9c6,
+       0xc9c9, 0xb4c9, 0xf9b4, 0x91f9, 0x4091, 0x3a40, 0xcc39, 0xd1cb, 0x7ed1,
+       0x537f, 0x6753, 0xa167, 0xba49, 0x88ba, 0x7789, 0x3877, 0xf037, 0xd3ef,
+       0xb5d4, 0x55b6, 0xa555, 0xeca4, 0xa1ec, 0xb6a2, 0x7b7,  0x9507, 0xfd94,
+       0x82fd, 0x5c83, 0x765c, 0x9676, 0x3f97, 0xda3f, 0x6fda, 0x646f, 0x3064,
+       0x5e30, 0x655e, 0x6465, 0xcb64, 0xcdca, 0x4ccd, 0x3f4c, 0x243f, 0x6f24,
+       0x656f, 0x6065, 0x3560, 0x3b36, 0xac3b, 0x4aac, 0x714a, 0x7e71, 0xda7e,
+       0x7fda, 0xda7f, 0x6fda, 0xff6f, 0xc6ff, 0xedc6, 0xd4ed, 0x70d5, 0xeb70,
+       0xa3eb, 0x80a3, 0xca80, 0x3fcb, 0x2540, 0xf825, 0x7ef8, 0xf87e, 0x73f8,
+       0xb474, 0xb4b4, 0x92b5, 0x9293, 0x93,   0x3500, 0x7134, 0x9071, 0xfa8f,
+       0x51fa, 0x1452, 0xba13, 0x7ab9, 0x957a, 0x8a95, 0x6e8a, 0x6d6e, 0x7c6d,
+       0x447c, 0x9744, 0x4597, 0x8945, 0xef88, 0x8fee, 0x3190, 0x4831, 0x8447,
+       0xa183, 0x1da1, 0xd41d, 0x2dd4, 0x4f2e, 0xc94e, 0xcbc9, 0xc9cb, 0x9ec9,
+       0x319e, 0xd531, 0x20d5, 0x4021, 0xb23f, 0x29b2, 0xd828, 0xecd8, 0x5ded,
+       0xfc5d, 0x4dfc, 0xd24d, 0x6bd2, 0x5f6b, 0xb35e, 0x7fb3, 0xee7e, 0x56ee,
+       0xa657, 0x68a6, 0x8768, 0x7787, 0xb077, 0x4cb1, 0x764c, 0xb175, 0x7b1,
+       0x3d07, 0x603d, 0x3560, 0x3e35, 0xb03d, 0xd6b0, 0xc8d6, 0xd8c8, 0x8bd8,
+       0x3e8c, 0x303f, 0xd530, 0xf1d4, 0x42f1, 0xca42, 0xddca, 0x41dd, 0x3141,
+       0x132,  0xe901, 0x8e9,  0xbe09, 0xe0bd, 0x2ce0, 0x862d, 0x3986, 0x9139,
+       0x6d91, 0x6a6d, 0x8d6a, 0x1b8d, 0xac1b, 0xedab, 0x54ed, 0xc054, 0xcebf,
+       0xc1ce, 0x5c2,  0x3805, 0x6038, 0x5960, 0xd359, 0xdd3,  0xbe0d, 0xafbd,
+       0x6daf, 0x206d, 0x2c20, 0x862c, 0x8e86, 0xec8d, 0xa2ec, 0xa3a2, 0x51a3,
+       0x8051, 0xfd7f, 0x91fd, 0xa292, 0xaf14, 0xeeae, 0x59ef, 0x535a, 0x8653,
+       0x3986, 0x9539, 0xb895, 0xa0b8, 0x26a0, 0x2227, 0xc022, 0x77c0, 0xad77,
+       0x46ad, 0xaa46, 0x60aa, 0x8560, 0x4785, 0xd747, 0x45d7, 0x2346, 0x5f23,
+       0x25f,  0x1d02, 0x71d,  0x8206, 0xc82,  0x180c, 0x3018, 0x4b30, 0x4b,
+       0x3001, 0x1230, 0x2d12, 0x8c2d, 0x148d, 0x4015, 0x5f3f, 0x3d5f, 0x6b3d,
+       0x396b, 0x473a, 0xf746, 0x44f7, 0x8945, 0x3489, 0xcb34, 0x84ca, 0xd984,
+       0xf0d9, 0xbcf0, 0x63bd, 0x3264, 0xf332, 0x45f3, 0x7346, 0x5673, 0xb056,
+       0xd3b0, 0x4ad4, 0x184b, 0x7d18, 0x6c7d, 0xbb6c, 0xfeba, 0xe0fe, 0x10e1,
+       0x5410, 0x2954, 0x9f28, 0x3a9f, 0x5a3a, 0xdb59, 0xbdc,  0xb40b, 0x1ab4,
+       0x131b, 0x5d12, 0x6d5c, 0xe16c, 0xb0e0, 0x89b0, 0xba88, 0xbb,   0x3c01,
+       0xe13b, 0x6fe1, 0x446f, 0xa344, 0x81a3, 0xfe81, 0xc7fd, 0x38c8, 0xb38,
+       0x1a0b, 0x6d19, 0xf36c, 0x47f3, 0x6d48, 0xb76d, 0xd3b7, 0xd8d2, 0x52d9,
+       0x4b53, 0xa54a, 0x34a5, 0xc534, 0x9bc4, 0xed9b, 0xbeed, 0x3ebe, 0x233e,
+       0x9f22, 0x4a9f, 0x774b, 0x4577, 0xa545, 0x64a5, 0xb65,  0x870b, 0x487,
+       0x9204, 0x5f91, 0xd55f, 0x35d5, 0x1a35, 0x71a,  0x7a07, 0x4e7a, 0xfc4e,
+       0x1efc, 0x481f, 0x7448, 0xde74, 0xa7dd, 0x1ea7, 0xaa1e, 0xcfaa, 0xfbcf,
+       0xedfb, 0x6eee, 0x386f, 0x4538, 0x6e45, 0xd96d, 0x11d9, 0x7912, 0x4b79,
+       0x494b, 0x6049, 0xac5f, 0x65ac, 0x1366, 0x5913, 0xe458, 0x7ae4, 0x387a,
+       0x3c38, 0xb03c, 0x76b0, 0x9376, 0xe193, 0x42e1, 0x7742, 0x6476, 0x3564,
+       0x3c35, 0x6a3c, 0xcc69, 0x94cc, 0x5d95, 0xe5e,  0xee0d, 0x4ced, 0xce4c,
+       0x52ce, 0xaa52, 0xdaaa, 0xe4da, 0x1de5, 0x4530, 0x5445, 0x3954, 0xb639,
+       0x81b6, 0x7381, 0x1574, 0xc215, 0x10c2, 0x3f10, 0x6b3f, 0xe76b, 0x7be7,
+       0xbc7b, 0xf7bb, 0x41f7, 0xcc41, 0x38cc, 0x4239, 0xa942, 0x4a9,  0xc504,
+       0x7cc4, 0x437c, 0x6743, 0xea67, 0x8dea, 0xe88d, 0xd8e8, 0xdcd8, 0x17dd,
+       0x5718, 0x958,  0xa609, 0x41a5, 0x5842, 0x159,  0x9f01, 0x269f, 0x5a26,
+       0x405a, 0xc340, 0xb4c3, 0xd4b4, 0xf4d3, 0xf1f4, 0x39f2, 0xe439, 0x67e4,
+       0x4168, 0xa441, 0xdda3, 0xdedd, 0x9df,  0xab0a, 0xa5ab, 0x9a6,  0xba09,
+       0x9ab9, 0xad9a, 0x5ae,  0xe205, 0xece2, 0xecec, 0x14ed, 0xd614, 0x6bd5,
+       0x916c, 0x3391, 0x6f33, 0x206f, 0x8020, 0x780,  0x7207, 0x2472, 0x8a23,
+       0xb689, 0x3ab6, 0xf739, 0x97f6, 0xb097, 0xa4b0, 0xe6a4, 0x88e6, 0x2789,
+       0xb28,  0x350b, 0x1f35, 0x431e, 0x1043, 0xc30f, 0x79c3, 0x379,  0x5703,
+       0x3256, 0x4732, 0x7247, 0x9d72, 0x489d, 0xd348, 0xa4d3, 0x7ca4, 0xbf7b,
+       0x45c0, 0x7b45, 0x337b, 0x4034, 0x843f, 0xd083, 0x35d0, 0x6335, 0x4d63,
+       0xe14c, 0xcce0, 0xfecc, 0x35ff, 0x5636, 0xf856, 0xeef8, 0x2def, 0xfc2d,
+       0x4fc,  0x6e04, 0xb66d, 0x78b6, 0xbb78, 0x3dbb, 0x9a3d, 0x839a, 0x9283,
+       0x593,  0xd504, 0x23d5, 0x5424, 0xd054, 0x61d0, 0xdb61, 0x17db, 0x1f18,
+       0x381f, 0x9e37, 0x679e, 0x1d68, 0x381d, 0x8038, 0x917f, 0x491,  0xbb04,
+       0x23bb, 0x4124, 0xd41,  0xa30c, 0x8ba3, 0x8b8b, 0xc68b, 0xd2c6, 0xebd2,
+       0x93eb, 0xbd93, 0x99bd, 0x1a99, 0xea19, 0x58ea, 0xcf58, 0x73cf, 0x1073,
+       0x9e10, 0x139e, 0xea13, 0xcde9, 0x3ecd, 0x883f, 0xf89,  0x180f, 0x2a18,
+       0x212a, 0xce20, 0x73ce, 0xf373, 0x60f3, 0xad60, 0x4093, 0x8e40, 0xb98e,
+       0xbfb9, 0xf1bf, 0x8bf1, 0x5e8c, 0xe95e, 0x14e9, 0x4e14, 0x1c4e, 0x7f1c,
+       0xe77e, 0x6fe7, 0xf26f, 0x13f2, 0x8b13, 0xda8a, 0x5fda, 0xea5f, 0x4eea,
+       0xa84f, 0x88a8, 0x1f88, 0x2820, 0x9728, 0x5a97, 0x3f5b, 0xb23f, 0x70b2,
+       0x2c70, 0x232d, 0xf623, 0x4f6,  0x905,  0x7509, 0xd675, 0x28d7, 0x9428,
+       0x3794, 0xf036, 0x2bf0, 0xba2c, 0xedb9, 0xd7ed, 0x59d8, 0xed59, 0x4ed,
+       0xe304, 0x18e3, 0x5c19, 0x3d5c, 0x753d, 0x6d75, 0x956d, 0x7f95, 0xc47f,
+       0x83c4, 0xa84,  0x2e0a, 0x5f2e, 0xb95f, 0x77b9, 0x6d78, 0xf46d, 0x1bf4,
+       0xed1b, 0xd6ed, 0xe0d6, 0x5e1,  0x3905, 0x5638, 0xa355, 0x99a2, 0xbe99,
+       0xb4bd, 0x85b4, 0x2e86, 0x542e, 0x6654, 0xd765, 0x73d7, 0x3a74, 0x383a,
+       0x2638, 0x7826, 0x7677, 0x9a76, 0x7e99, 0x2e7e, 0xea2d, 0xa6ea, 0x8a7,
+       0x109,  0x3300, 0xad32, 0x5fad, 0x465f, 0x2f46, 0xc62f, 0xd4c5, 0xad5,
+       0xcb0a, 0x4cb,  0xb004, 0x7baf, 0xe47b, 0x92e4, 0x8e92, 0x638e, 0x1763,
+       0xc17,  0xf20b, 0x1ff2, 0x8920, 0x5889, 0xcb58, 0xf8cb, 0xcaf8, 0x84cb,
+       0x9f84, 0x8a9f, 0x918a, 0x4991, 0x8249, 0xff81, 0x46ff, 0x5046, 0x5f50,
+       0x725f, 0xf772, 0x8ef7, 0xe08f, 0xc1e0, 0x1fc2, 0x9e1f, 0x8b9d, 0x108b,
+       0x411,  0x2b04, 0xb02a, 0x1fb0, 0x1020, 0x7a0f, 0x587a, 0x8958, 0xb188,
+       0xb1b1, 0x49b2, 0xb949, 0x7ab9, 0x917a, 0xfc91, 0xe6fc, 0x47e7, 0xbc47,
+       0x8fbb, 0xea8e, 0x34ea, 0x2635, 0x1726, 0x9616, 0xc196, 0xa6c1, 0xf3a6,
+       0x11f3, 0x4811, 0x3e48, 0xeb3e, 0xf7ea, 0x1bf8, 0xdb1c, 0x8adb, 0xe18a,
+       0x42e1, 0x9d42, 0x5d9c, 0x6e5d, 0x286e, 0x4928, 0x9a49, 0xb09c, 0xa6b0,
+       0x2a7,  0xe702, 0xf5e6, 0x9af5, 0xf9b,  0x810f, 0x8080, 0x180,  0x1702,
+       0x5117, 0xa650, 0x11a6, 0x1011, 0x550f, 0xd554, 0xbdd5, 0x6bbe, 0xc66b,
+       0xfc7,  0x5510, 0x5555, 0x7655, 0x177,  0x2b02, 0x6f2a, 0xb70,  0x9f0b,
+       0xcf9e, 0xf3cf, 0x3ff4, 0xcb40, 0x8ecb, 0x768e, 0x5277, 0x8652, 0x9186,
+       0x9991, 0x5099, 0xd350, 0x93d3, 0x6d94, 0xe6d,  0x530e, 0x3153, 0xa531,
+       0x64a5, 0x7964, 0x7c79, 0x467c, 0x1746, 0x3017, 0x3730, 0x538,  0x5,
+       0x1e00, 0x5b1e, 0x955a, 0xae95, 0x3eaf, 0xff3e, 0xf8ff, 0xb2f9, 0xa1b3,
+       0xb2a1, 0x5b2,  0xad05, 0x7cac, 0x2d7c, 0xd32c, 0x80d2, 0x7280, 0x8d72,
+       0x1b8e, 0x831b, 0xac82, 0xfdac, 0xa7fd, 0x15a8, 0xd614, 0xe0d5, 0x7be0,
+       0xb37b, 0x61b3, 0x9661, 0x9d95, 0xc79d, 0x83c7, 0xd883, 0xead7, 0xceb,
+       0xf60c, 0xa9f5, 0x19a9, 0xa019, 0x8f9f, 0xd48f, 0x3ad5, 0x853a, 0x985,
+       0x5309, 0x6f52, 0x1370, 0x6e13, 0xa96d, 0x98a9, 0x5198, 0x9f51, 0xb69f,
+       0xa1b6, 0x2ea1, 0x672e, 0x2067, 0x6520, 0xaf65, 0x6eaf, 0x7e6f, 0xee7e,
+       0x17ef, 0xa917, 0xcea8, 0x9ace, 0xff99, 0x5dff, 0xdf5d, 0x38df, 0xa39,
+       0x1c0b, 0xe01b, 0x46e0, 0xcb46, 0x90cb, 0xba90, 0x4bb,  0x9104, 0x9d90,
+       0xc89c, 0xf6c8, 0x6cf6, 0x886c, 0x1789, 0xbd17, 0x70bc, 0x7e71, 0x17e,
+       0x1f01, 0xa01f, 0xbaa0, 0x14bb, 0xfc14, 0x7afb, 0xa07a, 0x3da0, 0xbf3d,
+       0x48bf, 0x8c48, 0x968b, 0x9d96, 0xfd9d, 0x96fd, 0x9796, 0x6b97, 0xd16b,
+       0xf4d1, 0x3bf4, 0x253c, 0x9125, 0x6691, 0xc166, 0x34c1, 0x5735, 0x1a57,
+       0xdc19, 0x77db, 0x8577, 0x4a85, 0x824a, 0x9182, 0x7f91, 0xfd7f, 0xb4c3,
+       0xb5b4, 0xb3b5, 0x7eb3, 0x617e, 0x4e61, 0xa4f,  0x530a, 0x3f52, 0xa33e,
+       0x34a3, 0x9234, 0xf091, 0xf4f0, 0x1bf5, 0x311b, 0x9631, 0x6a96, 0x386b,
+       0x1d39, 0xe91d, 0xe8e9, 0x69e8, 0x426a, 0xee42, 0x89ee, 0x368a, 0x2837,
+       0x7428, 0x5974, 0x6159, 0x1d62, 0x7b1d, 0xf77a, 0x7bf7, 0x6b7c, 0x696c,
+       0xf969, 0x4cf9, 0x714c, 0x4e71, 0x6b4e, 0x256c, 0x6e25, 0xe96d, 0x94e9,
+       0x8f94, 0x3e8f, 0x343e, 0x4634, 0xb646, 0x97b5, 0x8997, 0xe8a,  0x900e,
+       0x8090, 0xfd80, 0xa0fd, 0x16a1, 0xf416, 0xebf4, 0x95ec, 0x1196, 0x8911,
+       0x3d89, 0xda3c, 0x9fd9, 0xd79f, 0x4bd7, 0x214c, 0x3021, 0x4f30, 0x994e,
+       0x5c99, 0x6f5d, 0x326f, 0xab31, 0x6aab, 0xe969, 0x90e9, 0x1190, 0xff10,
+       0xa2fe, 0xe0a2, 0x66e1, 0x4067, 0x9e3f, 0x2d9e, 0x712d, 0x8170, 0xd180,
+       0xffd1, 0x25ff, 0x3826, 0x2538, 0x5f24, 0xc45e, 0x1cc4, 0xdf1c, 0x93df,
+       0xc793, 0x80c7, 0x2380, 0xd223, 0x7ed2, 0xfc7e, 0x22fd, 0x7422, 0x1474,
+       0xb714, 0x7db6, 0x857d, 0xa85,  0xa60a, 0x88a6, 0x4289, 0x7842, 0xc278,
+       0xf7c2, 0xcdf7, 0x84cd, 0xae84, 0x8cae, 0xb98c, 0x1aba, 0x4d1a, 0x884c,
+       0x4688, 0xcc46, 0xd8cb, 0x2bd9, 0xbe2b, 0xa2be, 0x72a2, 0xf772, 0xd2f6,
+       0x75d2, 0xc075, 0xa3c0, 0x63a3, 0xae63, 0x8fae, 0x2a90, 0x5f2a, 0xef5f,
+       0x5cef, 0xa05c, 0x89a0, 0x5e89, 0x6b5e, 0x736b, 0x773,  0x9d07, 0xe99c,
+       0x27ea, 0x2028, 0xc20,  0x980b, 0x4797, 0x2848, 0x9828, 0xc197, 0x48c2,
+       0x2449, 0x7024, 0x570,  0x3e05, 0xd3e,  0xf60c, 0xbbf5, 0x69bb, 0x3f6a,
+       0x740,  0xf006, 0xe0ef, 0xbbe0, 0xadbb, 0x56ad, 0xcf56, 0xbfce, 0xa9bf,
+       0x205b, 0x6920, 0xae69, 0x50ae, 0x2050, 0xf01f, 0x27f0, 0x9427, 0x8993,
+       0x8689, 0x4087, 0x6e40, 0xb16e, 0xa1b1, 0xe8a1, 0x87e8, 0x6f88, 0xfe6f,
+       0x4cfe, 0xe94d, 0xd5e9, 0x47d6, 0x3148, 0x5f31, 0xc35f, 0x13c4, 0xa413,
+       0x5a5,  0x2405, 0xc223, 0x66c2, 0x3667, 0x5e37, 0x5f5e, 0x2f5f, 0x8c2f,
+       0xe48c, 0xd0e4, 0x4d1,  0xd104, 0xe4d0, 0xcee4, 0xfcf,  0x480f, 0xa447,
+       0x5ea4, 0xff5e, 0xbefe, 0x8dbe, 0x1d8e, 0x411d, 0x1841, 0x6918, 0x5469,
+       0x1155, 0xc611, 0xaac6, 0x37ab, 0x2f37, 0xca2e, 0x87ca, 0xbd87, 0xabbd,
+       0xb3ab, 0xcb4,  0xce0c, 0xfccd, 0xa5fd, 0x72a5, 0xf072, 0x83f0, 0xfe83,
+       0x97fd, 0xc997, 0xb0c9, 0xadb0, 0xe6ac, 0x88e6, 0x1088, 0xbe10, 0x16be,
+       0xa916, 0xa3a8, 0x46a3, 0x5447, 0xe953, 0x84e8, 0x2085, 0xa11f, 0xfa1,
+       0xdd0f, 0xbedc, 0x5abe, 0x805a, 0xc97f, 0x6dc9, 0x826d, 0x4a82, 0x934a,
+       0x5293, 0xd852, 0xd3d8, 0xadd3, 0xf4ad, 0xf3f4, 0xfcf3, 0xfefc, 0xcafe,
+       0xb7ca, 0x3cb8, 0xa13c, 0x18a1, 0x1418, 0xea13, 0x91ea, 0xf891, 0x53f8,
+       0xa254, 0xe9a2, 0x87ea, 0x4188, 0x1c41, 0xdc1b, 0xf5db, 0xcaf5, 0x45ca,
+       0x6d45, 0x396d, 0xde39, 0x90dd, 0x1e91, 0x1e,   0x7b00, 0x6a7b, 0xa46a,
+       0xc9a3, 0x9bc9, 0x389b, 0x1139, 0x5211, 0x1f52, 0xeb1f, 0xabeb, 0x48ab,
+       0x9348, 0xb392, 0x17b3, 0x1618, 0x5b16, 0x175b, 0xdc17, 0xdedb, 0x1cdf,
+       0xeb1c, 0xd1ea, 0x4ad2, 0xd4b,  0xc20c, 0x24c2, 0x7b25, 0x137b, 0x8b13,
+       0x618b, 0xa061, 0xff9f, 0xfffe, 0x72ff, 0xf572, 0xe2f5, 0xcfe2, 0xd2cf,
+       0x75d3, 0x6a76, 0xc469, 0x1ec4, 0xfc1d, 0x59fb, 0x455a, 0x7a45, 0xa479,
+       0xb7a4
+};
+
 static u8 tmp_buf[TEST_BUFLEN];
 
 #define full_csum(buff, len, sum) csum_fold(csum_partial(buff, len, sum))
@@ -338,10 +575,57 @@ static void test_csum_no_carry_inputs(struct kunit *test)
        }
 }
 
+static void test_ip_fast_csum(struct kunit *test)
+{
+       __sum16 csum_result, expected;
+
+       for (int len = IPv4_MIN_WORDS; len < IPv4_MAX_WORDS; len++) {
+               for (int index = 0; index < NUM_IP_FAST_CSUM_TESTS; index++) {
+                       csum_result = ip_fast_csum(random_buf + index, len);
+                       expected =
+                               expected_fast_csum[(len - IPv4_MIN_WORDS) *
+                                                  NUM_IP_FAST_CSUM_TESTS +
+                                                  index];
+                       CHECK_EQ(expected, csum_result);
+               }
+       }
+}
+
+static void test_csum_ipv6_magic(struct kunit *test)
+{
+#if defined(CONFIG_NET)
+       const struct in6_addr *saddr;
+       const struct in6_addr *daddr;
+       unsigned int len;
+       unsigned char proto;
+       unsigned int csum;
+
+       const int daddr_offset = sizeof(struct in6_addr);
+       const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr);
+       const int proto_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) +
+                            sizeof(int);
+       const int csum_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) +
+                           sizeof(int) + sizeof(char);
+
+       for (int i = 0; i < NUM_IPv6_TESTS; i++) {
+               saddr = (const struct in6_addr *)(random_buf + i);
+               daddr = (const struct in6_addr *)(random_buf + i +
+                                                 daddr_offset);
+               len = *(unsigned int *)(random_buf + i + len_offset);
+               proto = *(random_buf + i + proto_offset);
+               csum = *(unsigned int *)(random_buf + i + csum_offset);
+               CHECK_EQ(expected_csum_ipv6_magic[i],
+                        csum_ipv6_magic(saddr, daddr, len, proto, csum));
+       }
+#endif /* !CONFIG_NET */
+}
+
 static struct kunit_case __refdata checksum_test_cases[] = {
        KUNIT_CASE(test_csum_fixed_random_inputs),
        KUNIT_CASE(test_csum_all_carry_inputs),
        KUNIT_CASE(test_csum_no_carry_inputs),
+       KUNIT_CASE(test_ip_fast_csum),
+       KUNIT_CASE(test_csum_ipv6_magic),
        {}
 };
 
index e2a6a69..8122039 100644 (file)
@@ -24,6 +24,41 @@ extern void my_tramp2(void *);
 
 static unsigned long my_ip = (unsigned long)schedule;
 
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+"      .pushsection    .text, \"ax\", @progbits\n"
+"      .type           my_tramp1, @function\n"
+"      .globl          my_tramp1\n"
+"   my_tramp1:\n"
+"      addi    sp,sp,-2*"SZREG"\n"
+"      "REG_S" t0,0*"SZREG"(sp)\n"
+"      "REG_S" ra,1*"SZREG"(sp)\n"
+"      call    my_direct_func1\n"
+"      "REG_L" t0,0*"SZREG"(sp)\n"
+"      "REG_L" ra,1*"SZREG"(sp)\n"
+"      addi    sp,sp,2*"SZREG"\n"
+"      jr      t0\n"
+"      .size           my_tramp1, .-my_tramp1\n"
+"      .type           my_tramp2, @function\n"
+"      .globl          my_tramp2\n"
+
+"   my_tramp2:\n"
+"      addi    sp,sp,-2*"SZREG"\n"
+"      "REG_S" t0,0*"SZREG"(sp)\n"
+"      "REG_S" ra,1*"SZREG"(sp)\n"
+"      call    my_direct_func2\n"
+"      "REG_L" t0,0*"SZREG"(sp)\n"
+"      "REG_L" ra,1*"SZREG"(sp)\n"
+"      addi    sp,sp,2*"SZREG"\n"
+"      jr      t0\n"
+"      .size           my_tramp2, .-my_tramp2\n"
+"      .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
 #ifdef CONFIG_X86_64
 
 #include <asm/ibt.h>
index 2e34983..f943e40 100644 (file)
@@ -22,6 +22,47 @@ void my_direct_func2(unsigned long ip)
 extern void my_tramp1(void *);
 extern void my_tramp2(void *);
 
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+"      .pushsection    .text, \"ax\", @progbits\n"
+"      .type           my_tramp1, @function\n"
+"      .globl          my_tramp1\n"
+"   my_tramp1:\n"
+"       addi   sp,sp,-3*"SZREG"\n"
+"       "REG_S"        a0,0*"SZREG"(sp)\n"
+"       "REG_S"        t0,1*"SZREG"(sp)\n"
+"       "REG_S"        ra,2*"SZREG"(sp)\n"
+"       mv     a0,t0\n"
+"       call   my_direct_func1\n"
+"       "REG_L"        a0,0*"SZREG"(sp)\n"
+"       "REG_L"        t0,1*"SZREG"(sp)\n"
+"       "REG_L"        ra,2*"SZREG"(sp)\n"
+"       addi   sp,sp,3*"SZREG"\n"
+"      jr      t0\n"
+"      .size           my_tramp1, .-my_tramp1\n"
+
+"      .type           my_tramp2, @function\n"
+"      .globl          my_tramp2\n"
+"   my_tramp2:\n"
+"       addi   sp,sp,-3*"SZREG"\n"
+"       "REG_S"        a0,0*"SZREG"(sp)\n"
+"       "REG_S"        t0,1*"SZREG"(sp)\n"
+"       "REG_S"        ra,2*"SZREG"(sp)\n"
+"       mv     a0,t0\n"
+"       call   my_direct_func2\n"
+"       "REG_L"        a0,0*"SZREG"(sp)\n"
+"       "REG_L"        t0,1*"SZREG"(sp)\n"
+"       "REG_L"        ra,2*"SZREG"(sp)\n"
+"       addi   sp,sp,3*"SZREG"\n"
+"      jr      t0\n"
+"      .size           my_tramp2, .-my_tramp2\n"
+"      .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
 #ifdef CONFIG_X86_64
 
 #include <asm/ibt.h>
index 9243dbf..aed6df2 100644 (file)
@@ -17,6 +17,31 @@ void my_direct_func(unsigned long ip)
 
 extern void my_tramp(void *);
 
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+"       .pushsection    .text, \"ax\", @progbits\n"
+"       .type           my_tramp, @function\n"
+"       .globl          my_tramp\n"
+"   my_tramp:\n"
+"       addi   sp,sp,-3*"SZREG"\n"
+"       "REG_S"        a0,0*"SZREG"(sp)\n"
+"       "REG_S"        t0,1*"SZREG"(sp)\n"
+"       "REG_S"        ra,2*"SZREG"(sp)\n"
+"       mv     a0,t0\n"
+"       call   my_direct_func\n"
+"       "REG_L"        a0,0*"SZREG"(sp)\n"
+"       "REG_L"        t0,1*"SZREG"(sp)\n"
+"       "REG_L"        ra,2*"SZREG"(sp)\n"
+"       addi   sp,sp,3*"SZREG"\n"
+"       jr     t0\n"
+"       .size           my_tramp, .-my_tramp\n"
+"       .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
 #ifdef CONFIG_X86_64
 
 #include <asm/ibt.h>
index e39c356..6ff546a 100644 (file)
@@ -19,6 +19,34 @@ void my_direct_func(struct vm_area_struct *vma, unsigned long address,
 
 extern void my_tramp(void *);
 
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+"       .pushsection    .text, \"ax\", @progbits\n"
+"       .type           my_tramp, @function\n"
+"       .globl          my_tramp\n"
+"   my_tramp:\n"
+"       addi   sp,sp,-5*"SZREG"\n"
+"       "REG_S"        a0,0*"SZREG"(sp)\n"
+"       "REG_S"        a1,1*"SZREG"(sp)\n"
+"       "REG_S"        a2,2*"SZREG"(sp)\n"
+"       "REG_S"        t0,3*"SZREG"(sp)\n"
+"       "REG_S"        ra,4*"SZREG"(sp)\n"
+"       call   my_direct_func\n"
+"       "REG_L"        a0,0*"SZREG"(sp)\n"
+"       "REG_L"        a1,1*"SZREG"(sp)\n"
+"       "REG_L"        a2,2*"SZREG"(sp)\n"
+"       "REG_L"        t0,3*"SZREG"(sp)\n"
+"       "REG_L"        ra,4*"SZREG"(sp)\n"
+"       addi   sp,sp,5*"SZREG"\n"
+"       jr     t0\n"
+"       .size           my_tramp, .-my_tramp\n"
+"       .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
 #ifdef CONFIG_X86_64
 
 #include <asm/ibt.h>
index 32c477d..ef09456 100644 (file)
@@ -16,6 +16,30 @@ void my_direct_func(struct task_struct *p)
 
 extern void my_tramp(void *);
 
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+"       .pushsection    .text, \"ax\", @progbits\n"
+"       .type           my_tramp, @function\n"
+"       .globl          my_tramp\n"
+"   my_tramp:\n"
+"       addi   sp,sp,-3*"SZREG"\n"
+"       "REG_S"        a0,0*"SZREG"(sp)\n"
+"       "REG_S"        t0,1*"SZREG"(sp)\n"
+"       "REG_S"        ra,2*"SZREG"(sp)\n"
+"       call   my_direct_func\n"
+"       "REG_L"        a0,0*"SZREG"(sp)\n"
+"       "REG_L"        t0,1*"SZREG"(sp)\n"
+"       "REG_L"        ra,2*"SZREG"(sp)\n"
+"       addi   sp,sp,3*"SZREG"\n"
+"       jr     t0\n"
+"       .size           my_tramp, .-my_tramp\n"
+"       .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
 #ifdef CONFIG_X86_64
 
 #include <asm/ibt.h>
index 50a2cc8..c537d52 100644 (file)
@@ -36,16 +36,14 @@ static void sigill_handler(int sig, siginfo_t *info, void *context)
        regs[0] += 4;
 }
 
-static void cbo_insn(char *base, int fn)
-{
-       uint32_t insn = MK_CBO(fn);
-
-       asm volatile(
-       "mv     a0, %0\n"
-       "li     a1, %1\n"
-       ".4byte %2\n"
-       : : "r" (base), "i" (fn), "i" (insn) : "a0", "a1", "memory");
-}
+#define cbo_insn(base, fn)                                                     \
+({                                                                             \
+       asm volatile(                                                           \
+       "mv     a0, %0\n"                                                       \
+       "li     a1, %1\n"                                                       \
+       ".4byte %2\n"                                                           \
+       : : "r" (base), "i" (fn), "i" (MK_CBO(fn)) : "a0", "a1", "memory");     \
+})
 
 static void cbo_inval(char *base) { cbo_insn(base, 0); }
 static void cbo_clean(char *base) { cbo_insn(base, 1); }
@@ -97,7 +95,7 @@ static void test_zicboz(void *arg)
        block_size = pair.value;
        ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE &&
                         is_power_of_2(block_size), "Zicboz block size\n");
-       ksft_print_msg("Zicboz block size: %ld\n", block_size);
+       ksft_print_msg("Zicboz block size: %llu\n", block_size);
 
        illegal_insn = false;
        cbo_zero(&mem[block_size]);
@@ -121,7 +119,7 @@ static void test_zicboz(void *arg)
                for (j = 0; j < block_size; ++j) {
                        if (mem[i * block_size + j] != expected) {
                                ksft_test_result_fail("cbo.zero check\n");
-                               ksft_print_msg("cbo.zero check: mem[%d] != 0x%x\n",
+                               ksft_print_msg("cbo.zero check: mem[%llu] != 0x%x\n",
                                               i * block_size + j, expected);
                                return;
                        }
@@ -201,7 +199,7 @@ int main(int argc, char **argv)
        pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
        rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0);
        if (rc < 0)
-               ksft_exit_fail_msg("hwprobe() failed with %d\n", rc);
+               ksft_exit_fail_msg("hwprobe() failed with %ld\n", rc);
        assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
 
        if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) {
index d53e088..fd73c87 100644 (file)
@@ -29,7 +29,7 @@ int main(int argc, char **argv)
                /* Fail if the kernel claims not to recognize a base key. */
                if ((i < 4) && (pairs[i].key != i))
                        ksft_exit_fail_msg("Failed to recognize base key: key != i, "
-                                          "key=%ld, i=%ld\n", pairs[i].key, i);
+                                          "key=%lld, i=%ld\n", pairs[i].key, i);
 
                if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
                        continue;
@@ -37,7 +37,7 @@ int main(int argc, char **argv)
                if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
                        continue;
 
-               ksft_exit_fail_msg("Unexpected pair: (%ld, %ld)\n", pairs[i].key, pairs[i].value);
+               ksft_exit_fail_msg("Unexpected pair: (%lld, %llu)\n", pairs[i].key, pairs[i].value);
        }
 
        out = riscv_hwprobe(pairs, 8, 0, 0, 0);
index 9b8434f..2e0db9c 100644 (file)
@@ -18,6 +18,8 @@ struct addresses {
        int *on_56_addr;
 };
 
+// Only works on 64 bit
+#if __riscv_xlen == 64
 static inline void do_mmaps(struct addresses *mmap_addresses)
 {
        /*
@@ -50,6 +52,7 @@ static inline void do_mmaps(struct addresses *mmap_addresses)
        mmap_addresses->on_56_addr =
                mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0);
 }
+#endif /* __riscv_xlen == 64 */
 
 static inline int memory_layout(void)
 {
index 66764ed..1dd9419 100644 (file)
@@ -27,7 +27,7 @@ int main(void)
 
        datap = malloc(MAX_VSIZE);
        if (!datap) {
-               ksft_test_result_fail("fail to allocate memory for size = %lu\n", MAX_VSIZE);
+               ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE);
                exit(-1);
        }
 
index 2c0d2b1..1f9969b 100644 (file)
@@ -1,4 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/wait.h>
+
 #define THIS_PROGRAM "./vstate_exec_nolibc"
 
 int main(int argc, char **argv)
index 8dcd399..27668fb 100644 (file)
@@ -60,7 +60,7 @@ int test_and_compare_child(long provided, long expected, int inherit)
        }
        rc = launch_test(inherit);
        if (rc != expected) {
-               ksft_test_result_fail("Test failed, check %d != %d\n", rc,
+               ksft_test_result_fail("Test failed, check %d != %ld\n", rc,
                                      expected);
                return -2;
        }
@@ -79,7 +79,7 @@ int main(void)
        pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
        rc = riscv_hwprobe(&pair, 1, 0, NULL, 0);
        if (rc < 0) {
-               ksft_test_result_fail("hwprobe() failed with %d\n", rc);
+               ksft_test_result_fail("hwprobe() failed with %ld\n", rc);
                return -1;
        }