Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64...
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Dec 2020 00:24:30 +0000 (16:24 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Dec 2020 00:24:30 +0000 (16:24 -0800)
Pull arm64 updates from Catalin Marinas:

 - Expose tag address bits in siginfo. The original arm64 ABI did not
   expose any of the bits 63:56 of a tagged address in siginfo. In the
   presence of user ASAN or MTE, this information may be useful. The
   implementation is generic to other architectures supporting tags
   (like SPARC ADI, subject to wiring up the arch code). The user will
   have to opt in via sigaction(SA_EXPOSE_TAGBITS) so that the extra
   bits, if available, become visible in si_addr (a brief userspace
   sketch follows this list).

 - Default to 32-bit wide ZONE_DMA. Previously, ZONE_DMA was set to the
   lowest 1GB to cope with the Raspberry Pi 4 limitations, to the
   detriment of other platforms. With these changes, the kernel scans
   the Device Tree dma-ranges and the ACPI IORT information before
   deciding on a smaller ZONE_DMA.

 - Strengthen READ_ONCE() to acquire when CONFIG_LTO=y. When building
   with LTO, there is an increased risk of the compiler converting an
   address dependency headed by a READ_ONCE() invocation into a control
   dependency and consequently allowing for harmful reordering by the
   CPU (a sketch of the pattern at risk follows this list).

 - Add CPPC FFH support using arm64 AMU counters.

 - set_fs() removal on arm64. This renders the User Access Override
   (UAO) ARMv8 feature unnecessary.

 - Perf updates: PMU driver for the ARM DMC-620 memory controller, sysfs
   identifier file for SMMUv3, stop event counters support for i.MX8MP,
   enable the perf events-based hard lockup detector.

 - Reorganise the kernel VA space slightly so that 52-bit VA
   configurations can use more virtual address space.

 - Improve the robustness of the arm64 memory offline event notifier.

 - Pad the Image header to 64K following the EFI header definition
   updated recently to increase the section alignment to 64K.

 - Support CONFIG_CMDLINE_EXTEND on arm64.

 - Do not use tagged PC in the kernel (TCR_EL1.TBID1==1), freeing up 8
   bits for PtrAuth.

 - Switch to vmapped shadow call stacks.

 - Miscellaneous clean-ups.
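
As a rough illustration of the siginfo change above, here is a minimal
userspace sketch (not part of this series). SA_EXPOSE_TAGBITS and its
value come from the asm-generic signal-defs.h change in this merge; the
fallback #define only covers libc headers that predate it, and the
faulting access itself is left as a placeholder. The series also adds
SA_UNSUPPORTED for probing whether the kernel honours unknown sa_flags
bits; that check is omitted here for brevity.

  #include <signal.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  #ifndef SA_EXPOSE_TAGBITS
  #define SA_EXPOSE_TAGBITS 0x00000800  /* asm-generic/signal-defs.h value */
  #endif

  static void segv_handler(int sig, siginfo_t *info, void *ucontext)
  {
          /*
           * With SA_EXPOSE_TAGBITS set, bits 63:56 of the faulting address
           * are preserved in si_addr (bits 63:60 remain undefined for
           * SEGV_MTESERR faults).
           */
          fprintf(stderr, "fault at %p\n", info->si_addr);
          _exit(1);
  }

  int main(void)
  {
          struct sigaction sa;

          memset(&sa, 0, sizeof(sa));
          sa.sa_sigaction = segv_handler;
          sa.sa_flags = SA_SIGINFO | SA_EXPOSE_TAGBITS;
          sigaction(SIGSEGV, &sa, NULL);

          /* ... perform a faulting access through a tagged pointer ... */
          return 0;
  }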

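A self-contained sketch of the pattern the LTO change guards against,
using a simplified generic READ_ONCE() so the example stands alone; the
arm64 READ_ONCE() added by this merge instead emits an acquire load
(LDAPR where the CPU and assembler support it, LDAR otherwise) when
CONFIG_LTO=y. struct foo, ptr and reader() are made-up names.

  /* Simplified generic READ_ONCE() shape, for illustration only. */
  #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

  struct foo {
          int data;
  };

  struct foo *ptr;

  int reader(void)
  {
          struct foo *p = READ_ONCE(ptr);  /* heads an address dependency */

          /*
           * The CPU orders this load after the load of ptr only while the
           * address dependency survives compilation. If LTO lets the
           * compiler prove that ptr only ever takes a few known values, it
           * may branch on p and load from constant addresses instead,
           * turning the address dependency into a control dependency that
           * permits load-load reordering. The acquire-flavoured READ_ONCE()
           * closes that gap.
           */
          return p->data;
  }
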
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (78 commits)
  perf/imx_ddr: Add system PMU identifier for userspace
  bindings: perf: imx-ddr: add compatible string
  arm64: Fix build failure when HARDLOCKUP_DETECTOR_PERF is enabled
  arm64: mte: fix prctl(PR_GET_TAGGED_ADDR_CTRL) if TCF0=NONE
  arm64: mark __system_matches_cap as __maybe_unused
  arm64: uaccess: remove vestigal UAO support
  arm64: uaccess: remove redundant PAN toggling
  arm64: uaccess: remove addr_limit_user_check()
  arm64: uaccess: remove set_fs()
  arm64: uaccess cleanup macro naming
  arm64: uaccess: split user/kernel routines
  arm64: uaccess: refactor __{get,put}_user
  arm64: uaccess: simplify __copy_user_flushcache()
  arm64: uaccess: rename privileged uaccess routines
  arm64: sdei: explicitly simulate PAN/UAO entry
  arm64: sdei: move uaccess logic to arch/arm64/
  arm64: head.S: always initialize PSTATE
  arm64: head.S: cleanup SCTLR_ELx initialization
  arm64: head.S: rename el2_setup -> init_kernel_el
  arm64: add C wrappers for SET_PSTATE_*()
  ...

114 files changed:
Documentation/arm64/kasan-offsets.sh
Documentation/arm64/memory.rst
Documentation/arm64/tagged-pointers.rst
Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
arch/alpha/include/uapi/asm/signal.h
arch/arm/include/asm/signal.h
arch/arm/include/uapi/asm/signal.h
arch/arm64/Kconfig
arch/arm64/include/asm/alternative-macros.h [new file with mode: 0644]
arch/arm64/include/asm/alternative.h
arch/arm64/include/asm/asm-uaccess.h
arch/arm64/include/asm/cpucaps.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/exception.h
arch/arm64/include/asm/exec.h
arch/arm64/include/asm/futex.h
arch/arm64/include/asm/insn.h
arch/arm64/include/asm/kernel-pgtable.h
arch/arm64/include/asm/kprobes.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/mmu_context.h
arch/arm64/include/asm/pgtable-hwdef.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/ptrace.h
arch/arm64/include/asm/rwonce.h [new file with mode: 0644]
arch/arm64/include/asm/signal.h [new file with mode: 0644]
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/system_misc.h
arch/arm64/include/asm/thread_info.h
arch/arm64/include/asm/topology.h
arch/arm64/include/asm/traps.h
arch/arm64/include/asm/uaccess.h
arch/arm64/kernel/Makefile
arch/arm64/kernel/alternative.c
arch/arm64/kernel/armv8_deprecated.c
arch/arm64/kernel/asm-offsets.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/debug-monitors.c
arch/arm64/kernel/efi-header.S
arch/arm64/kernel/entry-common.c
arch/arm64/kernel/entry.S
arch/arm64/kernel/head.S
arch/arm64/kernel/irq.c
arch/arm64/kernel/kaslr.c
arch/arm64/kernel/mte.c
arch/arm64/kernel/perf_event.c
arch/arm64/kernel/probes/kprobes.c
arch/arm64/kernel/process.c
arch/arm64/kernel/proton-pack.c
arch/arm64/kernel/ptrace.c
arch/arm64/kernel/scs.c [deleted file]
arch/arm64/kernel/sdei.c
arch/arm64/kernel/setup.c
arch/arm64/kernel/signal.c
arch/arm64/kernel/sleep.S
arch/arm64/kernel/smp.c
arch/arm64/kernel/suspend.c
arch/arm64/kernel/sys_compat.c
arch/arm64/kernel/syscall.c
arch/arm64/kernel/topology.c
arch/arm64/kernel/traps.c
arch/arm64/kernel/vdso/Makefile
arch/arm64/kernel/vdso32/Makefile
arch/arm64/kernel/vmlinux.lds.S
arch/arm64/lib/clear_user.S
arch/arm64/lib/copy_from_user.S
arch/arm64/lib/copy_in_user.S
arch/arm64/lib/copy_to_user.S
arch/arm64/lib/mte.S
arch/arm64/lib/uaccess_flushcache.c
arch/arm64/mm/fault.c
arch/arm64/mm/init.c
arch/arm64/mm/mmu.c
arch/arm64/mm/proc.S
arch/h8300/include/uapi/asm/signal.h
arch/ia64/include/uapi/asm/signal.h
arch/m68k/include/uapi/asm/signal.h
arch/mips/include/uapi/asm/signal.h
arch/parisc/include/asm/signal.h
arch/parisc/include/uapi/asm/signal.h
arch/powerpc/include/uapi/asm/signal.h
arch/s390/include/uapi/asm/signal.h
arch/sparc/include/uapi/asm/signal.h
arch/x86/include/uapi/asm/signal.h
arch/x86/kernel/signal_compat.c
arch/xtensa/include/uapi/asm/signal.h
drivers/acpi/arm64/iort.c
drivers/firmware/arm_sdei.c
drivers/of/address.c
drivers/of/unittest.c
drivers/perf/Kconfig
drivers/perf/Makefile
drivers/perf/arm_dmc620_pmu.c [new file with mode: 0644]
drivers/perf/arm_dsu_pmu.c
drivers/perf/arm_pmu.c
drivers/perf/arm_smmuv3_pmu.c
drivers/perf/fsl_imx8_ddr_perf.c
drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
drivers/perf/hisilicon/hisi_uncore_pmu.c
drivers/perf/hisilicon/hisi_uncore_pmu.h
include/linux/acpi_iort.h
include/linux/mmzone.h
include/linux/of.h
include/linux/perf/arm_pmu.h
include/linux/scs.h
include/linux/signal.h
include/linux/signal_types.h
include/uapi/asm-generic/signal-defs.h
include/uapi/asm-generic/signal.h
kernel/scs.c
kernel/signal.c

diff --git a/Documentation/arm64/kasan-offsets.sh b/Documentation/arm64/kasan-offsets.sh
index 2b7a021..2dc5f9e 100644 (file)
@@ -1,12 +1,11 @@
 #!/bin/sh
 
 # Print out the KASAN_SHADOW_OFFSETS required to place the KASAN SHADOW
-# start address at the mid-point of the kernel VA space
+# start address at the top of the linear region
 
 print_kasan_offset () {
        printf "%02d\t" $1
        printf "0x%08x00000000\n" $(( (0xffffffff & (-1 << ($1 - 1 - 32))) \
-                       + (1 << ($1 - 32 - $2)) \
                        - (1 << (64 - 32 - $2)) ))
 }
 
diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index cf03b32..e7522e5 100644 (file)
@@ -32,17 +32,16 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
   -----------------------------------------------------------------------
   0000000000000000     0000ffffffffffff         256TB          user
   ffff000000000000     ffff7fffffffffff         128TB          kernel logical memory map
-  ffff800000000000     ffff9fffffffffff          32TB          kasan shadow region
-  ffffa00000000000     ffffa00007ffffff         128MB          bpf jit region
-  ffffa00008000000     ffffa0000fffffff         128MB          modules
-  ffffa00010000000     fffffdffbffeffff         ~93TB          vmalloc
-  fffffdffbfff0000     fffffdfffe5f8fff        ~998MB          [guard region]
-  fffffdfffe5f9000     fffffdfffe9fffff        4124KB          fixed mappings
-  fffffdfffea00000     fffffdfffebfffff           2MB          [guard region]
-  fffffdfffec00000     fffffdffffbfffff          16MB          PCI I/O space
-  fffffdffffc00000     fffffdffffdfffff           2MB          [guard region]
-  fffffdffffe00000     ffffffffffdfffff           2TB          vmemmap
-  ffffffffffe00000     ffffffffffffffff           2MB          [guard region]
+ [ffff600000000000     ffff7fffffffffff]         32TB          [kasan shadow region]
+  ffff800000000000     ffff800007ffffff         128MB          bpf jit region
+  ffff800008000000     ffff80000fffffff         128MB          modules
+  ffff800010000000     fffffbffefffffff         124TB          vmalloc
+  fffffbfff0000000     fffffbfffdffffff         224MB          fixed mappings (top down)
+  fffffbfffe000000     fffffbfffe7fffff           8MB          [guard region]
+  fffffbfffe800000     fffffbffff7fffff          16MB          PCI I/O space
+  fffffbffff800000     fffffbffffffffff           8MB          [guard region]
+  fffffc0000000000     fffffdffffffffff           2TB          vmemmap
+  fffffe0000000000     ffffffffffffffff           2TB          [guard region]
 
 
 AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support)::
@@ -50,19 +49,17 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
   Start                        End                     Size            Use
   -----------------------------------------------------------------------
   0000000000000000     000fffffffffffff           4PB          user
-  fff0000000000000     fff7ffffffffffff           2PB          kernel logical memory map
-  fff8000000000000     fffd9fffffffffff        1440TB          [gap]
-  fffda00000000000     ffff9fffffffffff         512TB          kasan shadow region
-  ffffa00000000000     ffffa00007ffffff         128MB          bpf jit region
-  ffffa00008000000     ffffa0000fffffff         128MB          modules
-  ffffa00010000000     fffff81ffffeffff         ~88TB          vmalloc
-  fffff81fffff0000     fffffc1ffe58ffff          ~3TB          [guard region]
-  fffffc1ffe590000     fffffc1ffe9fffff        4544KB          fixed mappings
-  fffffc1ffea00000     fffffc1ffebfffff           2MB          [guard region]
-  fffffc1ffec00000     fffffc1fffbfffff          16MB          PCI I/O space
-  fffffc1fffc00000     fffffc1fffdfffff           2MB          [guard region]
-  fffffc1fffe00000     ffffffffffdfffff        3968GB          vmemmap
-  ffffffffffe00000     ffffffffffffffff           2MB          [guard region]
+  fff0000000000000     ffff7fffffffffff          ~4PB          kernel logical memory map
+ [fffd800000000000     ffff7fffffffffff]        512TB          [kasan shadow region]
+  ffff800000000000     ffff800007ffffff         128MB          bpf jit region
+  ffff800008000000     ffff80000fffffff         128MB          modules
+  ffff800010000000     fffffbffefffffff         124TB          vmalloc
+  fffffbfff0000000     fffffbfffdffffff         224MB          fixed mappings (top down)
+  fffffbfffe000000     fffffbfffe7fffff           8MB          [guard region]
+  fffffbfffe800000     fffffbffff7fffff          16MB          PCI I/O space
+  fffffbffff800000     fffffbffffffffff           8MB          [guard region]
+  fffffc0000000000     ffffffdfffffffff          ~4TB          vmemmap
+  ffffffe000000000     ffffffffffffffff         128GB          [guard region]
 
 
 Translation table lookup with 4KB pages::
diff --git a/Documentation/arm64/tagged-pointers.rst b/Documentation/arm64/tagged-pointers.rst
index eab4323..19d284b 100644 (file)
@@ -53,12 +53,25 @@ visibility.
 Preserving tags
 ---------------
 
-Non-zero tags are not preserved when delivering signals. This means that
-signal handlers in applications making use of tags cannot rely on the
-tag information for user virtual addresses being maintained for fields
-inside siginfo_t. One exception to this rule is for signals raised in
-response to watchpoint debug exceptions, where the tag information will
-be preserved.
+When delivering signals, non-zero tags are not preserved in
+siginfo.si_addr unless the flag SA_EXPOSE_TAGBITS was set in
+sigaction.sa_flags when the signal handler was installed. This means
+that signal handlers in applications making use of tags cannot rely
+on the tag information for user virtual addresses being maintained
+in these fields unless the flag was set.
+
+Due to architecture limitations, bits 63:60 of the fault address
+are not preserved in response to synchronous tag check faults
+(SEGV_MTESERR) even if SA_EXPOSE_TAGBITS was set. Applications should
+treat the values of these bits as undefined in order to accommodate
+future architecture revisions which may preserve the bits.
+
+For signals raised in response to watchpoint debug exceptions, the
+tag information will be preserved regardless of the SA_EXPOSE_TAGBITS
+flag setting.
+
+Non-zero tags are never preserved in sigcontext.fault_address
+regardless of the SA_EXPOSE_TAGBITS flag setting.
 
 The architecture prevents the use of a tagged PC, so the upper byte will
 be set to a sign-extension of bit 55 on exception return.
diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
index 5aad9f4..80a9238 100644 (file)
@@ -15,6 +15,9 @@ properties:
       - enum:
           - fsl,imx8-ddr-pmu
           - fsl,imx8m-ddr-pmu
+          - fsl,imx8mq-ddr-pmu
+          - fsl,imx8mm-ddr-pmu
+          - fsl,imx8mn-ddr-pmu
           - fsl,imx8mp-ddr-pmu
       - items:
           - enum:
diff --git a/arch/alpha/include/uapi/asm/signal.h b/arch/alpha/include/uapi/asm/signal.h
index 74c750b..a69dd8d 100644 (file)
@@ -60,20 +60,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-
 #define SA_ONSTACK     0x00000001
 #define SA_RESTART     0x00000002
 #define SA_NOCLDSTOP   0x00000004
diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h
index 65530a0..430be77 100644 (file)
@@ -17,6 +17,8 @@ typedef struct {
        unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
+#define __ARCH_UAPI_SA_FLAGS   (SA_THIRTYTWO | SA_RESTORER)
+
 #define __ARCH_HAS_SA_RESTORER
 
 #include <asm/sigcontext.h>
diff --git a/arch/arm/include/uapi/asm/signal.h b/arch/arm/include/uapi/asm/signal.h
index 9b4185b..c9a3ea1 100644 (file)
@@ -60,33 +60,12 @@ typedef unsigned long sigset_t;
 #define SIGSWI         32
 
 /*
- * SA_FLAGS values:
- *
- * SA_NOCLDSTOP                flag to turn off SIGCHLD when children stop.
- * SA_NOCLDWAIT                flag on SIGCHLD to inhibit zombies.
- * SA_SIGINFO          deliver the signal with SIGINFO structs
- * SA_THIRTYTWO                delivers the signal in 32-bit mode, even if the task 
- *                     is running in 26-bit.
- * SA_ONSTACK          allows alternate signal stacks (see sigaltstack(2)).
- * SA_RESTART          flag to get restarting signals (which were the default long ago)
- * SA_NODEFER          prevents the current signal from being masked in the handler.
- * SA_RESETHAND                clears the handler when the signal is delivered.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
+ * SA_THIRTYTWO historically meant deliver the signal in 32-bit mode, even if
+ * the task is running in 26-bit. But since the kernel no longer supports
+ * 26-bit mode, the flag has no effect.
  */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002
-#define SA_SIGINFO     0x00000004
 #define SA_THIRTYTWO   0x02000000
 #define SA_RESTORER    0x04000000
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
 
 #define MINSIGSTKSZ    2048
 #define SIGSTKSZ       8192
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a6b5b7e..e5bcb49 100644 (file)
@@ -171,6 +171,8 @@ config ARM64
        select HAVE_NMI
        select HAVE_PATA_PLATFORM
        select HAVE_PERF_EVENTS
+       select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI && HW_PERF_EVENTS
+       select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
        select HAVE_PERF_REGS
        select HAVE_PERF_USER_STACK_DUMP
        select HAVE_REGS_AND_STACK_ACCESS_API
@@ -196,7 +198,6 @@ config ARM64
        select PCI_SYSCALL if PCI
        select POWER_RESET
        select POWER_SUPPLY
-       select SET_FS
        select SPARSE_IRQ
        select SWIOTLB
        select SYSCTL_EXCEPTION_TRACE
@@ -332,16 +333,16 @@ config BROKEN_GAS_INST
 config KASAN_SHADOW_OFFSET
        hex
        depends on KASAN
-       default 0xdfffa00000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
-       default 0xdfffd00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
-       default 0xdffffe8000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
-       default 0xdfffffd000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
-       default 0xdffffffa00000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
-       default 0xefff900000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
-       default 0xefffc80000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
-       default 0xeffffe4000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
-       default 0xefffffc800000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
-       default 0xeffffff900000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
+       default 0xdfff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
+       default 0xdfffc00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
+       default 0xdffffe0000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
+       default 0xdfffffc000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
+       default 0xdffffff800000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
+       default 0xefff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
+       default 0xefffc00000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
+       default 0xeffffe0000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
+       default 0xefffffc000000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
+       default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
        default 0xffffffffffffffff
 
 source "arch/arm64/Kconfig.platforms"
@@ -1389,6 +1390,9 @@ config ARM64_PAN
         The feature is detected at runtime, and will remain as a 'nop'
         instruction if the cpu does not implement the feature.
 
+config AS_HAS_LDAPR
+       def_bool $(as-instr,.arch_extension rcpc)
+
 config ARM64_LSE_ATOMICS
        bool
        default ARM64_USE_LSE_ATOMICS
@@ -1426,27 +1430,6 @@ endmenu
 
 menu "ARMv8.2 architectural features"
 
-config ARM64_UAO
-       bool "Enable support for User Access Override (UAO)"
-       default y
-       help
-         User Access Override (UAO; part of the ARMv8.2 Extensions)
-         causes the 'unprivileged' variant of the load/store instructions to
-         be overridden to be privileged.
-
-         This option changes get_user() and friends to use the 'unprivileged'
-         variant of the load/store instructions. This ensures that user-space
-         really did have access to the supplied memory. When addr_limit is
-         set to kernel memory the UAO bit will be set, allowing privileged
-         access to kernel memory.
-
-         Choosing this option will cause copy_to_user() et al to use user-space
-         memory permissions.
-
-         The feature is detected at runtime, the kernel will use the
-         regular load/store instructions if the cpu does not implement the
-         feature.
-
 config ARM64_PMEM
        bool "Enable support for persistent memory"
        select ARCH_HAS_PMEM_API
@@ -1847,15 +1830,36 @@ config CMDLINE
          entering them here. As a minimum, you should specify the the
          root device (e.g. root=/dev/nfs).
 
+choice
+       prompt "Kernel command line type" if CMDLINE != ""
+       default CMDLINE_FROM_BOOTLOADER
+       help
+         Choose how the kernel will handle the provided default kernel
+         command line string.
+
+config CMDLINE_FROM_BOOTLOADER
+       bool "Use bootloader kernel arguments if available"
+       help
+         Uses the command-line options passed by the boot loader. If
+         the boot loader doesn't provide any, the default kernel command
+         string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+       bool "Extend bootloader kernel arguments"
+       help
+         The command-line arguments provided by the boot loader will be
+         appended to the default kernel command string.
+
 config CMDLINE_FORCE
        bool "Always use the default kernel command string"
-       depends on CMDLINE != ""
        help
          Always use the default kernel command string, even if the boot
          loader passes other arguments to the kernel.
          This is useful if you cannot or don't want to change the
          command-line options your boot loader passes to the kernel.
 
+endchoice
+
 config EFI_STUB
        bool
 
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
new file mode 100644 (file)
index 0000000..5df500d
--- /dev/null
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ALTERNATIVE_MACROS_H
+#define __ASM_ALTERNATIVE_MACROS_H
+
+#include <asm/cpucaps.h>
+
+#define ARM64_CB_PATCH ARM64_NCAPS
+
+/* A64 instructions are always 32 bits. */
+#define        AARCH64_INSN_SIZE               4
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+
+#define ALTINSTR_ENTRY(feature)                                                      \
+       " .word 661b - .\n"                             /* label           */ \
+       " .word 663f - .\n"                             /* new instruction */ \
+       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
+       " .byte 662b-661b\n"                            /* source len      */ \
+       " .byte 664f-663f\n"                            /* replacement len */
+
+#define ALTINSTR_ENTRY_CB(feature, cb)                                       \
+       " .word 661b - .\n"                             /* label           */ \
+       " .word " __stringify(cb) "- .\n"               /* callback */        \
+       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
+       " .byte 662b-661b\n"                            /* source len      */ \
+       " .byte 664f-663f\n"                            /* replacement len */
+
+/*
+ * alternative assembly primitive:
+ *
+ * If any of these .org directive fail, it means that insn1 and insn2
+ * don't have the same length. This used to be written as
+ *
+ * .if ((664b-663b) != (662b-661b))
+ *     .error "Alternatives instruction length mismatch"
+ * .endif
+ *
+ * but most assemblers die if insn1 or insn2 have a .inst. This should
+ * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
+ * containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
+ */
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled)    \
+       ".if "__stringify(cfg_enabled)" == 1\n"                         \
+       "661:\n\t"                                                      \
+       oldinstr "\n"                                                   \
+       "662:\n"                                                        \
+       ".pushsection .altinstructions,\"a\"\n"                         \
+       ALTINSTR_ENTRY(feature)                                         \
+       ".popsection\n"                                                 \
+       ".subsection 1\n"                                               \
+       "663:\n\t"                                                      \
+       newinstr "\n"                                                   \
+       "664:\n\t"                                                      \
+       ".org   . - (664b-663b) + (662b-661b)\n\t"                      \
+       ".org   . - (662b-661b) + (664b-663b)\n\t"                      \
+       ".previous\n"                                                   \
+       ".endif\n"
+
+#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb)       \
+       ".if "__stringify(cfg_enabled)" == 1\n"                         \
+       "661:\n\t"                                                      \
+       oldinstr "\n"                                                   \
+       "662:\n"                                                        \
+       ".pushsection .altinstructions,\"a\"\n"                         \
+       ALTINSTR_ENTRY_CB(feature, cb)                                  \
+       ".popsection\n"                                                 \
+       "663:\n\t"                                                      \
+       "664:\n\t"                                                      \
+       ".endif\n"
+
+#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)        \
+       __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+
+#define ALTERNATIVE_CB(oldinstr, cb) \
+       __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
+#else
+
+#include <asm/assembler.h>
+
+.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+       .word \orig_offset - .
+       .word \alt_offset - .
+       .hword \feature
+       .byte \orig_len
+       .byte \alt_len
+.endm
+
+.macro alternative_insn insn1, insn2, cap, enable = 1
+       .if \enable
+661:   \insn1
+662:   .pushsection .altinstructions, "a"
+       altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
+       .popsection
+       .subsection 1
+663:   \insn2
+664:   .previous
+       .org    . - (664b-663b) + (662b-661b)
+       .org    . - (662b-661b) + (664b-663b)
+       .endif
+.endm
+
+/*
+ * Alternative sequences
+ *
+ * The code for the case where the capability is not present will be
+ * assembled and linked as normal. There are no restrictions on this
+ * code.
+ *
+ * The code for the case where the capability is present will be
+ * assembled into a special section to be used for dynamic patching.
+ * Code for that case must:
+ *
+ * 1. Be exactly the same length (in bytes) as the default code
+ *    sequence.
+ *
+ * 2. Not contain a branch target that is used outside of the
+ *    alternative sequence it is defined in (branches into an
+ *    alternative sequence are not fixed up).
+ */
+
+/*
+ * Begin an alternative code sequence.
+ */
+.macro alternative_if_not cap
+       .set .Lasm_alt_mode, 0
+       .pushsection .altinstructions, "a"
+       altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
+       .popsection
+661:
+.endm
+
+.macro alternative_if cap
+       .set .Lasm_alt_mode, 1
+       .pushsection .altinstructions, "a"
+       altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
+       .popsection
+       .subsection 1
+       .align 2        /* So GAS knows label 661 is suitably aligned */
+661:
+.endm
+
+.macro alternative_cb cb
+       .set .Lasm_alt_mode, 0
+       .pushsection .altinstructions, "a"
+       altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+       .popsection
+661:
+.endm
+
+/*
+ * Provide the other half of the alternative code sequence.
+ */
+.macro alternative_else
+662:
+       .if .Lasm_alt_mode==0
+       .subsection 1
+       .else
+       .previous
+       .endif
+663:
+.endm
+
+/*
+ * Complete an alternative code sequence.
+ */
+.macro alternative_endif
+664:
+       .if .Lasm_alt_mode==0
+       .previous
+       .endif
+       .org    . - (664b-663b) + (662b-661b)
+       .org    . - (662b-661b) + (664b-663b)
+.endm
+
+/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
+/*
+ * Provides a trivial alternative or default sequence consisting solely
+ * of NOPs. The number of NOPs is chosen automatically to match the
+ * previous case.
+ */
+.macro alternative_else_nop_endif
+alternative_else
+       nops    (662b-661b) / AARCH64_INSN_SIZE
+alternative_endif
+.endm
+
+#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...)  \
+       alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
+
+.macro user_alt, label, oldinstr, newinstr, cond
+9999:  alternative_insn "\oldinstr", "\newinstr", \cond
+       _asm_extable 9999b, \label
+.endm
+
+#endif  /*  __ASSEMBLY__  */
+
+/*
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
+ *
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
+ * N.B. If CONFIG_FOO is specified, but not selected, the whole block
+ *      will be omitted, including oldinstr.
+ */
+#define ALTERNATIVE(oldinstr, newinstr, ...)   \
+       _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
+
+#endif /* __ASM_ALTERNATIVE_MACROS_H */
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 619db9b..a38b92e 100644 (file)
@@ -2,17 +2,13 @@
 #ifndef __ASM_ALTERNATIVE_H
 #define __ASM_ALTERNATIVE_H
 
-#include <asm/cpucaps.h>
-#include <asm/insn.h>
-
-#define ARM64_CB_PATCH ARM64_NCAPS
+#include <asm/alternative-macros.h>
 
 #ifndef __ASSEMBLY__
 
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/stddef.h>
-#include <linux/stringify.h>
 
 struct alt_instr {
        s32 orig_offset;        /* offset to original instruction */
@@ -35,264 +31,5 @@ void apply_alternatives_module(void *start, size_t length);
 static inline void apply_alternatives_module(void *start, size_t length) { }
 #endif
 
-#define ALTINSTR_ENTRY(feature)                                                      \
-       " .word 661b - .\n"                             /* label           */ \
-       " .word 663f - .\n"                             /* new instruction */ \
-       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
-       " .byte 662b-661b\n"                            /* source len      */ \
-       " .byte 664f-663f\n"                            /* replacement len */
-
-#define ALTINSTR_ENTRY_CB(feature, cb)                                       \
-       " .word 661b - .\n"                             /* label           */ \
-       " .word " __stringify(cb) "- .\n"               /* callback */        \
-       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
-       " .byte 662b-661b\n"                            /* source len      */ \
-       " .byte 664f-663f\n"                            /* replacement len */
-
-/*
- * alternative assembly primitive:
- *
- * If any of these .org directive fail, it means that insn1 and insn2
- * don't have the same length. This used to be written as
- *
- * .if ((664b-663b) != (662b-661b))
- *     .error "Alternatives instruction length mismatch"
- * .endif
- *
- * but most assemblers die if insn1 or insn2 have a .inst. This should
- * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
- * containing commit 4e4d08cf7399b606 or c1baaddf8861).
- *
- * Alternatives with callbacks do not generate replacement instructions.
- */
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled)    \
-       ".if "__stringify(cfg_enabled)" == 1\n"                         \
-       "661:\n\t"                                                      \
-       oldinstr "\n"                                                   \
-       "662:\n"                                                        \
-       ".pushsection .altinstructions,\"a\"\n"                         \
-       ALTINSTR_ENTRY(feature)                                         \
-       ".popsection\n"                                                 \
-       ".subsection 1\n"                                               \
-       "663:\n\t"                                                      \
-       newinstr "\n"                                                   \
-       "664:\n\t"                                                      \
-       ".org   . - (664b-663b) + (662b-661b)\n\t"                      \
-       ".org   . - (662b-661b) + (664b-663b)\n\t"                      \
-       ".previous\n"                                                   \
-       ".endif\n"
-
-#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb)       \
-       ".if "__stringify(cfg_enabled)" == 1\n"                         \
-       "661:\n\t"                                                      \
-       oldinstr "\n"                                                   \
-       "662:\n"                                                        \
-       ".pushsection .altinstructions,\"a\"\n"                         \
-       ALTINSTR_ENTRY_CB(feature, cb)                                  \
-       ".popsection\n"                                                 \
-       "663:\n\t"                                                      \
-       "664:\n\t"                                                      \
-       ".endif\n"
-
-#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)        \
-       __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
-
-#define ALTERNATIVE_CB(oldinstr, cb) \
-       __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
-#else
-
-#include <asm/assembler.h>
-
-.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
-       .word \orig_offset - .
-       .word \alt_offset - .
-       .hword \feature
-       .byte \orig_len
-       .byte \alt_len
-.endm
-
-.macro alternative_insn insn1, insn2, cap, enable = 1
-       .if \enable
-661:   \insn1
-662:   .pushsection .altinstructions, "a"
-       altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
-       .popsection
-       .subsection 1
-663:   \insn2
-664:   .previous
-       .org    . - (664b-663b) + (662b-661b)
-       .org    . - (662b-661b) + (664b-663b)
-       .endif
-.endm
-
-/*
- * Alternative sequences
- *
- * The code for the case where the capability is not present will be
- * assembled and linked as normal. There are no restrictions on this
- * code.
- *
- * The code for the case where the capability is present will be
- * assembled into a special section to be used for dynamic patching.
- * Code for that case must:
- *
- * 1. Be exactly the same length (in bytes) as the default code
- *    sequence.
- *
- * 2. Not contain a branch target that is used outside of the
- *    alternative sequence it is defined in (branches into an
- *    alternative sequence are not fixed up).
- */
-
-/*
- * Begin an alternative code sequence.
- */
-.macro alternative_if_not cap
-       .set .Lasm_alt_mode, 0
-       .pushsection .altinstructions, "a"
-       altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
-       .popsection
-661:
-.endm
-
-.macro alternative_if cap
-       .set .Lasm_alt_mode, 1
-       .pushsection .altinstructions, "a"
-       altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
-       .popsection
-       .subsection 1
-       .align 2        /* So GAS knows label 661 is suitably aligned */
-661:
-.endm
-
-.macro alternative_cb cb
-       .set .Lasm_alt_mode, 0
-       .pushsection .altinstructions, "a"
-       altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
-       .popsection
-661:
-.endm
-
-/*
- * Provide the other half of the alternative code sequence.
- */
-.macro alternative_else
-662:
-       .if .Lasm_alt_mode==0
-       .subsection 1
-       .else
-       .previous
-       .endif
-663:
-.endm
-
-/*
- * Complete an alternative code sequence.
- */
-.macro alternative_endif
-664:
-       .if .Lasm_alt_mode==0
-       .previous
-       .endif
-       .org    . - (664b-663b) + (662b-661b)
-       .org    . - (662b-661b) + (664b-663b)
-.endm
-
-/*
- * Callback-based alternative epilogue
- */
-.macro alternative_cb_end
-662:
-.endm
-
-/*
- * Provides a trivial alternative or default sequence consisting solely
- * of NOPs. The number of NOPs is chosen automatically to match the
- * previous case.
- */
-.macro alternative_else_nop_endif
-alternative_else
-       nops    (662b-661b) / AARCH64_INSN_SIZE
-alternative_endif
-.endm
-
-#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...)  \
-       alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
-
-.macro user_alt, label, oldinstr, newinstr, cond
-9999:  alternative_insn "\oldinstr", "\newinstr", \cond
-       _asm_extable 9999b, \label
-.endm
-
-/*
- * Generate the assembly for UAO alternatives with exception table entries.
- * This is complicated as there is no post-increment or pair versions of the
- * unprivileged instructions, and USER() only works for single instructions.
- */
-#ifdef CONFIG_ARM64_UAO
-       .macro uao_ldp l, reg1, reg2, addr, post_inc
-               alternative_if_not ARM64_HAS_UAO
-8888:                  ldp     \reg1, \reg2, [\addr], \post_inc;
-8889:                  nop;
-                       nop;
-               alternative_else
-                       ldtr    \reg1, [\addr];
-                       ldtr    \reg2, [\addr, #8];
-                       add     \addr, \addr, \post_inc;
-               alternative_endif
-
-               _asm_extable    8888b,\l;
-               _asm_extable    8889b,\l;
-       .endm
-
-       .macro uao_stp l, reg1, reg2, addr, post_inc
-               alternative_if_not ARM64_HAS_UAO
-8888:                  stp     \reg1, \reg2, [\addr], \post_inc;
-8889:                  nop;
-                       nop;
-               alternative_else
-                       sttr    \reg1, [\addr];
-                       sttr    \reg2, [\addr, #8];
-                       add     \addr, \addr, \post_inc;
-               alternative_endif
-
-               _asm_extable    8888b,\l;
-               _asm_extable    8889b,\l;
-       .endm
-
-       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
-               alternative_if_not ARM64_HAS_UAO
-8888:                  \inst   \reg, [\addr], \post_inc;
-                       nop;
-               alternative_else
-                       \alt_inst       \reg, [\addr];
-                       add             \addr, \addr, \post_inc;
-               alternative_endif
-
-               _asm_extable    8888b,\l;
-       .endm
-#else
-       .macro uao_ldp l, reg1, reg2, addr, post_inc
-               USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
-       .endm
-       .macro uao_stp l, reg1, reg2, addr, post_inc
-               USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
-       .endm
-       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
-               USER(\l, \inst \reg, [\addr], \post_inc)
-       .endm
-#endif
-
-#endif  /*  __ASSEMBLY__  */
-
-/*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
- *
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
- * N.B. If CONFIG_FOO is specified, but not selected, the whole block
- *      will be omitted, including oldinstr.
- */
-#define ALTERNATIVE(oldinstr, newinstr, ...)   \
-       _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
-
+#endif /* __ASSEMBLY__ */
 #endif /* __ASM_ALTERNATIVE_H */
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index f68a0e6..9990059 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef __ASM_ASM_UACCESS_H
 #define __ASM_ASM_UACCESS_H
 
-#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/mmu.h>
 #include <asm/sysreg.h>
        .macro  __uaccess_ttbr0_disable, tmp1
        mrs     \tmp1, ttbr1_el1                        // swapper_pg_dir
        bic     \tmp1, \tmp1, #TTBR_ASID_MASK
-       sub     \tmp1, \tmp1, #RESERVED_TTBR0_SIZE      // reserved_ttbr0 just before swapper_pg_dir
+       sub     \tmp1, \tmp1, #PAGE_SIZE                // reserved_pg_dir just before swapper_pg_dir
        msr     ttbr0_el1, \tmp1                        // set reserved TTBR0_EL1
        isb
-       add     \tmp1, \tmp1, #RESERVED_TTBR0_SIZE
+       add     \tmp1, \tmp1, #PAGE_SIZE
        msr     ttbr1_el1, \tmp1                // set reserved ASID
        isb
        .endm
@@ -58,4 +58,33 @@ alternative_else_nop_endif
        .endm
 #endif
 
+/*
+ * Generate the assembly for LDTR/STTR with exception table entries.
+ * This is complicated as there is no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+       .macro user_ldp l, reg1, reg2, addr, post_inc
+8888:          ldtr    \reg1, [\addr];
+8889:          ldtr    \reg2, [\addr, #8];
+               add     \addr, \addr, \post_inc;
+
+               _asm_extable    8888b,\l;
+               _asm_extable    8889b,\l;
+       .endm
+
+       .macro user_stp l, reg1, reg2, addr, post_inc
+8888:          sttr    \reg1, [\addr];
+8889:          sttr    \reg2, [\addr, #8];
+               add     \addr, \addr, \post_inc;
+
+               _asm_extable    8888b,\l;
+               _asm_extable    8889b,\l;
+       .endm
+
+       .macro user_ldst l, inst, reg, addr, post_inc
+8888:          \inst           \reg, [\addr];
+               add             \addr, \addr, \post_inc;
+
+               _asm_extable    8888b,\l;
+       .endm
 #endif
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index e7d9899..a7242ef 100644 (file)
@@ -16,8 +16,6 @@
 #define ARM64_WORKAROUND_CAVIUM_23154          6
 #define ARM64_WORKAROUND_834220                        7
 #define ARM64_HAS_NO_HW_PREFETCH               8
-#define ARM64_HAS_UAO                          9
-#define ARM64_ALT_PAN_NOT_UAO                  10
 #define ARM64_HAS_VIRT_HOST_EXTN               11
 #define ARM64_WORKAROUND_CAVIUM_27456          12
 #define ARM64_HAS_32BIT_EL0                    13
@@ -66,7 +64,8 @@
 #define ARM64_HAS_TLB_RANGE                    56
 #define ARM64_MTE                              57
 #define ARM64_WORKAROUND_1508412               58
+#define ARM64_HAS_LDAPR                                59
 
-#define ARM64_NCAPS                            59
+#define ARM64_NCAPS                            60
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index da250e4..1c406e8 100644 (file)
@@ -669,10 +669,16 @@ static __always_inline bool system_supports_fpsimd(void)
        return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
 }
 
+static inline bool system_uses_hw_pan(void)
+{
+       return IS_ENABLED(CONFIG_ARM64_PAN) &&
+               cpus_have_const_cap(ARM64_HAS_PAN);
+}
+
 static inline bool system_uses_ttbr0_pan(void)
 {
        return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
-               !cpus_have_const_cap(ARM64_HAS_PAN);
+               !system_uses_hw_pan();
 }
 
 static __always_inline bool system_supports_sve(void)
@@ -764,11 +770,26 @@ static inline bool cpu_has_hw_af(void)
                                                ID_AA64MMFR1_HADBS_SHIFT);
 }
 
+static inline bool cpu_has_pan(void)
+{
+       u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+       return cpuid_feature_extract_unsigned_field(mmfr1,
+                                                   ID_AA64MMFR1_PAN_SHIFT);
+}
+
 #ifdef CONFIG_ARM64_AMU_EXTN
 /* Check whether the cpu supports the Activity Monitors Unit (AMU) */
 extern bool cpu_has_amu_feat(int cpu);
+#else
+static inline bool cpu_has_amu_feat(int cpu)
+{
+       return false;
+}
 #endif
 
+/* Get a cpu that supports the Activity Monitors Unit (AMU) */
+extern int get_cpu_with_amu_feat(void);
+
 static inline unsigned int get_vmid_bits(u64 mmfr1)
 {
        int vmid_bits;
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 0756191..7853739 100644 (file)
@@ -37,7 +37,7 @@ asmlinkage void enter_from_user_mode(void);
 asmlinkage void exit_to_user_mode(void);
 void arm64_enter_nmi(struct pt_regs *regs);
 void arm64_exit_nmi(struct pt_regs *regs);
-void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
 void do_undefinstr(struct pt_regs *regs);
 void do_bti(struct pt_regs *regs);
 asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
diff --git a/arch/arm64/include/asm/exec.h b/arch/arm64/include/asm/exec.h
index 1aae6f9..9a1c22c 100644 (file)
@@ -10,6 +10,5 @@
 #include <linux/sched.h>
 
 extern unsigned long arch_align_stack(unsigned long sp);
-void uao_thread_switch(struct task_struct *next);
 
 #endif /* __ASM_EXEC_H */
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 97f6a63..8e41faa 100644 (file)
@@ -16,7 +16,7 @@
 do {                                                                   \
        unsigned int loops = FUTEX_MAX_LOOPS;                           \
                                                                        \
-       uaccess_enable();                                               \
+       uaccess_enable_privileged();                                    \
        asm volatile(                                                   \
 "      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %w1, %2\n"                                              \
@@ -39,7 +39,7 @@ do {                                                                  \
          "+r" (loops)                                                  \
        : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN)                   \
        : "memory");                                                    \
-       uaccess_disable();                                              \
+       uaccess_disable_privileged();                                   \
 } while (0)
 
 static inline int
@@ -95,7 +95,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
                return -EFAULT;
 
        uaddr = __uaccess_mask_ptr(_uaddr);
-       uaccess_enable();
+       uaccess_enable_privileged();
        asm volatile("// futex_atomic_cmpxchg_inatomic\n"
 "      prfm    pstl1strm, %2\n"
 "1:    ldxr    %w1, %2\n"
@@ -118,7 +118,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
        : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops)
        : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN)
        : "memory");
-       uaccess_disable();
+       uaccess_disable_privileged();
 
        if (!ret)
                *uval = val;
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 4b39293..4ebb9c0 100644 (file)
@@ -10,8 +10,7 @@
 #include <linux/build_bug.h>
 #include <linux/types.h>
 
-/* A64 instructions are always 32 bits. */
-#define        AARCH64_INSN_SIZE               4
+#include <asm/alternative.h>
 
 #ifndef __ASSEMBLY__
 /*
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 19ca76e..587c504 100644 (file)
 #define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end))
 #define IDMAP_DIR_SIZE         (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
 
-#ifdef CONFIG_ARM64_SW_TTBR0_PAN
-#define RESERVED_TTBR0_SIZE    (PAGE_SIZE)
-#else
-#define RESERVED_TTBR0_SIZE    (0)
-#endif
-
 /* Initial memory map size */
 #if ARM64_SWAPPER_USES_SECTION_MAPS
 #define SWAPPER_BLOCK_SHIFT    SECTION_SHIFT
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 8699ce3..5d38ff4 100644 (file)
@@ -28,18 +28,11 @@ struct prev_kprobe {
        unsigned int status;
 };
 
-/* Single step context for kprobe */
-struct kprobe_step_ctx {
-       unsigned long ss_pending;
-       unsigned long match_addr;
-};
-
 /* per-cpu kprobe control block */
 struct kprobe_ctlblk {
        unsigned int kprobe_status;
        unsigned long saved_irqflag;
        struct prev_kprobe prev_kprobe;
-       struct kprobe_step_ctx ss_ctx;
 };
 
 void arch_remove_kprobe(struct kprobe *);
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index cd61239..556cb2d 100644 (file)
@@ -30,8 +30,8 @@
  * keep a constant PAGE_OFFSET and "fallback" to using the higher end
  * of the VMEMMAP where 52-bit support is not available in hardware.
  */
-#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) \
-                       >> (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT))
+#define VMEMMAP_SHIFT  (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)
+#define VMEMMAP_SIZE   ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT)
 
 /*
  * PAGE_OFFSET - the virtual address of the start of the linear map, at the
 #define _PAGE_OFFSET(va)       (-(UL(1) << (va)))
 #define PAGE_OFFSET            (_PAGE_OFFSET(VA_BITS))
 #define KIMAGE_VADDR           (MODULES_END)
-#define BPF_JIT_REGION_START   (KASAN_SHADOW_END)
+#define BPF_JIT_REGION_START   (_PAGE_END(VA_BITS_MIN))
 #define BPF_JIT_REGION_SIZE    (SZ_128M)
 #define BPF_JIT_REGION_END     (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
 #define MODULES_END            (MODULES_VADDR + MODULES_VSIZE)
 #define MODULES_VADDR          (BPF_JIT_REGION_END)
 #define MODULES_VSIZE          (SZ_128M)
-#define VMEMMAP_START          (-VMEMMAP_SIZE - SZ_2M)
+#define VMEMMAP_START          (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
 #define VMEMMAP_END            (VMEMMAP_START + VMEMMAP_SIZE)
-#define PCI_IO_END             (VMEMMAP_START - SZ_2M)
+#define PCI_IO_END             (VMEMMAP_START - SZ_8M)
 #define PCI_IO_START           (PCI_IO_END - PCI_IO_SIZE)
-#define FIXADDR_TOP            (PCI_IO_START - SZ_2M)
+#define FIXADDR_TOP            (VMEMMAP_START - SZ_32M)
 
 #if VA_BITS > 48
 #define VA_BITS_MIN            (48)
 #define KASAN_SHADOW_OFFSET    _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
 #define KASAN_SHADOW_END       ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \
                                        + KASAN_SHADOW_OFFSET)
+#define PAGE_END               (KASAN_SHADOW_END - (1UL << (vabits_actual - KASAN_SHADOW_SCALE_SHIFT)))
 #define KASAN_THREAD_SHIFT     1
 #else
 #define KASAN_THREAD_SHIFT     0
-#define KASAN_SHADOW_END       (_PAGE_END(VA_BITS_MIN))
+#define PAGE_END               (_PAGE_END(VA_BITS_MIN))
 #endif /* CONFIG_KASAN */
 
 #define MIN_THREAD_SHIFT       (14 + KASAN_THREAD_SHIFT)
 #include <asm/bug.h>
 
 extern u64                     vabits_actual;
-#define PAGE_END               (_PAGE_END(vabits_actual))
 
 extern s64                     memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
@@ -238,11 +238,9 @@ static inline const void *__tag_set(const void *addr, u8 tag)
 
 
 /*
- * The linear kernel range starts at the bottom of the virtual address
- * space. Testing the top bit for the start of the region is a
- * sufficient check and avoids having to worry about the tag.
+ * The linear kernel range starts at the bottom of the virtual address space.
  */
-#define __is_lm_address(addr)  (!(((u64)addr) & BIT(vabits_actual - 1)))
+#define __is_lm_address(addr)  (((u64)(addr) & ~PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET))
 
 #define __lm_to_phys(addr)     (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
 #define __kimg_to_phys(addr)   ((addr) - kimage_voffset)
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 0672236..5c72c20 100644 (file)
@@ -36,11 +36,11 @@ static inline void contextidr_thread_switch(struct task_struct *next)
 }
 
 /*
- * Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0.
+ * Set TTBR0 to reserved_pg_dir. No translations will be possible via TTBR0.
  */
 static inline void cpu_set_reserved_ttbr0(void)
 {
-       unsigned long ttbr = phys_to_ttbr(__pa_symbol(empty_zero_page));
+       unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
 
        write_sysreg(ttbr, ttbr0_el1);
        isb();
@@ -195,7 +195,7 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
                return;
 
        if (mm == &init_mm)
-               ttbr = __pa_symbol(empty_zero_page);
+               ttbr = __pa_symbol(reserved_pg_dir);
        else
                ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
 
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 01a96d0..42442a0 100644 (file)
 #define TCR_TBI1               (UL(1) << 38)
 #define TCR_HA                 (UL(1) << 39)
 #define TCR_HD                 (UL(1) << 40)
+#define TCR_TBID1              (UL(1) << 52)
 #define TCR_NFD0               (UL(1) << 53)
 #define TCR_NFD1               (UL(1) << 54)
 #define TCR_E0PD0              (UL(1) << 55)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 5628289..005eb03 100644 (file)
@@ -22,7 +22,7 @@
  *     and fixed mappings
  */
 #define VMALLOC_START          (MODULES_END)
-#define VMALLOC_END            (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
+#define VMALLOC_END            (VMEMMAP_START - SZ_256M)
 
 #define vmemmap                        ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
 
@@ -527,6 +527,7 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
 extern pgd_t idmap_pg_end[];
 extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
+extern pgd_t reserved_pg_dir[PTRS_PER_PGD];
 
 extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index fce8cbe..724249f 100644 (file)
@@ -8,9 +8,6 @@
 #ifndef __ASM_PROCESSOR_H
 #define __ASM_PROCESSOR_H
 
-#define KERNEL_DS              UL(-1)
-#define USER_DS                        ((UL(1) << VA_BITS) - 1)
-
 /*
  * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
  * no point in shifting all network buffers by 2 bytes just to make some IP
@@ -48,6 +45,7 @@
 
 #define DEFAULT_MAP_WINDOW_64  (UL(1) << VA_BITS_MIN)
 #define TASK_SIZE_64           (UL(1) << vabits_actual)
+#define TASK_SIZE_MAX          (UL(1) << VA_BITS)
 
 #ifdef CONFIG_COMPAT
 #if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS)
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 28c85b8..e58bca8 100644 (file)
 #define CurrentEL_EL1          (1 << 2)
 #define CurrentEL_EL2          (2 << 2)
 
+#define INIT_PSTATE_EL1 \
+       (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h)
+#define INIT_PSTATE_EL2 \
+       (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h)
+
 /*
  * PMR values used to mask/unmask interrupts.
  *
@@ -188,8 +193,7 @@ struct pt_regs {
        s32 syscallno;
        u32 unused2;
 #endif
-
-       u64 orig_addr_limit;
+       u64 sdei_ttbr1;
        /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */
        u64 pmr_save;
        u64 stackframe[2];
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
new file mode 100644 (file)
index 0000000..1bce62f
--- /dev/null
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#ifdef CONFIG_LTO
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+
+#ifndef BUILD_VDSO
+
+#ifdef CONFIG_AS_HAS_LDAPR
+#define __LOAD_RCPC(sfx, regs...)                                      \
+       ALTERNATIVE(                                                    \
+               "ldar"  #sfx "\t" #regs,                                \
+               ".arch_extension rcpc\n"                                \
+               "ldapr" #sfx "\t" #regs,                                \
+       ARM64_HAS_LDAPR)
+#else
+#define __LOAD_RCPC(sfx, regs...)      "ldar" #sfx "\t" #regs
+#endif /* CONFIG_AS_HAS_LDAPR */
+
+/*
+ * When building with LTO, there is an increased risk of the compiler
+ * converting an address dependency headed by a READ_ONCE() invocation
+ * into a control dependency and consequently allowing for harmful
+ * reordering by the CPU.
+ *
+ * Ensure that such transformations are harmless by overriding the generic
+ * READ_ONCE() definition with one that provides RCpc acquire semantics
+ * when building with LTO.
+ */
+#define __READ_ONCE(x)                                                 \
+({                                                                     \
+       typeof(&(x)) __x = &(x);                                        \
+       int atomic = 1;                                                 \
+       union { __unqual_scalar_typeof(*__x) __val; char __c[1]; } __u; \
+       switch (sizeof(x)) {                                            \
+       case 1:                                                         \
+               asm volatile(__LOAD_RCPC(b, %w0, %1)                    \
+                       : "=r" (*(__u8 *)__u.__c)                       \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       case 2:                                                         \
+               asm volatile(__LOAD_RCPC(h, %w0, %1)                    \
+                       : "=r" (*(__u16 *)__u.__c)                      \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       case 4:                                                         \
+               asm volatile(__LOAD_RCPC(, %w0, %1)                     \
+                       : "=r" (*(__u32 *)__u.__c)                      \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       case 8:                                                         \
+               asm volatile(__LOAD_RCPC(, %0, %1)                      \
+                       : "=r" (*(__u64 *)__u.__c)                      \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       default:                                                        \
+               atomic = 0;                                             \
+       }                                                               \
+       atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(__x))__x);\
+})
+
+#endif /* !BUILD_VDSO */
+#endif /* CONFIG_LTO */
+
+#include <asm-generic/rwonce.h>
+
+#endif /* __ASM_RWONCE_H */
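A minimal usage sketch of the ordering this definition preserves (the names below are illustrative, not part of the patch): a writer publishes a pointer with a release store and a reader picks it up via READ_ONCE(); with the RCpc acquire form, the dependent load of the payload cannot be reordered before the pointer load even if LTO has rewritten the address dependency.

	/* Illustrative publish/consume pair; 'gp' and 'struct item' are made up. */
	struct item {
		int payload;
	};
	static struct item *gp;

	void writer(struct item *p)
	{
		p->payload = 42;
		smp_store_release(&gp, p);	/* publish the initialised item */
	}

	int reader(void)
	{
		struct item *p = READ_ONCE(gp);	/* acquire when CONFIG_LTO=y */

		return p ? p->payload : -1;	/* dependent load stays ordered */
	}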
diff --git a/arch/arm64/include/asm/signal.h b/arch/arm64/include/asm/signal.h
new file mode 100644 (file)
index 0000000..ef449f5
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_ASM_SIGNAL_H
+#define __ARM64_ASM_SIGNAL_H
+
+#include <asm/memory.h>
+#include <uapi/asm/signal.h>
+#include <uapi/asm/siginfo.h>
+
+static inline void __user *arch_untagged_si_addr(void __user *addr,
+                                                unsigned long sig,
+                                                unsigned long si_code)
+{
+       /*
+        * For historical reasons, all bits of the fault address are exposed as
+        * address bits for watchpoint exceptions. New architectures should
+        * handle the tag bits consistently.
+        */
+       if (sig == SIGTRAP && si_code == TRAP_BRKPT)
+               return addr;
+
+       return untagged_addr(addr);
+}
+#define arch_untagged_si_addr arch_untagged_si_addr
+
+#endif
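A short sketch of what this helper does with a tagged fault address (the address value is illustrative): for anything other than a breakpoint trap the top-byte tag is stripped before the address is reported in si_addr, while SIGTRAP/TRAP_BRKPT keeps every bit, matching the historical behaviour described in the comment.

	/* Illustrative only: tag 0x8f in bits 63:56 of a user address. */
	void __user *fault = (void __user *)0x8f00ffffbeef0000UL;

	/* SIGSEGV: untagged, so si_addr would carry 0x0000ffffbeef0000. */
	void __user *seg = arch_untagged_si_addr(fault, SIGSEGV, SEGV_MAPERR);

	/* SIGTRAP/TRAP_BRKPT: passed through unmodified. */
	void __user *brk = arch_untagged_si_addr(fault, SIGTRAP, TRAP_BRKPT);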
index 801861d..cf7922f 100644 (file)
 #define SET_PSTATE_SSBS(x)             __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift))
 #define SET_PSTATE_TCO(x)              __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift))
 
+#define set_pstate_pan(x)              asm volatile(SET_PSTATE_PAN(x))
+#define set_pstate_uao(x)              asm volatile(SET_PSTATE_UAO(x))
+#define set_pstate_ssbs(x)             asm volatile(SET_PSTATE_SSBS(x))
+
 #define __SYS_BARRIER_INSN(CRm, op2, Rt) \
        __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
 
 #define ENDIAN_SET_EL2         0
 #endif
 
+#define INIT_SCTLR_EL2_MMU_OFF \
+       (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
+
 /* SCTLR_EL1 specific flags. */
 #define SCTLR_EL1_ATA0         (BIT(42))
 
 #define ENDIAN_SET_EL1         0
 #endif
 
-#define SCTLR_EL1_SET  (SCTLR_ELx_M    | SCTLR_ELx_C    | SCTLR_ELx_SA   |\
-                        SCTLR_EL1_SA0  | SCTLR_EL1_SED  | SCTLR_ELx_I    |\
-                        SCTLR_EL1_DZE  | SCTLR_EL1_UCT                   |\
-                        SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\
-                        SCTLR_ELx_ITFSB| SCTLR_ELx_ATA  | SCTLR_EL1_ATA0 |\
-                        ENDIAN_SET_EL1 | SCTLR_EL1_UCI  | SCTLR_EL1_RES1)
+#define INIT_SCTLR_EL1_MMU_OFF \
+       (ENDIAN_SET_EL1 | SCTLR_EL1_RES1)
+
+#define INIT_SCTLR_EL1_MMU_ON \
+       (SCTLR_ELx_M    | SCTLR_ELx_C    | SCTLR_ELx_SA   | SCTLR_EL1_SA0   | \
+        SCTLR_EL1_SED  | SCTLR_ELx_I    | SCTLR_EL1_DZE  | SCTLR_EL1_UCT   | \
+        SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
+        SCTLR_ELx_ATA  | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI   | \
+        SCTLR_EL1_RES1)
 
 /* MAIR_ELx memory attributes (used by Linux) */
 #define MAIR_ATTR_DEVICE_nGnRnE                UL(0x00)
index 1ab63cf..673be2d 100644 (file)
@@ -22,7 +22,7 @@ void die(const char *msg, struct pt_regs *regs, int err);
 
 struct siginfo;
 void arm64_notify_die(const char *str, struct pt_regs *regs,
-                     int signo, int sicode, void __user *addr,
+                     int signo, int sicode, unsigned long far,
                      int err);
 
 void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
index 1fbab85..015beaf 100644 (file)
@@ -18,14 +18,11 @@ struct task_struct;
 #include <asm/stack_pointer.h>
 #include <asm/types.h>
 
-typedef unsigned long mm_segment_t;
-
 /*
  * low level task data that entry.S needs immediate access to.
  */
 struct thread_info {
        unsigned long           flags;          /* low level flags */
-       mm_segment_t            addr_limit;     /* address limit */
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
        u64                     ttbr0;          /* saved TTBR0_EL1 */
 #endif
@@ -66,8 +63,7 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_NOTIFY_RESUME      2       /* callback before returning to user */
 #define TIF_FOREIGN_FPSTATE    3       /* CPU's FP state is not current's */
 #define TIF_UPROBE             4       /* uprobe breakpoint or singlestep */
-#define TIF_FSCHECK            5       /* Check FS is USER_DS on return */
-#define TIF_MTE_ASYNC_FAULT    6       /* MTE Asynchronous Tag Check Fault */
+#define TIF_MTE_ASYNC_FAULT    5       /* MTE Asynchronous Tag Check Fault */
 #define TIF_SYSCALL_TRACE      8       /* syscall trace active */
 #define TIF_SYSCALL_AUDIT      9       /* syscall auditing */
 #define TIF_SYSCALL_TRACEPOINT 10      /* syscall tracepoint for ftrace */
@@ -93,7 +89,6 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
 #define _TIF_SYSCALL_EMU       (1 << TIF_SYSCALL_EMU)
 #define _TIF_UPROBE            (1 << TIF_UPROBE)
-#define _TIF_FSCHECK           (1 << TIF_FSCHECK)
 #define _TIF_SINGLESTEP                (1 << TIF_SINGLESTEP)
 #define _TIF_32BIT             (1 << TIF_32BIT)
 #define _TIF_SVE               (1 << TIF_SVE)
@@ -101,7 +96,7 @@ void arch_release_task_struct(struct task_struct *tsk);
 
 #define _TIF_WORK_MASK         (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
                                 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
-                                _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT)
+                                _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT)
 
 #define _TIF_SYSCALL_WORK      (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
                                 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
@@ -119,7 +114,6 @@ void arch_release_task_struct(struct task_struct *tsk);
 {                                                                      \
        .flags          = _TIF_FOREIGN_FPSTATE,                         \
        .preempt_count  = INIT_PREEMPT_COUNT,                           \
-       .addr_limit     = KERNEL_DS,                                    \
        INIT_SCS                                                        \
 }
 
index 11a4652..3b8dca4 100644 (file)
@@ -16,12 +16,14 @@ int pcibus_to_node(struct pci_bus *bus);
 
 #include <linux/arch_topology.h>
 
+void update_freq_counters_refs(void);
+void topology_scale_freq_tick(void);
+
 #ifdef CONFIG_ARM64_AMU_EXTN
 /*
  * Replace task scheduler's default counter-based
  * frequency-invariance scale factor setting.
  */
-void topology_scale_freq_tick(void);
 #define arch_scale_freq_tick topology_scale_freq_tick
 #endif /* CONFIG_ARM64_AMU_EXTN */
 
index d96dc2c..54f32a0 100644 (file)
@@ -26,9 +26,9 @@ void register_undef_hook(struct undef_hook *hook);
 void unregister_undef_hook(struct undef_hook *hook);
 void force_signal_inject(int signal, int code, unsigned long address, unsigned int err);
 void arm64_notify_segfault(unsigned long addr);
-void arm64_force_sig_fault(int signo, int code, void __user *addr, const char *str);
-void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, const char *str);
-void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr, const char *str);
+void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
+void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
+void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
 
 /*
  * Move regs->pc to next instruction and do necessary setup before it
index 991dd5f..abb31aa 100644 (file)
 #include <asm/memory.h>
 #include <asm/extable.h>
 
-#define get_fs()       (current_thread_info()->addr_limit)
-
-static inline void set_fs(mm_segment_t fs)
-{
-       current_thread_info()->addr_limit = fs;
-
-       /*
-        * Prevent a mispredicted conditional call to set_fs from forwarding
-        * the wrong address limit to access_ok under speculation.
-        */
-       spec_bar();
-
-       /* On user-mode return, check fs is correct */
-       set_thread_flag(TIF_FSCHECK);
-
-       /*
-        * Enable/disable UAO so that copy_to_user() etc can access
-        * kernel memory with the unprivileged instructions.
-        */
-       if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS)
-               asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
-       else
-               asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
-                               CONFIG_ARM64_UAO));
-}
-
-#define uaccess_kernel()       (get_fs() == KERNEL_DS)
+#define HAVE_GET_KERNEL_NOFAULT
 
 /*
  * Test whether a block of memory is a valid user space address.
  * Returns 1 if the range is valid, 0 otherwise.
  *
  * This is equivalent to the following test:
- * (u65)addr + (u65)size <= (u65)current->addr_limit + 1
+ * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX
  */
 static inline unsigned long __range_ok(const void __user *addr, unsigned long size)
 {
-       unsigned long ret, limit = current_thread_info()->addr_limit;
+       unsigned long ret, limit = TASK_SIZE_MAX - 1;
 
        /*
         * Asynchronous I/O running in a kernel thread does not have the
@@ -94,7 +68,6 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si
 }
 
 #define access_ok(addr, size)  __range_ok(addr, size)
-#define user_addr_max                  get_fs
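A plain-C sketch of the 65-bit comparison described above (illustrative, not the assembly actually used by __range_ok()): the point is that addr + size may wrap around in 64 bits, so the bound is checked without ever forming that sum.

	static inline bool range_ok_sketch(unsigned long addr, unsigned long size)
	{
		/* Equivalent to (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX. */
		if (size > TASK_SIZE_MAX)
			return false;
		return addr <= TASK_SIZE_MAX - size;
	}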
 
 #define _ASM_EXTABLE(from, to)                                         \
        "       .pushsection    __ex_table, \"a\"\n"                    \
@@ -113,8 +86,8 @@ static inline void __uaccess_ttbr0_disable(void)
        local_irq_save(flags);
        ttbr = read_sysreg(ttbr1_el1);
        ttbr &= ~TTBR_ASID_MASK;
-       /* reserved_ttbr0 placed before swapper_pg_dir */
-       write_sysreg(ttbr - RESERVED_TTBR0_SIZE, ttbr0_el1);
+       /* reserved_pg_dir placed before swapper_pg_dir */
+       write_sysreg(ttbr - PAGE_SIZE, ttbr0_el1);
        isb();
        /* Set reserved ASID */
        write_sysreg(ttbr, ttbr1_el1);
@@ -186,47 +159,26 @@ static inline void __uaccess_enable_hw_pan(void)
                        CONFIG_ARM64_PAN));
 }
 
-#define __uaccess_disable(alt)                                         \
-do {                                                                   \
-       if (!uaccess_ttbr0_disable())                                   \
-               asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt,          \
-                               CONFIG_ARM64_PAN));                     \
-} while (0)
-
-#define __uaccess_enable(alt)                                          \
-do {                                                                   \
-       if (!uaccess_ttbr0_enable())                                    \
-               asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt,          \
-                               CONFIG_ARM64_PAN));                     \
-} while (0)
-
-static inline void uaccess_disable(void)
+static inline void uaccess_disable_privileged(void)
 {
-       __uaccess_disable(ARM64_HAS_PAN);
-}
+       if (uaccess_ttbr0_disable())
+               return;
 
-static inline void uaccess_enable(void)
-{
-       __uaccess_enable(ARM64_HAS_PAN);
+       __uaccess_enable_hw_pan();
 }
 
-/*
- * These functions are no-ops when UAO is present.
- */
-static inline void uaccess_disable_not_uao(void)
+static inline void uaccess_enable_privileged(void)
 {
-       __uaccess_disable(ARM64_ALT_PAN_NOT_UAO);
-}
+       if (uaccess_ttbr0_enable())
+               return;
 
-static inline void uaccess_enable_not_uao(void)
-{
-       __uaccess_enable(ARM64_ALT_PAN_NOT_UAO);
+       __uaccess_disable_hw_pan();
 }
 
 /*
- * Sanitise a uaccess pointer such that it becomes NULL if above the
- * current addr_limit. In case the pointer is tagged (has the top byte set),
- * untag the pointer before checking.
+ * Sanitise a uaccess pointer such that it becomes NULL if above the maximum
+ * user address. In case the pointer is tagged (has the top byte set), untag
+ * the pointer before checking.
  */
 #define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
 static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
@@ -237,7 +189,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
        "       bics    xzr, %3, %2\n"
        "       csel    %0, %1, xzr, eq\n"
        : "=&r" (safe_ptr)
-       : "r" (ptr), "r" (current_thread_info()->addr_limit),
+       : "r" (ptr), "r" (TASK_SIZE_MAX - 1),
          "r" (untagged_addr(ptr))
        : "cc");
 
@@ -253,10 +205,9 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
  * The "__xxx_error" versions set the third argument to -EFAULT if an error
  * occurs, and leave it unchanged on success.
  */
-#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature)   \
+#define __get_mem_asm(load, reg, x, addr, err)                         \
        asm volatile(                                                   \
-       "1:"ALTERNATIVE(instr "     " reg "1, [%2]\n",                  \
-                       alt_instr " " reg "1, [%2]\n", feature)         \
+       "1:     " load "        " reg "1, [%2]\n"                       \
        "2:\n"                                                          \
        "       .section .fixup, \"ax\"\n"                              \
        "       .align  2\n"                                            \
@@ -268,35 +219,36 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
        : "+r" (err), "=&r" (x)                                         \
        : "r" (addr), "i" (-EFAULT))
 
-#define __raw_get_user(x, ptr, err)                                    \
+#define __raw_get_mem(ldr, x, ptr, err)                                        \
 do {                                                                   \
        unsigned long __gu_val;                                         \
-       __chk_user_ptr(ptr);                                            \
-       uaccess_enable_not_uao();                                       \
        switch (sizeof(*(ptr))) {                                       \
        case 1:                                                         \
-               __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr),  \
-                              (err), ARM64_HAS_UAO);                   \
+               __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err));   \
                break;                                                  \
        case 2:                                                         \
-               __get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr),  \
-                              (err), ARM64_HAS_UAO);                   \
+               __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err));   \
                break;                                                  \
        case 4:                                                         \
-               __get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr),    \
-                              (err), ARM64_HAS_UAO);                   \
+               __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err));       \
                break;                                                  \
        case 8:                                                         \
-               __get_user_asm("ldr", "ldtr", "%x",  __gu_val, (ptr),   \
-                              (err), ARM64_HAS_UAO);                   \
+               __get_mem_asm(ldr, "%x",  __gu_val, (ptr), (err));      \
                break;                                                  \
        default:                                                        \
                BUILD_BUG();                                            \
        }                                                               \
-       uaccess_disable_not_uao();                                      \
        (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
 } while (0)
 
+#define __raw_get_user(x, ptr, err)                                    \
+do {                                                                   \
+       __chk_user_ptr(ptr);                                            \
+       uaccess_ttbr0_enable();                                         \
+       __raw_get_mem("ldtr", x, ptr, err);                             \
+       uaccess_ttbr0_disable();                                        \
+} while (0)
+
 #define __get_user_error(x, ptr, err)                                  \
 do {                                                                   \
        __typeof__(*(ptr)) __user *__p = (ptr);                         \
@@ -318,10 +270,19 @@ do {                                                                      \
 
 #define get_user       __get_user
 
-#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature)   \
+#define __get_kernel_nofault(dst, src, type, err_label)                        \
+do {                                                                   \
+       int __gkn_err = 0;                                              \
+                                                                       \
+       __raw_get_mem("ldr", *((type *)(dst)),                          \
+                     (__force type *)(src), __gkn_err);                \
+       if (unlikely(__gkn_err))                                        \
+               goto err_label;                                         \
+} while (0)
+
+#define __put_mem_asm(store, reg, x, addr, err)                                \
        asm volatile(                                                   \
-       "1:"ALTERNATIVE(instr "     " reg "1, [%2]\n",                  \
-                       alt_instr " " reg "1, [%2]\n", feature)         \
+       "1:     " store "       " reg "1, [%2]\n"                       \
        "2:\n"                                                          \
        "       .section .fixup,\"ax\"\n"                               \
        "       .align  2\n"                                            \
@@ -332,32 +293,33 @@ do {                                                                      \
        : "+r" (err)                                                    \
        : "r" (x), "r" (addr), "i" (-EFAULT))
 
-#define __raw_put_user(x, ptr, err)                                    \
+#define __raw_put_mem(str, x, ptr, err)                                        \
 do {                                                                   \
        __typeof__(*(ptr)) __pu_val = (x);                              \
-       __chk_user_ptr(ptr);                                            \
-       uaccess_enable_not_uao();                                       \
        switch (sizeof(*(ptr))) {                                       \
        case 1:                                                         \
-               __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr),  \
-                              (err), ARM64_HAS_UAO);                   \
+               __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err));   \
                break;                                                  \
        case 2:                                                         \
-               __put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr),  \
-                              (err), ARM64_HAS_UAO);                   \
+               __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err));   \
                break;                                                  \
        case 4:                                                         \
-               __put_user_asm("str", "sttr", "%w", __pu_val, (ptr),    \
-                              (err), ARM64_HAS_UAO);                   \
+               __put_mem_asm(str, "%w", __pu_val, (ptr), (err));       \
                break;                                                  \
        case 8:                                                         \
-               __put_user_asm("str", "sttr", "%x", __pu_val, (ptr),    \
-                              (err), ARM64_HAS_UAO);                   \
+               __put_mem_asm(str, "%x", __pu_val, (ptr), (err));       \
                break;                                                  \
        default:                                                        \
                BUILD_BUG();                                            \
        }                                                               \
-       uaccess_disable_not_uao();                                      \
+} while (0)
+
+#define __raw_put_user(x, ptr, err)                                    \
+do {                                                                   \
+       __chk_user_ptr(ptr);                                            \
+       uaccess_ttbr0_enable();                                         \
+       __raw_put_mem("sttr", x, ptr, err);                             \
+       uaccess_ttbr0_disable();                                        \
 } while (0)
 
 #define __put_user_error(x, ptr, err)                                  \
@@ -381,14 +343,24 @@ do {                                                                      \
 
 #define put_user       __put_user
 
+#define __put_kernel_nofault(dst, src, type, err_label)                        \
+do {                                                                   \
+       int __pkn_err = 0;                                              \
+                                                                       \
+       __raw_put_mem("str", *((type *)(src)),                          \
+                     (__force type *)(dst), __pkn_err);                \
+       if (unlikely(__pkn_err))                                        \
+               goto err_label;                                         \
+} while (0)
+
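These *_nofault helpers back the generic get_kernel_nofault()/put_kernel_nofault() wrappers from <linux/uaccess.h>; a brief caller sketch (variable names are illustrative):

	/* Probe a kernel address without risking a fault-induced oops;
	 * 'addr' is some kernel pointer of interest. */
	unsigned long val;

	if (get_kernel_nofault(val, (unsigned long *)addr))
		pr_warn("%px is not readable\n", addr);
	else
		pr_info("read 0x%lx\n", val);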
 extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
 #define raw_copy_from_user(to, from, n)                                        \
 ({                                                                     \
        unsigned long __acfu_ret;                                       \
-       uaccess_enable_not_uao();                                       \
+       uaccess_ttbr0_enable();                                         \
        __acfu_ret = __arch_copy_from_user((to),                        \
                                      __uaccess_mask_ptr(from), (n));   \
-       uaccess_disable_not_uao();                                      \
+       uaccess_ttbr0_disable();                                        \
        __acfu_ret;                                                     \
 })
 
@@ -396,10 +368,10 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi
 #define raw_copy_to_user(to, from, n)                                  \
 ({                                                                     \
        unsigned long __actu_ret;                                       \
-       uaccess_enable_not_uao();                                       \
+       uaccess_ttbr0_enable();                                         \
        __actu_ret = __arch_copy_to_user(__uaccess_mask_ptr(to),        \
                                    (from), (n));                       \
-       uaccess_disable_not_uao();                                      \
+       uaccess_ttbr0_disable();                                        \
        __actu_ret;                                                     \
 })
 
@@ -407,10 +379,10 @@ extern unsigned long __must_check __arch_copy_in_user(void __user *to, const voi
 #define raw_copy_in_user(to, from, n)                                  \
 ({                                                                     \
        unsigned long __aciu_ret;                                       \
-       uaccess_enable_not_uao();                                       \
+       uaccess_ttbr0_enable();                                         \
        __aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to),        \
                                    __uaccess_mask_ptr(from), (n));     \
-       uaccess_disable_not_uao();                                      \
+       uaccess_ttbr0_disable();                                        \
        __aciu_ret;                                                     \
 })
 
@@ -421,9 +393,9 @@ extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned lo
 static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n)
 {
        if (access_ok(to, n)) {
-               uaccess_enable_not_uao();
+               uaccess_ttbr0_enable();
                n = __arch_clear_user(__uaccess_mask_ptr(to), n);
-               uaccess_disable_not_uao();
+               uaccess_ttbr0_disable();
        }
        return n;
 }
index bbaf0bc..86364ab 100644 (file)
@@ -58,7 +58,6 @@ obj-$(CONFIG_CRASH_DUMP)              += crash_dump.o
 obj-$(CONFIG_CRASH_CORE)               += crash_core.o
 obj-$(CONFIG_ARM_SDE_INTERFACE)                += sdei.o
 obj-$(CONFIG_ARM64_PTR_AUTH)           += pointer_auth.o
-obj-$(CONFIG_SHADOW_CALL_STACK)                += scs.o
 obj-$(CONFIG_ARM64_MTE)                        += mte.o
 
 obj-y                                  += vdso/ probes/
index 7303994..a57cffb 100644 (file)
@@ -21,7 +21,8 @@
 #define ALT_ORIG_PTR(a)                __ALT_PTR(a, orig_offset)
 #define ALT_REPL_PTR(a)                __ALT_PTR(a, alt_offset)
 
-static int all_alternatives_applied;
+/* Volatile, as we may be patching the guts of READ_ONCE() */
+static volatile int all_alternatives_applied;
 
 static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS);
 
@@ -205,7 +206,7 @@ static int __apply_alternatives_multi_stop(void *unused)
 
        /* We always have a CPU 0 at this point (__init) */
        if (smp_processor_id()) {
-               while (!READ_ONCE(all_alternatives_applied))
+               while (!all_alternatives_applied)
                        cpu_relax();
                isb();
        } else {
@@ -217,7 +218,7 @@ static int __apply_alternatives_multi_stop(void *unused)
                BUG_ON(all_alternatives_applied);
                __apply_alternatives(&region, false, remaining_capabilities);
                /* Barriers provided by the cache flushing */
-               WRITE_ONCE(all_alternatives_applied, 1);
+               all_alternatives_applied = 1;
        }
 
        return 0;
index 7364de0..0e86e8b 100644 (file)
@@ -277,7 +277,7 @@ static void __init register_insn_emulation_sysctl(void)
 
 #define __user_swpX_asm(data, addr, res, temp, temp2, B)       \
 do {                                                           \
-       uaccess_enable();                                       \
+       uaccess_enable_privileged();                            \
        __asm__ __volatile__(                                   \
        "       mov             %w3, %w7\n"                     \
        "0:     ldxr"B"         %w2, [%4]\n"                    \
@@ -302,7 +302,7 @@ do {                                                                \
          "i" (-EFAULT),                                        \
          "i" (__SWP_LL_SC_LOOPS)                               \
        : "memory");                                            \
-       uaccess_disable();                                      \
+       uaccess_disable_privileged();                           \
 } while (0)
 
 #define __user_swp_asm(data, addr, res, temp, temp2) \
index 7d32fc9..679b19b 100644 (file)
@@ -30,7 +30,6 @@ int main(void)
   BLANK();
   DEFINE(TSK_TI_FLAGS,         offsetof(struct task_struct, thread_info.flags));
   DEFINE(TSK_TI_PREEMPT,       offsetof(struct task_struct, thread_info.preempt_count));
-  DEFINE(TSK_TI_ADDR_LIMIT,    offsetof(struct task_struct, thread_info.addr_limit));
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
   DEFINE(TSK_TI_TTBR0,         offsetof(struct task_struct, thread_info.ttbr0));
 #endif
@@ -70,7 +69,7 @@ int main(void)
   DEFINE(S_PSTATE,             offsetof(struct pt_regs, pstate));
   DEFINE(S_PC,                 offsetof(struct pt_regs, pc));
   DEFINE(S_SYSCALLNO,          offsetof(struct pt_regs, syscallno));
-  DEFINE(S_ORIG_ADDR_LIMIT,    offsetof(struct pt_regs, orig_addr_limit));
+  DEFINE(S_SDEI_TTBR1,         offsetof(struct pt_regs, sdei_ttbr1));
   DEFINE(S_PMR_SAVE,           offsetof(struct pt_regs, pmr_save));
   DEFINE(S_STACKFRAME,         offsetof(struct pt_regs, stackframe));
   DEFINE(S_FRAME_SIZE,         sizeof(struct pt_regs));
index 6f36c4f..39138f6 100644 (file)
@@ -153,10 +153,6 @@ EXPORT_SYMBOL(cpu_hwcap_keys);
                .width = 0,                             \
        }
 
-/* meta feature for alternatives */
-static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
-
 static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap);
 
 static bool __system_matches_cap(unsigned int n);
@@ -1528,8 +1524,10 @@ bool cpu_has_amu_feat(int cpu)
        return cpumask_test_cpu(cpu, &amu_cpus);
 }
 
-/* Initialize the use of AMU counters for frequency invariance */
-extern void init_cpu_freq_invariance_counters(void);
+int get_cpu_with_amu_feat(void)
+{
+       return cpumask_any(&amu_cpus);
+}
 
 static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
 {
@@ -1537,7 +1535,7 @@ static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
                pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
                        smp_processor_id());
                cpumask_set_cpu(smp_processor_id(), &amu_cpus);
-               init_cpu_freq_invariance_counters();
+               update_freq_counters_refs();
        }
 }
 
@@ -1559,6 +1557,11 @@ static bool has_amu(const struct arm64_cpu_capabilities *cap,
 
        return true;
 }
+#else
+int get_cpu_with_amu_feat(void)
+{
+       return nr_cpu_ids;
+}
 #endif
 
 #ifdef CONFIG_ARM64_VHE
@@ -1600,7 +1603,7 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
        WARN_ON_ONCE(in_interrupt());
 
        sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
-       asm(SET_PSTATE_PAN(1));
+       set_pstate_pan(1);
 }
 #endif /* CONFIG_ARM64_PAN */
 
@@ -1770,28 +1773,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
                .matches = has_no_hw_prefetch,
        },
-#ifdef CONFIG_ARM64_UAO
-       {
-               .desc = "User Access Override",
-               .capability = ARM64_HAS_UAO,
-               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
-               .matches = has_cpuid_feature,
-               .sys_reg = SYS_ID_AA64MMFR2_EL1,
-               .field_pos = ID_AA64MMFR2_UAO_SHIFT,
-               .min_field_value = 1,
-               /*
-                * We rely on stop_machine() calling uao_thread_switch() to set
-                * UAO immediately after patching.
-                */
-       },
-#endif /* CONFIG_ARM64_UAO */
-#ifdef CONFIG_ARM64_PAN
-       {
-               .capability = ARM64_ALT_PAN_NOT_UAO,
-               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
-               .matches = cpufeature_pan_not_uao,
-       },
-#endif /* CONFIG_ARM64_PAN */
 #ifdef CONFIG_ARM64_VHE
        {
                .desc = "Virtualization Host Extensions",
@@ -2138,6 +2119,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .cpu_enable = cpu_enable_mte,
        },
 #endif /* CONFIG_ARM64_MTE */
+       {
+               .desc = "RCpc load-acquire (LDAPR)",
+               .capability = ARM64_HAS_LDAPR,
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .sys_reg = SYS_ID_AA64ISAR1_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+               .matches = has_cpuid_feature,
+               .min_field_value = 1,
+       },
        {},
 };
 
@@ -2652,7 +2643,7 @@ bool this_cpu_has_cap(unsigned int n)
  * - The SYSTEM_FEATURE cpu_hwcaps may not have been set.
  * In all other cases cpus_have_{const_}cap() should be used.
  */
-static bool __system_matches_cap(unsigned int n)
+static bool __maybe_unused __system_matches_cap(unsigned int n)
 {
        if (n < ARM64_NCAPS) {
                const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n];
@@ -2732,12 +2723,6 @@ void __init setup_cpu_features(void)
                        ARCH_DMA_MINALIGN);
 }
 
-static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
-{
-       return (__system_matches_cap(ARM64_HAS_PAN) && !__system_matches_cap(ARM64_HAS_UAO));
-}
-
 static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
 {
        cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
index fa76151..4f3661e 100644 (file)
@@ -234,9 +234,8 @@ static void send_user_sigtrap(int si_code)
        if (interrupts_enabled(regs))
                local_irq_enable();
 
-       arm64_force_sig_fault(SIGTRAP, si_code,
-                            (void __user *)instruction_pointer(regs),
-                            "User debug trap");
+       arm64_force_sig_fault(SIGTRAP, si_code, instruction_pointer(regs),
+                             "User debug trap");
 }
 
 static int single_step_handler(unsigned long unused, unsigned int esr,
index a71844f..28d8a5d 100644 (file)
@@ -7,30 +7,48 @@
 #include <linux/pe.h>
 #include <linux/sizes.h>
 
+       .macro  efi_signature_nop
+#ifdef CONFIG_EFI
+.L_head:
+       /*
+        * This ccmp instruction has no meaningful effect except that
+        * its opcode forms the magic "MZ" signature required by UEFI.
+        */
+       ccmp    x18, #0, #0xd, pl
+#else
+       /*
+        * Bootloaders may inspect the opcode at the start of the kernel
+        * image to decide if the kernel is capable of booting via UEFI.
+        * So put an ordinary NOP here, not the "MZ.." pseudo-nop above.
+        */
+       nop
+#endif
+       .endm
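For reference, the reason this particular instruction doubles as the signature: "ccmp x18, #0, #0xd, pl" assembles to 0xfa405a4d, which is stored little-endian as the bytes 0x4d 0x5a 0x40 0xfa, so the image begins with the "MZ" marker that PE/COFF loaders look for while remaining a harmless instruction (it only writes the condition flags).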
+
        .macro  __EFI_PE_HEADER
+#ifdef CONFIG_EFI
+       .set    .Lpe_header_offset, . - .L_head
        .long   PE_MAGIC
-coff_header:
        .short  IMAGE_FILE_MACHINE_ARM64                // Machine
-       .short  section_count                           // NumberOfSections
+       .short  .Lsection_count                         // NumberOfSections
        .long   0                                       // TimeDateStamp
        .long   0                                       // PointerToSymbolTable
        .long   0                                       // NumberOfSymbols
-       .short  section_table - optional_header         // SizeOfOptionalHeader
+       .short  .Lsection_table - .Loptional_header     // SizeOfOptionalHeader
        .short  IMAGE_FILE_DEBUG_STRIPPED | \
                IMAGE_FILE_EXECUTABLE_IMAGE | \
                IMAGE_FILE_LINE_NUMS_STRIPPED           // Characteristics
 
-optional_header:
+.Loptional_header:
        .short  PE_OPT_MAGIC_PE32PLUS                   // PE32+ format
        .byte   0x02                                    // MajorLinkerVersion
        .byte   0x14                                    // MinorLinkerVersion
-       .long   __initdata_begin - efi_header_end       // SizeOfCode
+       .long   __initdata_begin - .Lefi_header_end     // SizeOfCode
        .long   __pecoff_data_size                      // SizeOfInitializedData
        .long   0                                       // SizeOfUninitializedData
-       .long   __efistub_efi_pe_entry - _head          // AddressOfEntryPoint
-       .long   efi_header_end - _head                  // BaseOfCode
+       .long   __efistub_efi_pe_entry - .L_head        // AddressOfEntryPoint
+       .long   .Lefi_header_end - .L_head              // BaseOfCode
 
-extra_header_fields:
        .quad   0                                       // ImageBase
        .long   SEGMENT_ALIGN                           // SectionAlignment
        .long   PECOFF_FILE_ALIGNMENT                   // FileAlignment
@@ -42,10 +60,10 @@ extra_header_fields:
        .short  0                                       // MinorSubsystemVersion
        .long   0                                       // Win32VersionValue
 
-       .long   _end - _head                            // SizeOfImage
+       .long   _end - .L_head                          // SizeOfImage
 
        // Everything before the kernel image is considered part of the header
-       .long   efi_header_end - _head                  // SizeOfHeaders
+       .long   .Lefi_header_end - .L_head              // SizeOfHeaders
        .long   0                                       // CheckSum
        .short  IMAGE_SUBSYSTEM_EFI_APPLICATION         // Subsystem
        .short  0                                       // DllCharacteristics
@@ -54,7 +72,7 @@ extra_header_fields:
        .quad   0                                       // SizeOfHeapReserve
        .quad   0                                       // SizeOfHeapCommit
        .long   0                                       // LoaderFlags
-       .long   (section_table - .) / 8                 // NumberOfRvaAndSizes
+       .long   (.Lsection_table - .) / 8               // NumberOfRvaAndSizes
 
        .quad   0                                       // ExportTable
        .quad   0                                       // ImportTable
@@ -64,17 +82,17 @@ extra_header_fields:
        .quad   0                                       // BaseRelocationTable
 
 #ifdef CONFIG_DEBUG_EFI
-       .long   efi_debug_table - _head                 // DebugTable
-       .long   efi_debug_table_size
+       .long   .Lefi_debug_table - .L_head             // DebugTable
+       .long   .Lefi_debug_table_size
 #endif
 
        // Section table
-section_table:
+.Lsection_table:
        .ascii  ".text\0\0\0"
-       .long   __initdata_begin - efi_header_end       // VirtualSize
-       .long   efi_header_end - _head                  // VirtualAddress
-       .long   __initdata_begin - efi_header_end       // SizeOfRawData
-       .long   efi_header_end - _head                  // PointerToRawData
+       .long   __initdata_begin - .Lefi_header_end     // VirtualSize
+       .long   .Lefi_header_end - .L_head              // VirtualAddress
+       .long   __initdata_begin - .Lefi_header_end     // SizeOfRawData
+       .long   .Lefi_header_end - .L_head              // PointerToRawData
 
        .long   0                                       // PointerToRelocations
        .long   0                                       // PointerToLineNumbers
@@ -86,9 +104,9 @@ section_table:
 
        .ascii  ".data\0\0\0"
        .long   __pecoff_data_size                      // VirtualSize
-       .long   __initdata_begin - _head                // VirtualAddress
+       .long   __initdata_begin - .L_head              // VirtualAddress
        .long   __pecoff_data_rawsize                   // SizeOfRawData
-       .long   __initdata_begin - _head                // PointerToRawData
+       .long   __initdata_begin - .L_head              // PointerToRawData
 
        .long   0                                       // PointerToRelocations
        .long   0                                       // PointerToLineNumbers
@@ -98,7 +116,7 @@ section_table:
                IMAGE_SCN_MEM_READ | \
                IMAGE_SCN_MEM_WRITE                     // Characteristics
 
-       .set    section_count, (. - section_table) / 40
+       .set    .Lsection_count, (. - .Lsection_table) / 40
 
 #ifdef CONFIG_DEBUG_EFI
        /*
@@ -114,21 +132,21 @@ section_table:
        __INITRODATA
 
        .align  2
-efi_debug_table:
+.Lefi_debug_table:
        // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY
        .long   0                                       // Characteristics
        .long   0                                       // TimeDateStamp
        .short  0                                       // MajorVersion
        .short  0                                       // MinorVersion
        .long   IMAGE_DEBUG_TYPE_CODEVIEW               // Type
-       .long   efi_debug_entry_size                    // SizeOfData
+       .long   .Lefi_debug_entry_size                  // SizeOfData
        .long   0                                       // RVA
-       .long   efi_debug_entry - _head                 // FileOffset
+       .long   .Lefi_debug_entry - .L_head             // FileOffset
 
-       .set    efi_debug_table_size, . - efi_debug_table
+       .set    .Lefi_debug_table_size, . - .Lefi_debug_table
        .previous
 
-efi_debug_entry:
+.Lefi_debug_entry:
        // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY
        .ascii  "NB10"                                  // Signature
        .long   0                                       // Unknown
@@ -137,16 +155,12 @@ efi_debug_entry:
 
        .asciz  VMLINUX_PATH
 
-       .set    efi_debug_entry_size, . - efi_debug_entry
+       .set    .Lefi_debug_entry_size, . - .Lefi_debug_entry
 #endif
 
-       /*
-        * EFI will load .text onwards at the 4k section alignment
-        * described in the PE/COFF header. To ensure that instruction
-        * sequences using an adrp and a :lo12: immediate will function
-        * correctly at this alignment, we must ensure that .text is
-        * placed at a 4k boundary in the Image to begin with.
-        */
        .balign SEGMENT_ALIGN
-efi_header_end:
+.Lefi_header_end:
+#else
+       .set    .Lpe_header_offset, 0x0
+#endif
        .endm
index 70e0a75..5346953 100644 (file)
@@ -115,7 +115,6 @@ static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
 
        enter_from_kernel_mode(regs);
        local_daif_inherit(regs);
-       far = untagged_addr(far);
        do_mem_abort(far, esr, regs);
        local_daif_mask();
        exit_to_kernel_mode(regs);
@@ -256,7 +255,6 @@ static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
 
        enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
-       far = untagged_addr(far);
        do_mem_abort(far, esr, regs);
 }
 
index d72c818..51c7621 100644 (file)
@@ -216,12 +216,6 @@ alternative_else_nop_endif
        .else
        add     x21, sp, #S_FRAME_SIZE
        get_current_task tsk
-       /* Save the task's original addr_limit and set USER_DS */
-       ldr     x20, [tsk, #TSK_TI_ADDR_LIMIT]
-       str     x20, [sp, #S_ORIG_ADDR_LIMIT]
-       mov     x20, #USER_DS
-       str     x20, [tsk, #TSK_TI_ADDR_LIMIT]
-       /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
        .endif /* \el == 0 */
        mrs     x22, elr_el1
        mrs     x23, spsr_el1
@@ -279,12 +273,6 @@ alternative_else_nop_endif
        .macro  kernel_exit, el
        .if     \el != 0
        disable_daif
-
-       /* Restore the task's original addr_limit. */
-       ldr     x20, [sp, #S_ORIG_ADDR_LIMIT]
-       str     x20, [tsk, #TSK_TI_ADDR_LIMIT]
-
-       /* No need to restore UAO, it will be restored from SPSR_EL1 */
        .endif
 
        /* Restore pmr */
@@ -438,7 +426,7 @@ SYM_CODE_END(__swpan_exit_el0)
 
 #ifdef CONFIG_SHADOW_CALL_STACK
        /* also switch to the irq shadow stack */
-       adr_this_cpu scs_sp, irq_shadow_call_stack, x26
+       ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x26
 #endif
 
 9998:
@@ -773,9 +761,10 @@ SYM_CODE_END(ret_to_user)
  */
        .pushsection ".entry.tramp.text", "ax"
 
+       // Move from tramp_pg_dir to swapper_pg_dir
        .macro tramp_map_kernel, tmp
        mrs     \tmp, ttbr1_el1
-       add     \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE)
+       add     \tmp, \tmp, #(2 * PAGE_SIZE)
        bic     \tmp, \tmp, #USER_ASID_FLAG
        msr     ttbr1_el1, \tmp
 #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
@@ -792,9 +781,10 @@ alternative_else_nop_endif
 #endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */
        .endm
 
+       // Move from swapper_pg_dir to tramp_pg_dir
        .macro tramp_unmap_kernel, tmp
        mrs     \tmp, ttbr1_el1
-       sub     \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE)
+       sub     \tmp, \tmp, #(2 * PAGE_SIZE)
        orr     \tmp, \tmp, #USER_ASID_FLAG
        msr     ttbr1_el1, \tmp
        /*
@@ -965,10 +955,9 @@ SYM_CODE_START(__sdei_asm_entry_trampoline)
        mov     x4, xzr
 
        /*
-        * Use reg->interrupted_regs.addr_limit to remember whether to unmap
-        * the kernel on exit.
+        * Remember whether to unmap the kernel on exit.
         */
-1:     str     x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
+1:     str     x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
 
 #ifdef CONFIG_RANDOMIZE_BASE
        adr     x4, tramp_vectors + PAGE_SIZE
@@ -989,7 +978,7 @@ NOKPROBE(__sdei_asm_entry_trampoline)
  * x4: struct sdei_registered_event argument from registration time.
  */
 SYM_CODE_START(__sdei_asm_exit_trampoline)
-       ldr     x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
+       ldr     x4, [x4, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
        cbnz    x4, 1f
 
        tramp_unmap_kernel      tmp=x4
@@ -1063,9 +1052,9 @@ SYM_CODE_START(__sdei_asm_handler)
 #ifdef CONFIG_SHADOW_CALL_STACK
        /* Use a separate shadow call stack for normal and critical events */
        cbnz    w4, 3f
-       adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal, tmp=x6
+       ldr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal_ptr, tmp=x6
        b       4f
-3:     adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical, tmp=x6
+3:     ldr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical_ptr, tmp=x6
 4:
 #endif
 
index d8d9caf..f2eb206 100644 (file)
  * in the entry routines.
  */
        __HEAD
-_head:
        /*
         * DO NOT MODIFY. Image header expected by Linux boot-loaders.
         */
-#ifdef CONFIG_EFI
-       /*
-        * This add instruction has no meaningful effect except that
-        * its opcode forms the magic "MZ" signature required by UEFI.
-        */
-       add     x13, x18, #0x16
-       b       primary_entry
-#else
+	efi_signature_nop			// special NOP to identify as PE/COFF executable
        b       primary_entry                   // branch to kernel start, magic
-       .long   0                               // reserved
-#endif
        .quad   0                               // Image load offset from start of RAM, little-endian
        le64sym _kernel_size_le                 // Effective size of kernel image, little-endian
        le64sym _kernel_flags_le                // Informative flags, little-endian
@@ -80,14 +70,9 @@ _head:
        .quad   0                               // reserved
        .quad   0                               // reserved
        .ascii  ARM64_IMAGE_MAGIC               // Magic number
-#ifdef CONFIG_EFI
-       .long   pe_header - _head               // Offset to the PE header.
+       .long   .Lpe_header_offset              // Offset to the PE header.
 
-pe_header:
        __EFI_PE_HEADER
-#else
-       .long   0                               // reserved
-#endif
 
        __INIT
 
@@ -104,7 +89,7 @@ pe_header:
         */
 SYM_CODE_START(primary_entry)
        bl      preserve_boot_args
-       bl      el2_setup                       // Drop to EL1, w0=cpu_boot_mode
+       bl      init_kernel_el                  // w0=cpu_boot_mode
        adrp    x23, __PHYS_OFFSET
        and     x23, x23, MIN_KIMG_ALIGN - 1    // KASLR offset, defaults to 0
        bl      set_cpu_boot_mode_flag
@@ -482,24 +467,33 @@ EXPORT_SYMBOL(kimage_vaddr)
        .section ".idmap.text","awx"
 
 /*
- * If we're fortunate enough to boot at EL2, ensure that the world is
- * sane before dropping to EL1.
+ * Starting from EL2 or EL1, configure the CPU to execute at the highest
+ * reachable EL supported by the kernel in a chosen default state. If dropping
+ * from EL2 to EL1, configure EL2 before configuring EL1.
+ *
+ * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
+ * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
  *
  * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
  * booted in EL1 or EL2 respectively.
  */
-SYM_FUNC_START(el2_setup)
-       msr     SPsel, #1                       // We want to use SP_EL{1,2}
+SYM_FUNC_START(init_kernel_el)
        mrs     x0, CurrentEL
        cmp     x0, #CurrentEL_EL2
-       b.eq    1f
-       mov_q   x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
+       b.eq    init_el2
+
+SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
+       mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
        msr     sctlr_el1, x0
-       mov     w0, #BOOT_CPU_MODE_EL1          // This cpu booted in EL1
        isb
-       ret
+       mov_q   x0, INIT_PSTATE_EL1
+       msr     spsr_el1, x0
+       msr     elr_el1, lr
+       mov     w0, #BOOT_CPU_MODE_EL1
+       eret
 
-1:     mov_q   x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
+SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
+       mov_q   x0, INIT_SCTLR_EL2_MMU_OFF
        msr     sctlr_el2, x0
 
 #ifdef CONFIG_ARM64_VHE
@@ -608,9 +602,12 @@ set_hcr:
 
        cbz     x2, install_el2_stub
 
-       mov     w0, #BOOT_CPU_MODE_EL2          // This CPU booted in EL2
        isb
-       ret
+       mov_q   x0, INIT_PSTATE_EL2
+       msr     spsr_el2, x0
+       msr     elr_el2, lr
+       mov     w0, #BOOT_CPU_MODE_EL2
+       eret
 
 SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
        /*
@@ -620,7 +617,7 @@ SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
         * requires no configuration, and all non-hyp-specific EL2 setup
         * will be done via the _EL1 system register aliases in __cpu_setup.
         */
-       mov_q   x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
+       mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
        msr     sctlr_el1, x0
 
        /* Coprocessor traps. */
@@ -642,14 +639,13 @@ SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
 7:     adr_l   x0, __hyp_stub_vectors
        msr     vbar_el2, x0
 
-       /* spsr */
-       mov     x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
-                     PSR_MODE_EL1h)
+       isb
+       mov     x0, #INIT_PSTATE_EL1
        msr     spsr_el2, x0
        msr     elr_el2, lr
-       mov     w0, #BOOT_CPU_MODE_EL2          // This CPU booted in EL2
+       mov     w0, #BOOT_CPU_MODE_EL2
        eret
-SYM_FUNC_END(el2_setup)
+SYM_FUNC_END(init_kernel_el)
 
 /*
  * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
@@ -699,7 +695,7 @@ SYM_DATA_END(__early_cpu_boot_status)
         * cores are held until we're ready for them to initialise.
         */
 SYM_FUNC_START(secondary_holding_pen)
-       bl      el2_setup                       // Drop to EL1, w0=cpu_boot_mode
+       bl      init_kernel_el                  // w0=cpu_boot_mode
        bl      set_cpu_boot_mode_flag
        mrs     x0, mpidr_el1
        mov_q   x1, MPIDR_HWID_BITMASK
@@ -717,7 +713,7 @@ SYM_FUNC_END(secondary_holding_pen)
         * be used where CPUs are brought online dynamically by the kernel.
         */
 SYM_FUNC_START(secondary_entry)
-       bl      el2_setup                       // Drop to EL1
+       bl      init_kernel_el                  // w0=cpu_boot_mode
        bl      set_cpu_boot_mode_flag
        b       secondary_startup
 SYM_FUNC_END(secondary_entry)
index 60456a6..dfb1fea 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/irqchip.h>
 #include <linux/kprobes.h>
+#include <linux/scs.h>
 #include <linux/seq_file.h>
 #include <linux/vmalloc.h>
 #include <asm/daifflags.h>
@@ -27,6 +28,25 @@ DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts);
 
 DEFINE_PER_CPU(unsigned long *, irq_stack_ptr);
 
+
+DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#endif
+
+static void init_irq_scs(void)
+{
+       int cpu;
+
+       if (!IS_ENABLED(CONFIG_SHADOW_CALL_STACK))
+               return;
+
+       for_each_possible_cpu(cpu)
+               per_cpu(irq_shadow_call_stack_ptr, cpu) =
+                       scs_alloc(cpu_to_node(cpu));
+}
+
 #ifdef CONFIG_VMAP_STACK
 static void init_irq_stacks(void)
 {
@@ -54,6 +74,7 @@ static void init_irq_stacks(void)
 void __init init_IRQ(void)
 {
        init_irq_stacks();
+       init_irq_scs();
        irqchip_init();
        if (!handle_arch_irq)
                panic("No interrupt controller found.");
index b181e05..0921aa1 100644 (file)
@@ -50,10 +50,16 @@ static __init u64 get_kaslr_seed(void *fdt)
        return ret;
 }
 
-static __init const u8 *kaslr_get_cmdline(void *fdt)
+static __init bool cmdline_contains_nokaslr(const u8 *cmdline)
 {
-       static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
+       const u8 *str;
 
+       str = strstr(cmdline, "nokaslr");
+       return str == cmdline || (str > cmdline && *(str - 1) == ' ');
+}
+
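The helper only checks the character preceding the match, so its behaviour on a few illustrative command lines is:

	cmdline_contains_nokaslr("nokaslr");                  /* true: match at the start      */
	cmdline_contains_nokaslr("console=ttyAMA0 nokaslr");  /* true: preceded by a space     */
	cmdline_contains_nokaslr("nokaslrfoo");               /* true: only the leading boundary is checked */
	cmdline_contains_nokaslr("kaslr=off");                /* false: no "nokaslr" substring */
	cmdline_contains_nokaslr("foo=barnokaslr");           /* false: preceded by 'r'        */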
+static __init bool is_kaslr_disabled_cmdline(void *fdt)
+{
        if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
                int node;
                const u8 *prop;
@@ -65,10 +71,17 @@ static __init const u8 *kaslr_get_cmdline(void *fdt)
                prop = fdt_getprop(fdt, node, "bootargs", NULL);
                if (!prop)
                        goto out;
-               return prop;
+
+               if (cmdline_contains_nokaslr(prop))
+                       return true;
+
+               if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+                       goto out;
+
+               return false;
        }
 out:
-       return default_cmdline;
+       return cmdline_contains_nokaslr(CONFIG_CMDLINE);
 }
 
 /*
@@ -83,7 +96,6 @@ u64 __init kaslr_early_init(u64 dt_phys)
 {
        void *fdt;
        u64 seed, offset, mask, module_range;
-       const u8 *cmdline, *str;
        unsigned long raw;
        int size;
 
@@ -115,9 +127,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
         * Check if 'nokaslr' appears on the command line, and
         * return 0 if that is the case.
         */
-       cmdline = kaslr_get_cmdline(fdt);
-       str = strstr(cmdline, "nokaslr");
-       if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) {
+       if (is_kaslr_disabled_cmdline(fdt)) {
                kaslr_status = KASLR_DISABLED_CMDLINE;
                return 0;
        }
index 52a0638..ef15c8a 100644 (file)
@@ -189,7 +189,8 @@ long get_mte_ctrl(struct task_struct *task)
 
        switch (task->thread.sctlr_tcf0) {
        case SCTLR_EL1_TCF0_NONE:
-               return PR_MTE_TCF_NONE;
+               ret |= PR_MTE_TCF_NONE;
+               break;
        case SCTLR_EL1_TCF0_SYNC:
                ret |= PR_MTE_TCF_SYNC;
                break;
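With this change the value returned by prctl(PR_GET_TAGGED_ADDR_CTRL) keeps the already-accumulated flags when TCF0 is NONE instead of returning early and dropping them; a userspace sketch (illustrative, includes and error handling omitted):

	long ctrl = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);

	if ((ctrl & PR_MTE_TCF_MASK) == PR_MTE_TCF_NONE)
		/* The tag-exclusion mask is still reported in this case now. */
		printf("GCR exclude mask: 0x%lx\n",
		       (ctrl & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT);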
index 3605f77..38bb07e 100644 (file)
@@ -23,6 +23,8 @@
 #include <linux/platform_device.h>
 #include <linux/sched_clock.h>
 #include <linux/smp.h>
+#include <linux/nmi.h>
+#include <linux/cpufreq.h>
 
 /* ARMv8 Cortex-A53 specific event types. */
 #define ARMV8_A53_PERFCTR_PREF_LINEFILL                                0xC2
@@ -1248,10 +1250,21 @@ static struct platform_driver armv8_pmu_driver = {
 
 static int __init armv8_pmu_driver_init(void)
 {
+       int ret;
+
        if (acpi_disabled)
-               return platform_driver_register(&armv8_pmu_driver);
+               ret = platform_driver_register(&armv8_pmu_driver);
        else
-               return arm_pmu_acpi_probe(armv8_pmuv3_init);
+               ret = arm_pmu_acpi_probe(armv8_pmuv3_init);
+
+       /*
+        * Try to re-initialize lockup detector after PMU init in
+        * case PMU events are triggered via NMIs.
+        */
+       if (ret == 0 && arm_pmu_irq_is_nmi())
+               lockup_detector_init();
+
+       return ret;
 }
 device_initcall(armv8_pmu_driver_init)
 
@@ -1309,3 +1322,27 @@ void arch_perf_update_userpage(struct perf_event *event,
        userpg->cap_user_time_zero = 1;
        userpg->cap_user_time_short = 1;
 }
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+/*
+ * Safe maximum CPU frequency in case a particular platform doesn't implement
+ * a cpufreq driver. The architecture places no restriction on the maximum
+ * frequency, but 5 GHz is a safe ceiling given that currently available Arm
+ * CPUs are clocked well below it. It can't be made much higher either, as
+ * that would lead to a large hard-lockup detection timeout on parts which
+ * run slower (e.g. 1 GHz on Developerbox) and don't have a cpufreq driver.
+ */
+#define SAFE_MAX_CPU_FREQ      5000000000UL // 5 GHz
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+       unsigned int cpu = smp_processor_id();
+       unsigned long max_cpu_freq;
+
+       max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
+       if (!max_cpu_freq)
+               max_cpu_freq = SAFE_MAX_CPU_FREQ;
+
+       return (u64)max_cpu_freq * watchdog_thresh;
+}
+#endif
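
The sample period fed to the hard-lockup detector is the CPU's maximum frequency in Hz multiplied by the watchdog threshold in seconds, with the 5 GHz constant as a fallback when cpufreq reports nothing. A standalone sketch of that arithmetic, using invented frequencies and a 10 s threshold:

#include <stdio.h>

#define SAFE_MAX_CPU_FREQ	5000000000ULL	/* 5 GHz fallback, as above */

/* max_freq_khz stands in for cpufreq_get_hw_max_freq(); the values used
 * below are made up for illustration. */
static unsigned long long sample_period(unsigned long long max_freq_khz,
					int watchdog_thresh)
{
	unsigned long long max_cpu_freq = max_freq_khz * 1000ULL;

	if (!max_cpu_freq)
		max_cpu_freq = SAFE_MAX_CPU_FREQ;

	return max_cpu_freq * watchdog_thresh;
}

int main(void)
{
	/* 2 GHz part, 10 s threshold: 20,000,000,000 cycles */
	printf("%llu\n", sample_period(2000000, 10));
	/* no cpufreq driver: falls back to 5 GHz, 50,000,000,000 cycles */
	printf("%llu\n", sample_period(0, 10));
	return 0;
}
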
index f11a1a1..89c64ad 100644 (file)
@@ -34,7 +34,7 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 static void __kprobes
-post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
+post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
 
 static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
 {
@@ -68,7 +68,7 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
                p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs);
 
        /* single step simulated, now go for post processing */
-       post_kprobe_handler(kcb, regs);
+       post_kprobe_handler(p, kcb, regs);
 }
 
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -177,19 +177,6 @@ static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
        regs->pstate |= kcb->saved_irqflag;
 }
 
-static void __kprobes
-set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr)
-{
-       kcb->ss_ctx.ss_pending = true;
-       kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t);
-}
-
-static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb)
-{
-       kcb->ss_ctx.ss_pending = false;
-       kcb->ss_ctx.match_addr = 0;
-}
-
 static void __kprobes setup_singlestep(struct kprobe *p,
                                       struct pt_regs *regs,
                                       struct kprobe_ctlblk *kcb, int reenter)
@@ -209,7 +196,6 @@ static void __kprobes setup_singlestep(struct kprobe *p,
                /* prepare for single stepping */
                slot = (unsigned long)p->ainsn.api.insn;
 
-               set_ss_context(kcb, slot);      /* mark pending ss */
                kprobes_save_local_irqflag(kcb, regs);
                instruction_pointer_set(regs, slot);
        } else {
@@ -243,13 +229,8 @@ static int __kprobes reenter_kprobe(struct kprobe *p,
 }
 
 static void __kprobes
-post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
+post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb, struct pt_regs *regs)
 {
-       struct kprobe *cur = kprobe_running();
-
-       if (!cur)
-               return;
-
        /* return addr restore if non-branching insn */
        if (cur->ainsn.api.restore != 0)
                instruction_pointer_set(regs, cur->ainsn.api.restore);
@@ -364,33 +345,23 @@ static void __kprobes kprobe_handler(struct pt_regs *regs)
         */
 }
 
-static int __kprobes
-kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr)
-{
-       if ((kcb->ss_ctx.ss_pending)
-           && (kcb->ss_ctx.match_addr == addr)) {
-               clear_ss_context(kcb);  /* clear pending ss */
-               return DBG_HOOK_HANDLED;
-       }
-       /* not ours, kprobes should ignore it */
-       return DBG_HOOK_ERROR;
-}
-
 static int __kprobes
 kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr)
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-       int retval;
-
-       /* return error if this is not our step */
-       retval = kprobe_ss_hit(kcb, instruction_pointer(regs));
+       unsigned long addr = instruction_pointer(regs);
+       struct kprobe *cur = kprobe_running();
 
-       if (retval == DBG_HOOK_HANDLED) {
+       if (cur && (kcb->kprobe_status == KPROBE_HIT_SS)
+           && ((unsigned long)&cur->ainsn.api.insn[1] == addr)) {
                kprobes_restore_local_irqflag(kcb, regs);
-               post_kprobe_handler(kcb, regs);
+               post_kprobe_handler(cur, kcb, regs);
+
+               return DBG_HOOK_HANDLED;
        }
 
-       return retval;
+       /* not ours, kprobes should ignore it */
+       return DBG_HOOK_ERROR;
 }
 
 static struct break_hook kprobes_break_ss_hook = {
index ed919f6..6616486 100644 (file)
@@ -422,16 +422,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
                if (clone_flags & CLONE_SETTLS)
                        p->thread.uw.tp_value = tls;
        } else {
+               /*
+                * A kthread has no context to ERET to, so ensure any buggy
+                * ERET is treated as an illegal exception return.
+                *
+                * When a user task is created from a kthread, childregs will
+                * be initialized by start_thread() or start_compat_thread().
+                */
                memset(childregs, 0, sizeof(struct pt_regs));
-               childregs->pstate = PSR_MODE_EL1h;
-               if (IS_ENABLED(CONFIG_ARM64_UAO) &&
-                   cpus_have_const_cap(ARM64_HAS_UAO))
-                       childregs->pstate |= PSR_UAO_BIT;
-
-               spectre_v4_enable_task_mitigation(p);
-
-               if (system_uses_irq_prio_masking())
-                       childregs->pmr_save = GIC_PRIO_IRQON;
+               childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
 
                p->thread.cpu_context.x19 = stack_start;
                p->thread.cpu_context.x20 = stk_sz;
@@ -461,17 +460,6 @@ static void tls_thread_switch(struct task_struct *next)
        write_sysreg(*task_user_tls(next), tpidr_el0);
 }
 
-/* Restore the UAO state depending on next's addr_limit */
-void uao_thread_switch(struct task_struct *next)
-{
-       if (IS_ENABLED(CONFIG_ARM64_UAO)) {
-               if (task_thread_info(next)->addr_limit == KERNEL_DS)
-                       asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
-               else
-                       asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO));
-       }
-}
-
 /*
  * Force SSBS state on context-switch, since it may be lost after migrating
  * from a CPU which treats the bit as RES0 in a heterogeneous system.
@@ -554,7 +542,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
        hw_breakpoint_thread_switch(next);
        contextidr_thread_switch(next);
        entry_task_switch(next);
-       uao_thread_switch(next);
        ssbs_thread_switch(next);
        erratum_1418040_thread_switch(prev, next);
 
index f6e4e37..4c25c00 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/prctl.h>
 #include <linux/sched/task_stack.h>
 
+#include <asm/insn.h>
 #include <asm/spectre.h>
 #include <asm/traps.h>
 
@@ -538,12 +539,12 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
 
        if (spectre_v4_mitigations_off()) {
                sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
-               asm volatile(SET_PSTATE_SSBS(1));
+               set_pstate_ssbs(1);
                return SPECTRE_VULNERABLE;
        }
 
        /* SCTLR_EL1.DSSBS was initialised to 0 during boot */
-       asm volatile(SET_PSTATE_SSBS(0));
+       set_pstate_ssbs(0);
        return SPECTRE_MITIGATED;
 }
 
index f49b349..8ac487c 100644 (file)
@@ -192,14 +192,11 @@ static void ptrace_hbptriggered(struct perf_event *bp,
                                break;
                        }
                }
-               arm64_force_sig_ptrace_errno_trap(si_errno,
-                                                 (void __user *)bkpt->trigger,
+               arm64_force_sig_ptrace_errno_trap(si_errno, bkpt->trigger,
                                                  desc);
        }
 #endif
-       arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT,
-                             (void __user *)(bkpt->trigger),
-                             desc);
+       arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc);
 }
 
 /*
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
deleted file mode 100644 (file)
index e8f7ff4..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Shadow Call Stack support.
- *
- * Copyright (C) 2019 Google LLC
- */
-
-#include <linux/percpu.h>
-#include <linux/scs.h>
-
-DEFINE_SCS(irq_shadow_call_stack);
-
-#ifdef CONFIG_ARM_SDE_INTERFACE
-DEFINE_SCS(sdei_shadow_call_stack_normal);
-DEFINE_SCS(sdei_shadow_call_stack_critical);
-#endif
index 793c46d..2c7ca44 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/hardirq.h>
 #include <linux/irqflags.h>
 #include <linux/sched/task_stack.h>
+#include <linux/scs.h>
 #include <linux/uaccess.h>
 
 #include <asm/alternative.h>
@@ -38,6 +39,14 @@ DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
 DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
 #endif
 
+DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
+DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
+DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
+#endif
+
 static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
 {
        unsigned long *p;
@@ -53,6 +62,9 @@ static void free_sdei_stacks(void)
 {
        int cpu;
 
+       if (!IS_ENABLED(CONFIG_VMAP_STACK))
+               return;
+
        for_each_possible_cpu(cpu) {
                _free_sdei_stack(&sdei_stack_normal_ptr, cpu);
                _free_sdei_stack(&sdei_stack_critical_ptr, cpu);
@@ -76,6 +88,9 @@ static int init_sdei_stacks(void)
        int cpu;
        int err = 0;
 
+       if (!IS_ENABLED(CONFIG_VMAP_STACK))
+               return 0;
+
        for_each_possible_cpu(cpu) {
                err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
                if (err)
@@ -91,6 +106,62 @@ static int init_sdei_stacks(void)
        return err;
 }
 
+static void _free_sdei_scs(unsigned long * __percpu *ptr, int cpu)
+{
+       void *s;
+
+       s = per_cpu(*ptr, cpu);
+       if (s) {
+               per_cpu(*ptr, cpu) = NULL;
+               scs_free(s);
+       }
+}
+
+static void free_sdei_scs(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               _free_sdei_scs(&sdei_shadow_call_stack_normal_ptr, cpu);
+               _free_sdei_scs(&sdei_shadow_call_stack_critical_ptr, cpu);
+       }
+}
+
+static int _init_sdei_scs(unsigned long * __percpu *ptr, int cpu)
+{
+       void *s;
+
+       s = scs_alloc(cpu_to_node(cpu));
+       if (!s)
+               return -ENOMEM;
+       per_cpu(*ptr, cpu) = s;
+
+       return 0;
+}
+
+static int init_sdei_scs(void)
+{
+       int cpu;
+       int err = 0;
+
+       if (!IS_ENABLED(CONFIG_SHADOW_CALL_STACK))
+               return 0;
+
+       for_each_possible_cpu(cpu) {
+               err = _init_sdei_scs(&sdei_shadow_call_stack_normal_ptr, cpu);
+               if (err)
+                       break;
+               err = _init_sdei_scs(&sdei_shadow_call_stack_critical_ptr, cpu);
+               if (err)
+                       break;
+       }
+
+       if (err)
+               free_sdei_scs();
+
+       return err;
+}
+
 static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info)
 {
        unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
@@ -131,13 +202,14 @@ unsigned long sdei_arch_get_entry_point(int conduit)
         */
        if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
                pr_err("Not supported on this hardware/boot configuration\n");
-               return 0;
+               goto out_err;
        }
 
-       if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-               if (init_sdei_stacks())
-                       return 0;
-       }
+       if (init_sdei_stacks())
+               goto out_err;
+
+       if (init_sdei_scs())
+               goto out_err_free_stacks;
 
        sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
 
@@ -152,6 +224,10 @@ unsigned long sdei_arch_get_entry_point(int conduit)
 #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
                return (unsigned long)__sdei_asm_handler;
 
+out_err_free_stacks:
+       free_sdei_stacks();
+out_err:
+       return 0;
 }
 
 /*
@@ -179,12 +255,6 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
                sdei_api_event_context(i, &regs->regs[i]);
        }
 
-       /*
-        * We didn't take an exception to get here, set PAN. UAO will be cleared
-        * by sdei_event_handler()s force_uaccess_begin() call.
-        */
-       __uaccess_enable_hw_pan();
-
        err = sdei_event_handler(regs, arg);
        if (err)
                return SDEI_EV_FAILED;
@@ -223,12 +293,39 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
        return vbar + 0x480;
 }
 
+static void __kprobes notrace __sdei_pstate_entry(void)
+{
+       /*
+        * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
+        * whether PSTATE bits are inherited unchanged or generated from
+        * scratch, and the TF-A implementation always clears PAN and always
+        * clears UAO. There are no other known implementations.
+        *
+        * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
+        * PSTATE is modified upon architectural exceptions, and so PAN is
+        * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
+        * cleared.
+        *
+        * We must explicitly reset PAN to the expected state, including
+        * clearing it when the host isn't using it, in case a VM had it set.
+        */
+       if (system_uses_hw_pan())
+               set_pstate_pan(1);
+       else if (cpu_has_pan())
+               set_pstate_pan(0);
+}
 
 asmlinkage noinstr unsigned long
 __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
 {
        unsigned long ret;
 
+       /*
+        * We didn't take an exception to get here, so the HW hasn't
+        * set/cleared bits in PSTATE that we may rely on. Initialize PAN.
+        */
+       __sdei_pstate_entry();
+
        arm64_enter_nmi(regs);
 
        ret = _sdei_handler(regs, arg);
index 133257f..1a57a76 100644 (file)
@@ -206,7 +206,7 @@ static void __init request_standard_resources(void)
        unsigned long i = 0;
        size_t res_size;
 
-       kernel_code.start   = __pa_symbol(_text);
+       kernel_code.start   = __pa_symbol(_stext);
        kernel_code.end     = __pa_symbol(__init_begin - 1);
        kernel_data.start   = __pa_symbol(_sdata);
        kernel_data.end     = __pa_symbol(_end - 1);
@@ -283,7 +283,7 @@ u64 cpu_logical_map(int cpu)
 
 void __init __no_sanitize_address setup_arch(char **cmdline_p)
 {
-       init_mm.start_code = (unsigned long) _text;
+       init_mm.start_code = (unsigned long) _stext;
        init_mm.end_code   = (unsigned long) _etext;
        init_mm.end_data   = (unsigned long) _edata;
        init_mm.brk        = (unsigned long) _end;
@@ -366,7 +366,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
         * faults in case uaccess_enable() is inadvertently called by the init
         * thread.
         */
-       init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page);
+       init_task.thread_info.ttbr0 = __pa_symbol(reserved_pg_dir);
 #endif
 
        if (boot_args[1] || boot_args[2] || boot_args[3]) {
index a8184ca..af5c6c6 100644 (file)
@@ -922,9 +922,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
        trace_hardirqs_off();
 
        do {
-               /* Check valid user FS if needed */
-               addr_limit_user_check();
-
                if (thread_flags & _TIF_NEED_RESCHED) {
                        /* Unmask Debug and SError for the next task */
                        local_daif_restore(DAIF_PROCCTX_NOIRQ);
index ba40d57..4be7f7e 100644 (file)
@@ -99,7 +99,7 @@ SYM_FUNC_END(__cpu_suspend_enter)
 
        .pushsection ".idmap.text", "awx"
 SYM_CODE_START(cpu_resume)
-       bl      el2_setup               // if in EL2 drop to EL1 cleanly
+       bl      init_kernel_el
        bl      __cpu_setup
        /* enable the MMU early - so we can access sleep_save_stash by va */
        adrp    x1, swapper_pg_dir
index 18e9727..2499b89 100644 (file)
@@ -787,14 +787,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 }
 
 static const char *ipi_types[NR_IPI] __tracepoint_string = {
-#define S(x,s) [x] = s
-       S(IPI_RESCHEDULE, "Rescheduling interrupts"),
-       S(IPI_CALL_FUNC, "Function call interrupts"),
-       S(IPI_CPU_STOP, "CPU stop interrupts"),
-       S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"),
-       S(IPI_TIMER, "Timer broadcast interrupts"),
-       S(IPI_IRQ_WORK, "IRQ work interrupts"),
-       S(IPI_WAKEUP, "CPU wake-up interrupts"),
+       [IPI_RESCHEDULE]        = "Rescheduling interrupts",
+       [IPI_CALL_FUNC]         = "Function call interrupts",
+       [IPI_CPU_STOP]          = "CPU stop interrupts",
+       [IPI_CPU_CRASH_STOP]    = "CPU stop (for crash dump) interrupts",
+       [IPI_TIMER]             = "Timer broadcast interrupts",
+       [IPI_IRQ_WORK]          = "IRQ work interrupts",
+       [IPI_WAKEUP]            = "CPU wake-up interrupts",
 };
 
 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
index 96cd347..a67b37a 100644 (file)
@@ -58,7 +58,6 @@ void notrace __cpu_suspend_exit(void)
         * features that might not have been set correctly.
         */
        __uaccess_enable_hw_pan();
-       uao_thread_switch(current);
 
        /*
         * Restore HW breakpoint registers to sane values
index 3c18c24..265fe3e 100644 (file)
@@ -68,7 +68,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags)
  */
 long compat_arm_syscall(struct pt_regs *regs, int scno)
 {
-       void __user *addr;
+       unsigned long addr;
 
        switch (scno) {
        /*
@@ -111,8 +111,7 @@ long compat_arm_syscall(struct pt_regs *regs, int scno)
                break;
        }
 
-       addr  = (void __user *)instruction_pointer(regs) -
-               (compat_thumb_mode(regs) ? 2 : 4);
+       addr = instruction_pointer(regs) - (compat_thumb_mode(regs) ? 2 : 4);
 
        arm64_notify_die("Oops - bad compat syscall(2)", regs,
                         SIGILL, ILL_ILLTRP, addr, scno);
index f8f758e..f61e9d8 100644 (file)
@@ -122,7 +122,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
        cortex_a76_erratum_1463225_svc_handler();
        local_daif_restore(DAIF_PROCCTX);
 
-       if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) {
+       if (flags & _TIF_MTE_ASYNC_FAULT) {
                /*
                 * Process the asynchronous tag check fault before the actual
                 * syscall. do_notify_resume() will send a signal to userspace
index 543c67c..b8026ec 100644 (file)
@@ -124,6 +124,12 @@ int __init parse_acpi_topology(void)
 #endif
 
 #ifdef CONFIG_ARM64_AMU_EXTN
+#define read_corecnt() read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0)
+#define read_constcnt()        read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)
+#else
+#define read_corecnt() (0UL)
+#define read_constcnt()        (0UL)
+#endif
 
 #undef pr_fmt
 #define pr_fmt(fmt) "AMU: " fmt
@@ -133,54 +139,58 @@ static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
 static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
 static cpumask_var_t amu_fie_cpus;
 
-/* Initialize counter reference per-cpu variables for the current CPU */
-void init_cpu_freq_invariance_counters(void)
+void update_freq_counters_refs(void)
 {
-       this_cpu_write(arch_core_cycles_prev,
-                      read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0));
-       this_cpu_write(arch_const_cycles_prev,
-                      read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0));
+       this_cpu_write(arch_core_cycles_prev, read_corecnt());
+       this_cpu_write(arch_const_cycles_prev, read_constcnt());
 }
 
-static int validate_cpu_freq_invariance_counters(int cpu)
+static inline bool freq_counters_valid(int cpu)
 {
-       u64 max_freq_hz, ratio;
+       if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
+               return false;
 
        if (!cpu_has_amu_feat(cpu)) {
                pr_debug("CPU%d: counters are not supported.\n", cpu);
-               return -EINVAL;
+               return false;
        }
 
        if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
                     !per_cpu(arch_core_cycles_prev, cpu))) {
                pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
-               return -EINVAL;
+               return false;
        }
 
-       /* Convert maximum frequency from KHz to Hz and validate */
-       max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000;
-       if (unlikely(!max_freq_hz)) {
-               pr_debug("CPU%d: invalid maximum frequency.\n", cpu);
+       return true;
+}
+
+static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
+{
+       u64 ratio;
+
+       if (unlikely(!max_rate || !ref_rate)) {
+               pr_debug("CPU%d: invalid maximum or reference frequency.\n",
+                        cpu);
                return -EINVAL;
        }
 
        /*
         * Pre-compute the fixed ratio between the frequency of the constant
-        * counter and the maximum frequency of the CPU.
+        * reference counter and the maximum frequency of the CPU.
         *
-        *                            const_freq
-        * arch_max_freq_scale =   ---------------- * SCHED_CAPACITY_SCALE²
-        *                         cpuinfo_max_freq
+        *                          ref_rate
+        * arch_max_freq_scale =   ---------- * SCHED_CAPACITY_SCALE²
+        *                          max_rate
         *
         * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
         * in order to ensure a good resolution for arch_max_freq_scale for
-        * very low arch timer frequencies (down to the KHz range which should
+        * very low reference frequencies (down to the KHz range which should
         * be unlikely).
         */
-       ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT);
-       ratio = div64_u64(ratio, max_freq_hz);
+       ratio = ref_rate << (2 * SCHED_CAPACITY_SHIFT);
+       ratio = div64_u64(ratio, max_rate);
        if (!ratio) {
-               WARN_ONCE(1, "System timer frequency too low.\n");
+               WARN_ONCE(1, "Reference frequency too low.\n");
                return -EINVAL;
        }
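
The ratio pre-computed above is ref_rate / max_rate scaled by SCHED_CAPACITY_SCALE², i.e. shifted left by 2 * SCHED_CAPACITY_SHIFT (20 bits with the usual shift of 10). A worked sketch with assumed rates, purely to show the fixed-point resolution:

#include <stdio.h>
#include <stdint.h>

#define SCHED_CAPACITY_SHIFT	10	/* SCHED_CAPACITY_SCALE == 1024 */

int main(void)
{
	/* Assumed rates for illustration only. */
	uint64_t ref_rate = 100000000ULL;	/* 100 MHz constant counter */
	uint64_t max_rate = 2000000000ULL;	/* 2 GHz maximum CPU frequency */

	/* ratio = ref_rate / max_rate * SCHED_CAPACITY_SCALE^2 */
	uint64_t ratio = (ref_rate << (2 * SCHED_CAPACITY_SHIFT)) / max_rate;

	/* Prints 52428, i.e. 0.05 * 1024^2 */
	printf("arch_max_freq_scale = %llu\n", (unsigned long long)ratio);
	return 0;
}
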
 
@@ -227,8 +237,12 @@ static int __init init_amu_fie(void)
        }
 
        for_each_present_cpu(cpu) {
-               if (validate_cpu_freq_invariance_counters(cpu))
+               if (!freq_counters_valid(cpu) ||
+                   freq_inv_set_max_ratio(cpu,
+                                          cpufreq_get_hw_max_freq(cpu) * 1000,
+                                          arch_timer_get_rate()))
                        continue;
+
                cpumask_set_cpu(cpu, valid_cpus);
                have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
        }
@@ -280,11 +294,14 @@ void topology_scale_freq_tick(void)
        if (!cpumask_test_cpu(cpu, amu_fie_cpus))
                return;
 
-       const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0);
-       core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
        prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
        prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
 
+       update_freq_counters_refs();
+
+       const_cnt = this_cpu_read(arch_const_cycles_prev);
+       core_cnt = this_cpu_read(arch_core_cycles_prev);
+
        if (unlikely(core_cnt <= prev_core_cnt ||
                     const_cnt <= prev_const_cnt))
                goto store_and_exit;
@@ -309,4 +326,71 @@ store_and_exit:
        this_cpu_write(arch_core_cycles_prev, core_cnt);
        this_cpu_write(arch_const_cycles_prev, const_cnt);
 }
-#endif /* CONFIG_ARM64_AMU_EXTN */
+
+#ifdef CONFIG_ACPI_CPPC_LIB
+#include <acpi/cppc_acpi.h>
+
+static void cpu_read_corecnt(void *val)
+{
+       *(u64 *)val = read_corecnt();
+}
+
+static void cpu_read_constcnt(void *val)
+{
+       *(u64 *)val = read_constcnt();
+}
+
+static inline
+int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val)
+{
+       /*
+        * Abort call on counterless CPU or when interrupts are
+        * disabled - can lead to deadlock in smp sync call.
+        */
+       if (!cpu_has_amu_feat(cpu))
+               return -EOPNOTSUPP;
+
+       if (WARN_ON_ONCE(irqs_disabled()))
+               return -EPERM;
+
+       smp_call_function_single(cpu, func, val, 1);
+
+       return 0;
+}
+
+/*
+ * Refer to drivers/acpi/cppc_acpi.c for the description of the functions
+ * below.
+ */
+bool cpc_ffh_supported(void)
+{
+       return freq_counters_valid(get_cpu_with_amu_feat());
+}
+
+int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val)
+{
+       int ret = -EOPNOTSUPP;
+
+       switch ((u64)reg->address) {
+       case 0x0:
+               ret = counters_read_on_cpu(cpu, cpu_read_corecnt, val);
+               break;
+       case 0x1:
+               ret = counters_read_on_cpu(cpu, cpu_read_constcnt, val);
+               break;
+       }
+
+       if (!ret) {
+               *val &= GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+                                   reg->bit_offset);
+               *val >>= reg->bit_offset;
+       }
+
+       return ret;
+}
+
+int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+{
+       return -EOPNOTSUPP;
+}
+#endif /* CONFIG_ACPI_CPPC_LIB */
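
cpc_read_ffh() above maps FFH register address 0x0 to the core counter and 0x1 to the constant counter, then extracts the field described by the register's bit_offset/bit_width. A small sketch of just that masking step, using an invented raw counter value and field layout:

#include <stdio.h>
#include <stdint.h>

/* Equivalent to GENMASK_ULL() from include/linux/bits.h for 0 <= l <= h <= 63 */
#define GENMASK_ULL(h, l)	((~0ULL << (l)) & (~0ULL >> (63 - (h))))

int main(void)
{
	/* Invented raw AMU counter value and CPC field description. */
	uint64_t val = 0x1234567890abcdefULL;
	unsigned int bit_offset = 8, bit_width = 32;

	/* Same extraction as cpc_read_ffh(): keep bit_width bits starting
	 * at bit_offset, then shift the field down to bit 0. */
	val &= GENMASK_ULL(bit_offset + bit_width - 1, bit_offset);
	val >>= bit_offset;

	printf("0x%llx\n", (unsigned long long)val);	/* 0x7890abcd */
	return 0;
}
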
index 2059d8f..08156be 100644 (file)
@@ -171,32 +171,32 @@ static void arm64_show_signal(int signo, const char *str)
        __show_regs(regs);
 }
 
-void arm64_force_sig_fault(int signo, int code, void __user *addr,
+void arm64_force_sig_fault(int signo, int code, unsigned long far,
                           const char *str)
 {
        arm64_show_signal(signo, str);
        if (signo == SIGKILL)
                force_sig(SIGKILL);
        else
-               force_sig_fault(signo, code, addr);
+               force_sig_fault(signo, code, (void __user *)far);
 }
 
-void arm64_force_sig_mceerr(int code, void __user *addr, short lsb,
+void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
                            const char *str)
 {
        arm64_show_signal(SIGBUS, str);
-       force_sig_mceerr(code, addr, lsb);
+       force_sig_mceerr(code, (void __user *)far, lsb);
 }
 
-void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr,
+void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far,
                                       const char *str)
 {
        arm64_show_signal(SIGTRAP, str);
-       force_sig_ptrace_errno_trap(errno, addr);
+       force_sig_ptrace_errno_trap(errno, (void __user *)far);
 }
 
 void arm64_notify_die(const char *str, struct pt_regs *regs,
-                     int signo, int sicode, void __user *addr,
+                     int signo, int sicode, unsigned long far,
                      int err)
 {
        if (user_mode(regs)) {
@@ -204,7 +204,7 @@ void arm64_notify_die(const char *str, struct pt_regs *regs,
                current->thread.fault_address = 0;
                current->thread.fault_code = err;
 
-               arm64_force_sig_fault(signo, sicode, addr, str);
+               arm64_force_sig_fault(signo, sicode, far, str);
        } else {
                die(str, regs, err);
        }
@@ -375,7 +375,7 @@ void force_signal_inject(int signal, int code, unsigned long address, unsigned i
                signal = SIGKILL;
        }
 
-       arm64_notify_die(desc, regs, signal, code, (void __user *)address, err);
+       arm64_notify_die(desc, regs, signal, code, address, err);
 }
 
 /*
@@ -386,7 +386,7 @@ void arm64_notify_segfault(unsigned long addr)
        int code;
 
        mmap_read_lock(current->mm);
-       if (find_vma(current->mm, addr) == NULL)
+       if (find_vma(current->mm, untagged_addr(addr)) == NULL)
                code = SEGV_MAPERR;
        else
                code = SEGV_ACCERR;
@@ -449,12 +449,13 @@ NOKPROBE_SYMBOL(do_ptrauth_fault);
 
 static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 {
-       unsigned long address;
+       unsigned long tagged_address, address;
        int rt = ESR_ELx_SYS64_ISS_RT(esr);
        int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
        int ret = 0;
 
-       address = untagged_addr(pt_regs_read_reg(regs, rt));
+       tagged_address = pt_regs_read_reg(regs, rt);
+       address = untagged_addr(tagged_address);
 
        switch (crm) {
        case ESR_ELx_SYS64_ISS_CRM_DC_CVAU:     /* DC CVAU, gets promoted */
@@ -481,7 +482,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
        }
 
        if (ret)
-               arm64_notify_segfault(address);
+               arm64_notify_segfault(tagged_address);
        else
                arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
@@ -775,7 +776,7 @@ asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int
  */
 void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
 {
-       void __user *pc = (void __user *)instruction_pointer(regs);
+       unsigned long pc = instruction_pointer(regs);
 
        current->thread.fault_address = 0;
        current->thread.fault_code = esr;
index d65f522..a8f8e40 100644 (file)
@@ -28,7 +28,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv      \
             $(btildflags-y) -T
 
 ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
-ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
 
 CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
 KASAN_SANITIZE                 := n
index 79280c5..a1e0f91 100644 (file)
@@ -48,7 +48,7 @@ cc32-as-instr = $(call try-run,\
 # As a result we set our own flags here.
 
 # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
-VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
+VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
 VDSO_CPPFLAGS += $(LINUXINCLUDE)
 
 # Common C and assembly flags
index 1bda604..5d5857c 100644 (file)
@@ -121,7 +121,7 @@ SECTIONS
                _text = .;
                HEAD_TEXT
        }
-       .text : {                       /* Real text segment            */
+       .text : ALIGN(SEGMENT_ALIGN) {  /* Real text segment            */
                _stext = .;             /* Text and read-only data      */
                        IRQENTRY_TEXT
                        SOFTIRQENTRY_TEXT
@@ -164,13 +164,11 @@ SECTIONS
        . += PAGE_SIZE;
 #endif
 
-#ifdef CONFIG_ARM64_SW_TTBR0_PAN
-       reserved_ttbr0 = .;
-       . += RESERVED_TTBR0_SIZE;
-#endif
+       reserved_pg_dir = .;
+       . += PAGE_SIZE;
+
        swapper_pg_dir = .;
        . += PAGE_SIZE;
-       swapper_pg_end = .;
 
        . = ALIGN(SEGMENT_ALIGN);
        __init_begin = .;
@@ -201,7 +199,7 @@ SECTIONS
                INIT_CALLS
                CON_INITCALL
                INIT_RAM_FS
-               *(.init.rodata.* .init.bss)     /* from the EFI stub */
+               *(.init.altinstructions .init.bss)      /* from the EFI stub */
        }
        .exit.data : {
                EXIT_DATA
index 48a3a26..af9afcb 100644 (file)
@@ -24,20 +24,20 @@ SYM_FUNC_START(__arch_clear_user)
        subs    x1, x1, #8
        b.mi    2f
 1:
-uao_user_alternative 9f, str, sttr, xzr, x0, 8
+user_ldst 9f, sttr, xzr, x0, 8
        subs    x1, x1, #8
        b.pl    1b
 2:     adds    x1, x1, #4
        b.mi    3f
-uao_user_alternative 9f, str, sttr, wzr, x0, 4
+user_ldst 9f, sttr, wzr, x0, 4
        sub     x1, x1, #4
 3:     adds    x1, x1, #2
        b.mi    4f
-uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
+user_ldst 9f, sttrh, wzr, x0, 2
        sub     x1, x1, #2
 4:     adds    x1, x1, #1
        b.mi    5f
-uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
+user_ldst 9f, sttrb, wzr, x0, 0
 5:     mov     x0, #0
        ret
 SYM_FUNC_END(__arch_clear_user)
index 0f8a3a9..95cd62d 100644 (file)
@@ -21,7 +21,7 @@
  */
 
        .macro ldrb1 reg, ptr, val
-       uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val
+       user_ldst 9998f, ldtrb, \reg, \ptr, \val
        .endm
 
        .macro strb1 reg, ptr, val
@@ -29,7 +29,7 @@
        .endm
 
        .macro ldrh1 reg, ptr, val
-       uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val
+       user_ldst 9998f, ldtrh, \reg, \ptr, \val
        .endm
 
        .macro strh1 reg, ptr, val
@@ -37,7 +37,7 @@
        .endm
 
        .macro ldr1 reg, ptr, val
-       uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val
+       user_ldst 9998f, ldtr, \reg, \ptr, \val
        .endm
 
        .macro str1 reg, ptr, val
@@ -45,7 +45,7 @@
        .endm
 
        .macro ldp1 reg1, reg2, ptr, val
-       uao_ldp 9998f, \reg1, \reg2, \ptr, \val
+       user_ldp 9998f, \reg1, \reg2, \ptr, \val
        .endm
 
        .macro stp1 reg1, reg2, ptr, val
index 80e37ad..1f61cd0 100644 (file)
  *     x0 - bytes not copied
  */
        .macro ldrb1 reg, ptr, val
-       uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val
+       user_ldst 9998f, ldtrb, \reg, \ptr, \val
        .endm
 
        .macro strb1 reg, ptr, val
-       uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val
+       user_ldst 9998f, sttrb, \reg, \ptr, \val
        .endm
 
        .macro ldrh1 reg, ptr, val
-       uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val
+       user_ldst 9998f, ldtrh, \reg, \ptr, \val
        .endm
 
        .macro strh1 reg, ptr, val
-       uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val
+       user_ldst 9998f, sttrh, \reg, \ptr, \val
        .endm
 
        .macro ldr1 reg, ptr, val
-       uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val
+       user_ldst 9998f, ldtr, \reg, \ptr, \val
        .endm
 
        .macro str1 reg, ptr, val
-       uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val
+       user_ldst 9998f, sttr, \reg, \ptr, \val
        .endm
 
        .macro ldp1 reg1, reg2, ptr, val
-       uao_ldp 9998f, \reg1, \reg2, \ptr, \val
+       user_ldp 9998f, \reg1, \reg2, \ptr, \val
        .endm
 
        .macro stp1 reg1, reg2, ptr, val
-       uao_stp 9998f, \reg1, \reg2, \ptr, \val
+       user_stp 9998f, \reg1, \reg2, \ptr, \val
        .endm
 
 end    .req    x5
index 4ec5970..043da90 100644 (file)
@@ -24,7 +24,7 @@
        .endm
 
        .macro strb1 reg, ptr, val
-       uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val
+       user_ldst 9998f, sttrb, \reg, \ptr, \val
        .endm
 
        .macro ldrh1 reg, ptr, val
@@ -32,7 +32,7 @@
        .endm
 
        .macro strh1 reg, ptr, val
-       uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val
+       user_ldst 9998f, sttrh, \reg, \ptr, \val
        .endm
 
        .macro ldr1 reg, ptr, val
@@ -40,7 +40,7 @@
        .endm
 
        .macro str1 reg, ptr, val
-       uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val
+       user_ldst 9998f, sttr, \reg, \ptr, \val
        .endm
 
        .macro ldp1 reg1, reg2, ptr, val
@@ -48,7 +48,7 @@
        .endm
 
        .macro stp1 reg1, reg2, ptr, val
-       uao_stp 9998f, \reg1, \reg2, \ptr, \val
+       user_stp 9998f, \reg1, \reg2, \ptr, \val
        .endm
 
 end    .req    x5
index 03ca6d8..351537c 100644 (file)
@@ -4,7 +4,7 @@
  */
 #include <linux/linkage.h>
 
-#include <asm/alternative.h>
+#include <asm/asm-uaccess.h>
 #include <asm/assembler.h>
 #include <asm/mte.h>
 #include <asm/page.h>
@@ -67,7 +67,7 @@ SYM_FUNC_START(mte_copy_tags_from_user)
        mov     x3, x1
        cbz     x2, 2f
 1:
-       uao_user_alternative 2f, ldrb, ldtrb, w4, x1, 0
+       user_ldst 2f, ldtrb, w4, x1, 0
        lsl     x4, x4, #MTE_TAG_SHIFT
        stg     x4, [x0], #MTE_GRANULE_SIZE
        add     x1, x1, #1
@@ -94,7 +94,7 @@ SYM_FUNC_START(mte_copy_tags_to_user)
 1:
        ldg     x4, [x1]
        ubfx    x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE
-       uao_user_alternative 2f, strb, sttrb, w4, x0, 0
+       user_ldst 2f, sttrb, w4, x0, 0
        add     x0, x0, #1
        add     x1, x1, #MTE_GRANULE_SIZE
        subs    x2, x2, #1
index bfa30b7..c83bb5a 100644 (file)
@@ -30,9 +30,7 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from,
 {
        unsigned long rc;
 
-       uaccess_enable_not_uao();
-       rc = __arch_copy_from_user(to, from, n);
-       uaccess_disable_not_uao();
+       rc = raw_copy_from_user(to, from, n);
 
        /* See above */
        __clean_dcache_area_pop(to, n - rc);
index 795d224..2848952 100644 (file)
@@ -40,7 +40,7 @@
 #include <asm/traps.h>
 
 struct fault_info {
-       int     (*fn)(unsigned long addr, unsigned int esr,
+       int     (*fn)(unsigned long far, unsigned int esr,
                      struct pt_regs *regs);
        int     sig;
        int     code;
@@ -385,8 +385,11 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
        current->thread.fault_code = esr;
 }
 
-static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static void do_bad_area(unsigned long far, unsigned int esr,
+                       struct pt_regs *regs)
 {
+       unsigned long addr = untagged_addr(far);
+
        /*
         * If we are in kernel mode at this point, we have no context to
         * handle this fault with.
@@ -395,8 +398,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
                const struct fault_info *inf = esr_to_fault_info(esr);
 
                set_thread_esr(addr, esr);
-               arm64_force_sig_fault(inf->sig, inf->code, (void __user *)addr,
-                                     inf->name);
+               arm64_force_sig_fault(inf->sig, inf->code, far, inf->name);
        } else {
                __do_kernel_fault(addr, esr, regs);
        }
@@ -448,7 +450,7 @@ static bool is_write_abort(unsigned int esr)
        return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
 }
 
-static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
+static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
                                   struct pt_regs *regs)
 {
        const struct fault_info *inf;
@@ -456,6 +458,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
        vm_fault_t fault;
        unsigned long vm_flags = VM_ACCESS_FLAGS;
        unsigned int mm_flags = FAULT_FLAG_DEFAULT;
+       unsigned long addr = untagged_addr(far);
 
        if (kprobe_page_fault(regs, esr))
                return 0;
@@ -479,11 +482,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
        }
 
        if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
-               /* regs->orig_addr_limit may be 0 if we entered from EL0 */
-               if (regs->orig_addr_limit == KERNEL_DS)
-                       die_kernel_fault("access to user memory with fs=KERNEL_DS",
-                                        addr, esr, regs);
-
                if (is_el1_instruction_abort(esr))
                        die_kernel_fault("execution of user memory",
                                         addr, esr, regs);
@@ -567,8 +565,7 @@ retry:
                 * We had some memory, but were unable to successfully fix up
                 * this page fault.
                 */
-               arm64_force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr,
-                                     inf->name);
+               arm64_force_sig_fault(SIGBUS, BUS_ADRERR, far, inf->name);
        } else if (fault & (VM_FAULT_HWPOISON_LARGE | VM_FAULT_HWPOISON)) {
                unsigned int lsb;
 
@@ -576,8 +573,7 @@ retry:
                if (fault & VM_FAULT_HWPOISON_LARGE)
                        lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
 
-               arm64_force_sig_mceerr(BUS_MCEERR_AR, (void __user *)addr, lsb,
-                                      inf->name);
+               arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
        } else {
                /*
                 * Something tried to access memory that isn't in our memory
@@ -585,8 +581,7 @@ retry:
                 */
                arm64_force_sig_fault(SIGSEGV,
                                      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
-                                     (void __user *)addr,
-                                     inf->name);
+                                     far, inf->name);
        }
 
        return 0;
@@ -596,33 +591,35 @@ no_context:
        return 0;
 }
 
-static int __kprobes do_translation_fault(unsigned long addr,
+static int __kprobes do_translation_fault(unsigned long far,
                                          unsigned int esr,
                                          struct pt_regs *regs)
 {
+       unsigned long addr = untagged_addr(far);
+
        if (is_ttbr0_addr(addr))
-               return do_page_fault(addr, esr, regs);
+               return do_page_fault(far, esr, regs);
 
-       do_bad_area(addr, esr, regs);
+       do_bad_area(far, esr, regs);
        return 0;
 }
 
-static int do_alignment_fault(unsigned long addr, unsigned int esr,
+static int do_alignment_fault(unsigned long far, unsigned int esr,
                              struct pt_regs *regs)
 {
-       do_bad_area(addr, esr, regs);
+       do_bad_area(far, esr, regs);
        return 0;
 }
 
-static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_bad(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
        return 1; /* "fault" */
 }
 
-static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
        const struct fault_info *inf;
-       void __user *siaddr;
+       unsigned long siaddr;
 
        inf = esr_to_fault_info(esr);
 
@@ -634,19 +631,30 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
                return 0;
        }
 
-       if (esr & ESR_ELx_FnV)
-               siaddr = NULL;
-       else
-               siaddr  = (void __user *)addr;
+       if (esr & ESR_ELx_FnV) {
+               siaddr = 0;
+       } else {
+               /*
+                * The architecture specifies that the tag bits of FAR_EL1 are
+                * UNKNOWN for synchronous external aborts. Mask them out now
+                * so that userspace doesn't see them.
+                */
+               siaddr  = untagged_addr(far);
+       }
        arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
 
        return 0;
 }
 
-static int do_tag_check_fault(unsigned long addr, unsigned int esr,
+static int do_tag_check_fault(unsigned long far, unsigned int esr,
                              struct pt_regs *regs)
 {
-       do_bad_area(addr, esr, regs);
+       /*
+        * The architecture specifies that bits 63:60 of FAR_EL1 are UNKNOWN for tag
+        * check faults. Mask them out now so that userspace doesn't see them.
+        */
+       far &= (1UL << 60) - 1;
+       do_bad_area(far, esr, regs);
        return 0;
 }
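
The mask (1UL << 60) - 1 keeps bits 59:0 of the faulting address and clears only bits 63:60, which the architecture leaves UNKNOWN for tag check faults; the MTE logical tag in bits 59:56 is preserved. A one-off sketch with a made-up address:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Made-up FAR value: logical tag 0x3 in bits 59:56, UNKNOWN junk
	 * in bits 63:60. */
	uint64_t far = 0xf300ffff12345678ULL;

	far &= (1ULL << 60) - 1;	/* clear bits 63:60 only */

	/* Prints 0x0300ffff12345678: the tag survives, the UNKNOWN
	 * top nibble does not. */
	printf("0x%016llx\n", (unsigned long long)far);
	return 0;
}
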
 
@@ -717,11 +725,12 @@ static const struct fault_info fault_info[] = {
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 63"                    },
 };
 
-void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
        const struct fault_info *inf = esr_to_fault_info(esr);
+       unsigned long addr = untagged_addr(far);
 
-       if (!inf->fn(addr, esr, regs))
+       if (!inf->fn(far, esr, regs))
                return;
 
        if (!user_mode(regs)) {
@@ -730,8 +739,12 @@ void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
                show_pte(addr);
        }
 
-       arm64_notify_die(inf->name, regs,
-                        inf->sig, inf->code, (void __user *)addr, esr);
+       /*
+        * At this point we have an unrecognized fault type whose tag bits may
+        * have been defined as UNKNOWN. Therefore we only expose the untagged
+        * address to the signal handler.
+        */
+       arm64_notify_die(inf->name, regs, inf->sig, inf->code, addr, esr);
 }
 NOKPROBE_SYMBOL(do_mem_abort);
 
@@ -744,8 +757,8 @@ NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);
 
 void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
-       arm64_notify_die("SP/PC alignment exception", regs,
-                        SIGBUS, BUS_ADRALN, (void __user *)addr, esr);
+       arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
+                        addr, esr);
 }
 NOKPROBE_SYMBOL(do_sp_pc_abort);
 
@@ -846,8 +859,7 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
                arm64_apply_bp_hardening();
 
        if (inf->fn(addr_if_watchpoint, esr, regs)) {
-               arm64_notify_die(inf->name, regs,
-                                inf->sig, inf->code, (void __user *)pc, esr);
+               arm64_notify_die(inf->name, regs, inf->sig, inf->code, pc, esr);
        }
 
        debug_exception_exit(regs);
index 0955406..fbd452e 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
 #include <linux/hugetlb.h>
+#include <linux/acpi_iort.h>
 
 #include <asm/boot.h>
 #include <asm/fixmap.h>
@@ -42,8 +43,6 @@
 #include <asm/tlb.h>
 #include <asm/alternative.h>
 
-#define ARM64_ZONE_DMA_BITS    30
-
 /*
  * We need to be able to catch inadvertent references to memstart_addr
  * that occur (potentially in generic code) before arm64_memblock_init()
@@ -175,21 +174,34 @@ static void __init reserve_elfcorehdr(void)
 #endif /* CONFIG_CRASH_DUMP */
 
 /*
- * Return the maximum physical address for a zone with a given address size
- * limit. It currently assumes that for memory starting above 4G, 32-bit
- * devices will use a DMA offset.
+ * Return the maximum physical address for a zone accessible by the given bits
+ * limit. If DRAM starts above 32-bit, expand the zone to the maximum
+ * available memory, otherwise cap it at 32-bit.
  */
 static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
 {
-       phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits);
-       return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
+       phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits);
+       phys_addr_t phys_start = memblock_start_of_DRAM();
+
+       if (phys_start > U32_MAX)
+               zone_mask = PHYS_ADDR_MAX;
+       else if (phys_start > zone_mask)
+               zone_mask = U32_MAX;
+
+       return min(zone_mask, memblock_end_of_DRAM() - 1) + 1;
 }
 
 static void __init zone_sizes_init(unsigned long min, unsigned long max)
 {
        unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
+       unsigned int __maybe_unused acpi_zone_dma_bits;
+       unsigned int __maybe_unused dt_zone_dma_bits;
 
 #ifdef CONFIG_ZONE_DMA
+       acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
+       dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL));
+       zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits);
+       arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
        max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
 #endif
 #ifdef CONFIG_ZONE_DMA32
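
With this change ZONE_DMA is sized from the most restrictive of a 32-bit ceiling, the Device Tree dma-ranges limit and the ACPI IORT limit, and max_zone_phys() expands the zone to all of memory when DRAM itself starts above 4 GB. A simplified sketch of the decision, with invented firmware-reported limits:

#include <stdio.h>
#include <stdint.h>

#define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

/* Simplified max_zone_phys(); dram_start/dram_end stand in for
 * memblock_start_of_DRAM()/memblock_end_of_DRAM(). */
static uint64_t max_zone_phys(unsigned int zone_bits,
			      uint64_t dram_start, uint64_t dram_end)
{
	uint64_t zone_mask = DMA_BIT_MASK(zone_bits);

	if (dram_start > UINT32_MAX)
		zone_mask = ~0ULL;		/* DRAM above 32-bit: no cap */
	else if (dram_start > zone_mask)
		zone_mask = UINT32_MAX;		/* cap at 32-bit instead */

	return (zone_mask < dram_end - 1 ? zone_mask : dram_end - 1) + 1;
}

int main(void)
{
	/* Invented limits: DT reports a 30-bit master, ACPI reports none. */
	unsigned int dt_bits = 30, acpi_bits = 64;
	unsigned int dma_bits = dt_bits < 32 ? dt_bits : 32;

	if (acpi_bits < dma_bits)
		dma_bits = acpi_bits;		/* min3(32, dt, acpi) */

	/* 4 GB of DRAM starting at 0: ZONE_DMA limited to the first 1 GB */
	printf("%#llx\n", (unsigned long long)
	       max_zone_phys(dma_bits, 0, 0x100000000ULL));
	return 0;
}
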
@@ -269,7 +281,7 @@ static void __init fdt_enforce_memory_region(void)
 
 void __init arm64_memblock_init(void)
 {
-       const s64 linear_region_size = BIT(vabits_actual - 1);
+       const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);
 
        /* Handle linux,usable-memory-range property */
        fdt_enforce_memory_region();
@@ -348,15 +360,18 @@ void __init arm64_memblock_init(void)
 
        if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
                extern u16 memstart_offset_seed;
-               u64 range = linear_region_size -
-                           (memblock_end_of_DRAM() - memblock_start_of_DRAM());
+               u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+               int parange = cpuid_feature_extract_unsigned_field(
+                                       mmfr0, ID_AA64MMFR0_PARANGE_SHIFT);
+               s64 range = linear_region_size -
+                           BIT(id_aa64mmfr0_parange_to_phys_shift(parange));
 
                /*
                 * If the size of the linear region exceeds, by a sufficient
-                * margin, the size of the region that the available physical
-                * memory spans, randomize the linear region as well.
+                * margin, the size of the region that the physical memory can
+                * span, randomize the linear region as well.
                 */
-               if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
+               if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) {
                        range /= ARM64_MEMSTART_ALIGN;
                        memstart_addr -= ARM64_MEMSTART_ALIGN *
                                         ((range * memstart_offset_seed) >> 16);
@@ -367,7 +382,7 @@ void __init arm64_memblock_init(void)
         * Register the kernel text, kernel data, initrd, and initial
         * pagetables with memblock.
         */
-       memblock_reserve(__pa_symbol(_text), _end - _text);
+       memblock_reserve(__pa_symbol(_stext), _end - _stext);
        if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
                /* the generic initrd code expects virtual addresses */
                initrd_start = __phys_to_virt(phys_initrd_start);
@@ -376,18 +391,11 @@ void __init arm64_memblock_init(void)
 
        early_init_fdt_scan_reserved_mem();
 
-       if (IS_ENABLED(CONFIG_ZONE_DMA)) {
-               zone_dma_bits = ARM64_ZONE_DMA_BITS;
-               arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS);
-       }
-
        if (IS_ENABLED(CONFIG_ZONE_DMA32))
                arm64_dma32_phys_limit = max_zone_phys(32);
        else
                arm64_dma32_phys_limit = PHYS_MASK + 1;
 
-       reserve_crashkernel();
-
        reserve_elfcorehdr();
 
        high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
@@ -427,6 +435,12 @@ void __init bootmem_init(void)
        sparse_init();
        zone_sizes_init(min, max);
 
+       /*
+        * request_standard_resources() depends on crashkernel's memory being
+        * reserved, so do it here.
+        */
+       reserve_crashkernel();
+
        memblock_dump_all();
 }
 
index ca692a8..ae0c3d0 100644 (file)
@@ -464,20 +464,35 @@ void __init mark_linear_text_alias_ro(void)
        /*
         * Remove the write permissions from the linear alias of .text/.rodata
         */
-       update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
-                           (unsigned long)__init_begin - (unsigned long)_text,
+       update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext),
+                           (unsigned long)__init_begin - (unsigned long)_stext,
                            PAGE_KERNEL_RO);
 }
 
+static bool crash_mem_map __initdata;
+
+static int __init enable_crash_mem_map(char *arg)
+{
+       /*
+        * Proper parameter parsing is done by reserve_crashkernel(). We only
+        * need to know if the linear map has to avoid block mappings so that
+        * the crashkernel reservations can be unmapped later.
+        */
+       crash_mem_map = true;
+
+       return 0;
+}
+early_param("crashkernel", enable_crash_mem_map);
+
 static void __init map_mem(pgd_t *pgdp)
 {
-       phys_addr_t kernel_start = __pa_symbol(_text);
+       phys_addr_t kernel_start = __pa_symbol(_stext);
        phys_addr_t kernel_end = __pa_symbol(__init_begin);
        phys_addr_t start, end;
        int flags = 0;
        u64 i;
 
-       if (rodata_full || debug_pagealloc_enabled())
+       if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
                flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
        /*
@@ -487,11 +502,6 @@ static void __init map_mem(pgd_t *pgdp)
         * the following for-loop
         */
        memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
-#ifdef CONFIG_KEXEC_CORE
-       if (crashk_res.end)
-               memblock_mark_nomap(crashk_res.start,
-                                   resource_size(&crashk_res));
-#endif
 
        /* map all the memory banks */
        for_each_mem_range(i, &start, &end) {
@@ -506,7 +516,7 @@ static void __init map_mem(pgd_t *pgdp)
        }
 
        /*
-        * Map the linear alias of the [_text, __init_begin) interval
+        * Map the linear alias of the [_stext, __init_begin) interval
         * as non-executable now, and remove the write permission in
         * mark_linear_text_alias_ro() below (which will be called after
         * alternative patching has completed). This makes the contents
@@ -518,21 +528,6 @@ static void __init map_mem(pgd_t *pgdp)
        __map_memblock(pgdp, kernel_start, kernel_end,
                       PAGE_KERNEL, NO_CONT_MAPPINGS);
        memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
-
-#ifdef CONFIG_KEXEC_CORE
-       /*
-        * Use page-level mappings here so that we can shrink the region
-        * in page granularity and put back unused memory to buddy system
-        * through /sys/kernel/kexec_crash_size interface.
-        */
-       if (crashk_res.end) {
-               __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
-                              PAGE_KERNEL,
-                              NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
-               memblock_clear_nomap(crashk_res.start,
-                                    resource_size(&crashk_res));
-       }
-#endif
 }
 
 void mark_rodata_ro(void)
@@ -665,7 +660,7 @@ static void __init map_kernel(pgd_t *pgdp)
         * Only rodata will be remapped with different permissions later on,
         * all other segments are allowed to use contiguous mappings.
         */
-       map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
+       map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0,
                           VM_NO_GUARD);
        map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
                           &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
@@ -1132,8 +1127,11 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                        void *p = NULL;
 
                        p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
-                       if (!p)
-                               return -ENOMEM;
+                       if (!p) {
+                               if (vmemmap_populate_basepages(addr, next, node, altmap))
+                                       return -ENOMEM;
+                               continue;
+                       }
 
                        pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
                } else
@@ -1510,13 +1508,43 @@ static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
        unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
        unsigned long pfn = arg->start_pfn;
 
-       if (action != MEM_GOING_OFFLINE)
+       if ((action != MEM_GOING_OFFLINE) && (action != MEM_OFFLINE))
                return NOTIFY_OK;
 
        for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+               unsigned long start = PFN_PHYS(pfn);
+               unsigned long end = start + (1UL << PA_SECTION_SHIFT);
+
                ms = __pfn_to_section(pfn);
-               if (early_section(ms))
+               if (!early_section(ms))
+                       continue;
+
+               if (action == MEM_GOING_OFFLINE) {
+                       /*
+                        * Boot memory removal is not supported. Prevent
+                        * it via blocking any attempted offline request
+                        * for the boot memory and just report it.
+                        */
+                       pr_warn("Boot memory [%lx %lx] offlining attempted\n", start, end);
                        return NOTIFY_BAD;
+               } else if (action == MEM_OFFLINE) {
+                        * This should never have happened. Boot memory
+                        * offlining should have been prevented by this
+                        * very notifier. Some memory removal procedure
+                        * has probably changed, which would then require
+                        * further debugging.
+                        * require further debug.
+                        */
+                       pr_err("Boot memory [%lx %lx] offlined\n", start, end);
+
+                       /*
+                        * Core memory hotplug does not process a return
+                        * code from the notifier for MEM_OFFLINE events.
+                        * The error condition has been reported. Return
+                        * from here as if ignored.
+                        */
+                       return NOTIFY_DONE;
+               }
        }
        return NOTIFY_OK;
 }
@@ -1525,9 +1553,66 @@ static struct notifier_block prevent_bootmem_remove_nb = {
        .notifier_call = prevent_bootmem_remove_notifier,
 };
 
+/*
+ * This ensures that boot memory sections on the platform are online
+ * from early boot. Memory sections cannot be prevented from being
+ * offlined if, for some reason, they are not online to begin with.
+ * This validates the basic assumption on which the above memory event
+ * notifier relies to prevent boot memory section offlining and its
+ * possible removal.
+ */
+static void validate_bootmem_online(void)
+{
+       phys_addr_t start, end, addr;
+       struct mem_section *ms;
+       u64 i;
+
+       /*
+        * Scanning across all memblock regions might be expensive
+        * on systems with a lot of memory, so enable this
+        * validation only with DEBUG_VM.
+        */
+       if (!IS_ENABLED(CONFIG_DEBUG_VM))
+               return;
+
+       for_each_mem_range(i, &start, &end) {
+               for (addr = start; addr < end; addr += (1UL << PA_SECTION_SHIFT)) {
+                       ms = __pfn_to_section(PHYS_PFN(addr));
+
+                       /*
+                        * All memory ranges in the system at this point
+                        * should have been marked as early sections.
+                        */
+                       WARN_ON(!early_section(ms));
+
+                       /*
+                        * The memory notifier mechanism used here to prevent
+                        * boot memory offlining depends on each early memory
+                        * section in the system being online initially.
+                        * Otherwise a section which is already offline will
+                        * be overlooked and can be removed completely. Call
+                        * out such sections.
+                        */
+                       if (!online_section(ms))
+                               pr_err("Boot memory [%llx %llx] is offline, can be removed\n",
+                                       addr, addr + (1UL << PA_SECTION_SHIFT));
+               }
+       }
+}
+
 static int __init prevent_bootmem_remove_init(void)
 {
-       return register_memory_notifier(&prevent_bootmem_remove_nb);
+       int ret = 0;
+
+       if (!IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
+               return ret;
+
+       validate_bootmem_online();
+       ret = register_memory_notifier(&prevent_bootmem_remove_nb);
+       if (ret)
+               pr_err("%s: Notifier registration failed %d\n", __func__, ret);
+
+       return ret;
 }
-device_initcall(prevent_bootmem_remove_init);
+early_initcall(prevent_bootmem_remove_init);
 #endif
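
The two hunks above lean on the memory-hotplug notifier chain's return-code convention: returning NOTIFY_BAD from a MEM_GOING_OFFLINE event vetoes the offline request, while the return code for MEM_OFFLINE is not acted upon by core memory hotplug. Below is a minimal sketch of a notifier following the same convention; pfn_range_is_reserved() is a hypothetical predicate, not a kernel API.

#include <linux/init.h>
#include <linux/memory.h>
#include <linux/notifier.h>

/* Hypothetical policy check standing in for whatever range needs protecting. */
static bool pfn_range_is_reserved(unsigned long start_pfn, unsigned long nr_pages);

static int example_mem_notifier(struct notifier_block *nb,
                                unsigned long action, void *data)
{
        struct memory_notify *arg = data;

        if (action != MEM_GOING_OFFLINE)
                return NOTIFY_OK;

        /* NOTIFY_BAD makes core memory hotplug abort the offline request. */
        if (pfn_range_is_reserved(arg->start_pfn, arg->nr_pages))
                return NOTIFY_BAD;

        return NOTIFY_OK;
}

static struct notifier_block example_mem_nb = {
        .notifier_call = example_mem_notifier,
};

static int __init example_mem_notifier_init(void)
{
        /* Registered early, as above, so the policy is in place before userspace. */
        return register_memory_notifier(&example_mem_nb);
}
early_initcall(example_mem_notifier_init);
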
index 23c326a..a0831bf 100644 (file)
@@ -40,7 +40,7 @@
 #define TCR_CACHE_FLAGS        TCR_IRGN_WBWA | TCR_ORGN_WBWA
 
 #ifdef CONFIG_KASAN_SW_TAGS
-#define TCR_KASAN_FLAGS TCR_TBI1
+#define TCR_KASAN_FLAGS TCR_TBI1 | TCR_TBID1
 #else
 #define TCR_KASAN_FLAGS 0
 #endif
@@ -168,7 +168,7 @@ SYM_FUNC_END(cpu_do_resume)
        .pushsection ".idmap.text", "awx"
 
 .macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2
-       adrp    \tmp1, empty_zero_page
+       adrp    \tmp1, reserved_pg_dir
        phys_to_ttbr \tmp2, \tmp1
        offset_ttbr1 \tmp2, \tmp1
        msr     ttbr1_el1, \tmp2
@@ -489,6 +489,6 @@ SYM_FUNC_START(__cpu_setup)
        /*
         * Prepare SCTLR
         */
-       mov_q   x0, SCTLR_EL1_SET
+       mov_q   x0, INIT_SCTLR_EL1_MMU_ON
        ret                                     // return to head.S
 SYM_FUNC_END(__cpu_setup)
index e155210..2cd0dce 100644 (file)
@@ -57,30 +57,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002 /* not supported yet */
-#define SA_SIGINFO     0x00000004
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define SA_RESTORER    0x04000000
 
 #define MINSIGSTKSZ    2048
index aa98ff1..38166a8 100644 (file)
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002
-#define SA_SIGINFO     0x00000004
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define SA_RESTORER    0x04000000
 
 /*
index 915cc75..4619291 100644 (file)
@@ -57,30 +57,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002
-#define SA_SIGINFO     0x00000004
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define MINSIGSTKSZ    2048
 #define SIGSTKSZ       8192
 
index 53104b1..e6c78a1 100644 (file)
@@ -62,18 +62,6 @@ typedef unsigned long old_sigset_t;          /* at least 32 bits */
 #define SIGRTMAX       _NSIG
 
 /*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- *
  * SA_RESTORER used to be defined as 0x04000000 but only the O32 ABI ever
  * supported its use and no libc was using it, so the entire sa-restorer
  * functionality was removed with lmo commit 39bffc12c3580ab for 2.5.48
index 715c96b..30dd1e4 100644 (file)
@@ -21,6 +21,8 @@ typedef struct {
        unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
+#define __ARCH_UAPI_SA_FLAGS   _SA_SIGGFAULT
+
 #include <asm/sigcontext.h>
 
 #endif /* !__ASSEMBLY */
index e605197..e5a2657 100644 (file)
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
 #define SA_ONSTACK     0x00000001
 #define SA_RESETHAND   0x00000004
 #define SA_NOCLDSTOP   0x00000008
 #define MINSIGSTKSZ    2048
 #define SIGSTKSZ       8192
 
-
-#define SIG_BLOCK          0   /* for blocking signals */
-#define SIG_UNBLOCK        1   /* for unblocking signals */
-#define SIG_SETMASK        2   /* for setting the signal mask */
-
-#define SIG_DFL        ((__sighandler_t)0)     /* default signal handling */
-#define SIG_IGN        ((__sighandler_t)1)     /* ignore signal */
-#define SIG_ERR        ((__sighandler_t)-1)    /* error return from signal */
+#include <asm-generic/signal-defs.h>
 
 # ifndef __ASSEMBLY__
 
 /* Avoid too many header ordering problems.  */
 struct siginfo;
 
-/* Type of a signal handler.  */
-#if defined(__LP64__)
-/* function pointers on 64-bit parisc are pointers to little structs and the
- * compiler doesn't support code which changes or tests the address of
- * the function in the little struct.  This is really ugly -PB
- */
-typedef char __user *__sighandler_t;
-#else
-typedef void __signalfn_t(int);
-typedef __signalfn_t __user *__sighandler_t;
-#endif
-
 typedef struct sigaltstack {
        void __user *ss_sp;
        int ss_flags;
index 85b0a7a..04873dd 100644 (file)
@@ -60,30 +60,6 @@ typedef struct {
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK is not currently supported, but will allow sigaltstack(2).
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001U
-#define SA_NOCLDWAIT   0x00000002U
-#define SA_SIGINFO     0x00000004U
-#define SA_ONSTACK     0x08000000U
-#define SA_RESTART     0x10000000U
-#define SA_NODEFER     0x40000000U
-#define SA_RESETHAND   0x80000000U
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define SA_RESTORER    0x04000000U
 
 #define MINSIGSTKSZ    2048
index 9a14a61..0189f32 100644 (file)
@@ -65,30 +65,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN        32
 #define SIGRTMAX        _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP    0x00000001
-#define SA_NOCLDWAIT    0x00000002
-#define SA_SIGINFO      0x00000004
-#define SA_ONSTACK      0x08000000
-#define SA_RESTART      0x10000000
-#define SA_NODEFER      0x40000000
-#define SA_RESETHAND    0x80000000
-
-#define SA_NOMASK       SA_NODEFER
-#define SA_ONESHOT      SA_RESETHAND
-
 #define SA_RESTORER     0x04000000
 
 #define MINSIGSTKSZ     2048
index ff95059..53758d5 100644 (file)
@@ -137,13 +137,11 @@ struct sigstack {
 #define SA_STACK       _SV_SSTACK
 #define SA_ONSTACK     _SV_SSTACK
 #define SA_RESTART     _SV_INTR
-#define SA_ONESHOT     _SV_RESET
+#define SA_RESETHAND   _SV_RESET
 #define SA_NODEFER     0x20u
 #define SA_NOCLDWAIT    0x100u
 #define SA_SIGINFO      0x200u
 
-#define SA_NOMASK      SA_NODEFER
-
 #define SIG_BLOCK          0x01        /* for blocking signals */
 #define SIG_UNBLOCK        0x02        /* for unblocking signals */
 #define SIG_SETMASK        0x04        /* for setting the signal mask */
index e5745d5..164a22a 100644 (file)
@@ -62,30 +62,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001u
-#define SA_NOCLDWAIT   0x00000002u
-#define SA_SIGINFO     0x00000004u
-#define SA_ONSTACK     0x08000000u
-#define SA_RESTART     0x10000000u
-#define SA_NODEFER     0x40000000u
-#define SA_RESETHAND   0x80000000u
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define SA_RESTORER    0x04000000
 
 #define MINSIGSTKSZ    2048
index a7f3e12..ddfd919 100644 (file)
@@ -165,16 +165,9 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
 {
        signal_compat_build_tests();
 
-       /* Don't leak in-kernel non-uapi flags to user-space */
-       if (oact)
-               oact->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
-
        if (!act)
                return;
 
-       /* Don't let flags to be set from userspace */
-       act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
-
        if (in_ia32_syscall())
                act->sa.sa_flags |= SA_IA32_ABI;
        if (in_x32_syscall())
index 005dec5..79ddaba 100644 (file)
@@ -72,30 +72,6 @@ typedef struct {
 #define SIGRTMIN       32
 #define SIGRTMAX       (_NSIG-1)
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002 /* not supported yet */
-#define SA_SIGINFO     0x00000004
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define SA_RESTORER    0x04000000
 
 #define MINSIGSTKSZ    2048
index 770d840..d4eac6d 100644 (file)
@@ -1720,3 +1720,58 @@ void __init acpi_iort_init(void)
 
        iort_init_platform_devices();
 }
+
+#ifdef CONFIG_ZONE_DMA
+/*
+ * Extract the highest CPU physical address accessible to all DMA masters in
+ * the system. PHYS_ADDR_MAX is returned when no constrained device is found.
+ */
+phys_addr_t __init acpi_iort_dma_get_max_cpu_address(void)
+{
+       phys_addr_t limit = PHYS_ADDR_MAX;
+       struct acpi_iort_node *node, *end;
+       struct acpi_table_iort *iort;
+       acpi_status status;
+       int i;
+
+       if (acpi_disabled)
+               return limit;
+
+       status = acpi_get_table(ACPI_SIG_IORT, 0,
+                               (struct acpi_table_header **)&iort);
+       if (ACPI_FAILURE(status))
+               return limit;
+
+       node = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->node_offset);
+       end = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->header.length);
+
+       for (i = 0; i < iort->node_count; i++) {
+               if (node >= end)
+                       break;
+
+               switch (node->type) {
+                       struct acpi_iort_named_component *ncomp;
+                       struct acpi_iort_root_complex *rc;
+                       phys_addr_t local_limit;
+
+               case ACPI_IORT_NODE_NAMED_COMPONENT:
+                       ncomp = (struct acpi_iort_named_component *)node->node_data;
+                       local_limit = DMA_BIT_MASK(ncomp->memory_address_limit);
+                       limit = min_not_zero(limit, local_limit);
+                       break;
+
+               case ACPI_IORT_NODE_PCI_ROOT_COMPLEX:
+                       if (node->revision < 1)
+                               break;
+
+                       rc = (struct acpi_iort_root_complex *)node->node_data;
+                       local_limit = DMA_BIT_MASK(rc->memory_address_limit);
+                       limit = min_not_zero(limit, local_limit);
+                       break;
+               }
+               node = ACPI_ADD_PTR(struct acpi_iort_node, node, node->length);
+       }
+       acpi_put_table(&iort->header);
+       return limit;
+}
+#endif
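
A worked example of the reduction in the loop above, with hypothetical node values: min_not_zero() keeps the smallest non-zero limit, so nodes whose memory_address_limit is unset (0) are ignored rather than shrinking the limit to zero.

#include <linux/dma-mapping.h>
#include <linux/kernel.h>

static phys_addr_t __init example_iort_limit(void)
{
        phys_addr_t limit = PHYS_ADDR_MAX;

        /* Named component reporting a 32-bit limit. */
        limit = min_not_zero(limit, (phys_addr_t)DMA_BIT_MASK(32)); /* 0xffffffff */

        /* Node with memory_address_limit unset: DMA_BIT_MASK(0) == 0, ignored. */
        limit = min_not_zero(limit, (phys_addr_t)DMA_BIT_MASK(0));  /* unchanged */

        return limit;
}
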
index 840754d..a7e762c 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/spinlock.h>
-#include <linux/uaccess.h>
 
 /*
  * The call to use to reach the firmware.
@@ -1092,26 +1091,13 @@ int sdei_event_handler(struct pt_regs *regs,
                       struct sdei_registered_event *arg)
 {
        int err;
-       mm_segment_t orig_addr_limit;
        u32 event_num = arg->event_num;
 
-       /*
-        * Save restore 'fs'.
-        * The architecture's entry code save/restores 'fs' when taking an
-        * exception from the kernel. This ensures addr_limit isn't inherited
-        * if you interrupted something that allowed the uaccess routines to
-        * access kernel memory.
-        * Do the same here because this doesn't come via the same entry code.
-       */
-       orig_addr_limit = force_uaccess_begin();
-
        err = arg->callback(event_num, regs, arg->callback_arg);
        if (err)
                pr_err_ratelimited("event %u on CPU %u failed with error: %d\n",
                                   event_num, smp_processor_id(), err);
 
-       force_uaccess_end(orig_addr_limit);
-
        return err;
 }
 NOKPROBE_SYMBOL(sdei_event_handler);
index 1c3257a..73ddf25 100644 (file)
@@ -1024,6 +1024,48 @@ out:
 }
 #endif /* CONFIG_HAS_DMA */
 
+/**
+ * of_dma_get_max_cpu_address - Gets highest CPU address suitable for DMA
+ * @np: The node to start searching from or NULL to start from the root
+ *
+ * Gets the highest CPU physical address that is addressable by all DMA masters
+ * in the sub-tree pointed by np, or the whole tree if NULL is passed. If no
+ * in the sub-tree pointed to by @np, or the whole tree if NULL is passed. If no
+ * DMA-constrained device is found, it returns PHYS_ADDR_MAX.
+phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np)
+{
+       phys_addr_t max_cpu_addr = PHYS_ADDR_MAX;
+       struct of_range_parser parser;
+       phys_addr_t subtree_max_addr;
+       struct device_node *child;
+       struct of_range range;
+       const __be32 *ranges;
+       u64 cpu_end = 0;
+       int len;
+
+       if (!np)
+               np = of_root;
+
+       ranges = of_get_property(np, "dma-ranges", &len);
+       if (ranges && len) {
+               of_dma_range_parser_init(&parser, np);
+               for_each_of_range(&parser, &range)
+                       if (range.cpu_addr + range.size > cpu_end)
+                               cpu_end = range.cpu_addr + range.size - 1;
+
+               if (max_cpu_addr > cpu_end)
+                       max_cpu_addr = cpu_end;
+       }
+
+       for_each_available_child_of_node(np, child) {
+               subtree_max_addr = of_dma_get_max_cpu_address(child);
+               if (max_cpu_addr > subtree_max_addr)
+                       max_cpu_addr = subtree_max_addr;
+       }
+
+       return max_cpu_addr;
+}
+
 /**
  * of_dma_is_coherent - Check if device is coherent
  * @np:        device node
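
Together with acpi_iort_dma_get_max_cpu_address() added earlier in this series, this helper provides the information used to decide on a 32-bit or smaller ZONE_DMA. The sketch below is an approximation of how an architecture might combine the two limits, not the exact arm64 code; the declarations are assumed to live in of.h and acpi_iort.h as added by this series.

#include <linux/acpi_iort.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/of.h>

static unsigned int __init example_zone_dma_bits(void)
{
        phys_addr_t dt_limit   = of_dma_get_max_cpu_address(NULL);
        phys_addr_t acpi_limit = acpi_iort_dma_get_max_cpu_address();

        /*
         * Default to a 32-bit ZONE_DMA and only shrink it when firmware
         * describes a more constrained DMA master; unconstrained systems
         * report PHYS_ADDR_MAX and therefore keep the 32-bit default.
         */
        return min3(32U, (unsigned int)fls64(dt_limit),
                    (unsigned int)fls64(acpi_limit));
}
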
index 06cc988..eb51bc1 100644 (file)
@@ -869,6 +869,26 @@ static void __init of_unittest_changeset(void)
 #endif
 }
 
+static void __init of_unittest_dma_get_max_cpu_address(void)
+{
+       struct device_node *np;
+       phys_addr_t cpu_addr;
+
+       if (!IS_ENABLED(CONFIG_OF_ADDRESS))
+               return;
+
+       np = of_find_node_by_path("/testcase-data/address-tests");
+       if (!np) {
+               pr_err("missing testcase data\n");
+               return;
+       }
+
+       cpu_addr = of_dma_get_max_cpu_address(np);
+       unittest(cpu_addr == 0x4fffffff,
+                "of_dma_get_max_cpu_address: wrong CPU addr %pad (expecting %x)\n",
+                &cpu_addr, 0x4fffffff);
+}
+
 static void __init of_unittest_dma_ranges_one(const char *path,
                u64 expect_dma_addr, u64 expect_paddr)
 {
@@ -3266,6 +3286,7 @@ static int __init of_unittest(void)
        of_unittest_changeset();
        of_unittest_parse_interrupts();
        of_unittest_parse_interrupts_extended();
+       of_unittest_dma_get_max_cpu_address();
        of_unittest_parse_dma_ranges();
        of_unittest_pci_dma_ranges();
        of_unittest_match_node();
index 130327f..3075cf1 100644 (file)
@@ -130,6 +130,13 @@ config ARM_SPE_PMU
          Extension, which provides periodic sampling of operations in
          the CPU pipeline and reports this via the perf AUX interface.
 
+config ARM_DMC620_PMU
+       tristate "Enable PMU support for the ARM DMC-620 memory controller"
+       depends on (ARM64 && ACPI) || COMPILE_TEST
+       help
+         Support for PMU events monitoring on the ARM DMC-620 memory
+         controller.
+
 source "drivers/perf/hisilicon/Kconfig"
 
 endmenu
index 5365fd5..5260b11 100644 (file)
@@ -13,3 +13,4 @@ obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
 obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
+obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
new file mode 100644 (file)
index 0000000..004930e
--- /dev/null
@@ -0,0 +1,748 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ARM DMC-620 memory controller PMU driver
+ *
+ * Copyright (C) 2020 Ampere Computing LLC.
+ */
+
+#define DMC620_PMUNAME         "arm_dmc620"
+#define DMC620_DRVNAME         DMC620_PMUNAME "_pmu"
+#define pr_fmt(fmt)            DMC620_DRVNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/refcount.h>
+
+#define DMC620_PA_SHIFT                                        12
+#define DMC620_CNT_INIT                                        0x80000000
+#define DMC620_CNT_MAX_PERIOD                          0xffffffff
+#define DMC620_PMU_CLKDIV2_MAX_COUNTERS                        8
+#define DMC620_PMU_CLK_MAX_COUNTERS                    2
+#define DMC620_PMU_MAX_COUNTERS                                \
+       (DMC620_PMU_CLKDIV2_MAX_COUNTERS + DMC620_PMU_CLK_MAX_COUNTERS)
+
+/*
+ * The PMU registers start at 0xA00 in the DMC-620 memory map, and these
+ * offsets are relative to that base.
+ *
+ * Each counter has a group of control/value registers, and the
+ * DMC620_PMU_COUNTERn offsets are within a counter group.
+ *
+ * The counter register groups start at 0xA10.
+ */
+#define DMC620_PMU_OVERFLOW_STATUS_CLKDIV2             0x8
+#define  DMC620_PMU_OVERFLOW_STATUS_CLKDIV2_MASK       \
+               (DMC620_PMU_CLKDIV2_MAX_COUNTERS - 1)
+#define DMC620_PMU_OVERFLOW_STATUS_CLK                 0xC
+#define  DMC620_PMU_OVERFLOW_STATUS_CLK_MASK           \
+               (DMC620_PMU_CLK_MAX_COUNTERS - 1)
+#define DMC620_PMU_COUNTERS_BASE                       0x10
+#define DMC620_PMU_COUNTERn_MASK_31_00                 0x0
+#define DMC620_PMU_COUNTERn_MASK_63_32                 0x4
+#define DMC620_PMU_COUNTERn_MATCH_31_00                        0x8
+#define DMC620_PMU_COUNTERn_MATCH_63_32                        0xC
+#define DMC620_PMU_COUNTERn_CONTROL                    0x10
+#define  DMC620_PMU_COUNTERn_CONTROL_ENABLE            BIT(0)
+#define  DMC620_PMU_COUNTERn_CONTROL_INVERT            BIT(1)
+#define  DMC620_PMU_COUNTERn_CONTROL_EVENT_MUX         GENMASK(6, 2)
+#define  DMC620_PMU_COUNTERn_CONTROL_INCR_MUX          GENMASK(8, 7)
+#define DMC620_PMU_COUNTERn_VALUE                      0x20
+/* Offset of the registers for a given counter, relative to 0xA00 */
+#define DMC620_PMU_COUNTERn_OFFSET(n) \
+       (DMC620_PMU_COUNTERS_BASE + 0x28 * (n))
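
A worked example of the layout described above (the counter index is arbitrary): for counter 9, the second "clk" counter, DMC620_PMU_COUNTERn_OFFSET(9) is 0x10 + 0x28 * 9 = 0x178, its VALUE register sits at 0x178 + 0x20 = 0x198 from the PMU base, i.e. 0xA00 + 0x198 = 0xB98 in the DMC-620 memory map. A sketch mirroring what dmc620_pmu_creg_read() further down computes:

static u32 example_read_clk_counter1(void __iomem *pmu_base)
{
        /* pmu_base corresponds to offset 0xA00 of the DMC-620. */
        return readl(pmu_base + DMC620_PMU_COUNTERn_OFFSET(9) +
                     DMC620_PMU_COUNTERn_VALUE);
}
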
+
+static LIST_HEAD(dmc620_pmu_irqs);
+static DEFINE_MUTEX(dmc620_pmu_irqs_lock);
+
+struct dmc620_pmu_irq {
+       struct hlist_node node;
+       struct list_head pmus_node;
+       struct list_head irqs_node;
+       refcount_t refcount;
+       unsigned int irq_num;
+       unsigned int cpu;
+};
+
+struct dmc620_pmu {
+       struct pmu pmu;
+
+       void __iomem *base;
+       struct dmc620_pmu_irq *irq;
+       struct list_head pmus_node;
+
+       /*
+        * All clkdiv2 and clk counters share the same array.
+        * The first DMC620_PMU_CLKDIV2_MAX_COUNTERS bits belong to
+        * the clkdiv2 counters, the last DMC620_PMU_CLK_MAX_COUNTERS
+        * bits to the clk counters.
+        */
+       DECLARE_BITMAP(used_mask, DMC620_PMU_MAX_COUNTERS);
+       struct perf_event *events[DMC620_PMU_MAX_COUNTERS];
+};
+
+#define to_dmc620_pmu(p) (container_of(p, struct dmc620_pmu, pmu))
+
+static int cpuhp_state_num;
+
+struct dmc620_pmu_event_attr {
+       struct device_attribute attr;
+       u8 clkdiv2;
+       u8 eventid;
+};
+
+static ssize_t
+dmc620_pmu_event_show(struct device *dev,
+                          struct device_attribute *attr, char *page)
+{
+       struct dmc620_pmu_event_attr *eattr;
+
+       eattr = container_of(attr, typeof(*eattr), attr);
+
+       return sprintf(page, "event=0x%x,clkdiv2=0x%x\n", eattr->eventid, eattr->clkdiv2);
+}
+
+#define DMC620_PMU_EVENT_ATTR(_name, _eventid, _clkdiv2)               \
+       (&((struct dmc620_pmu_event_attr[]) {{                          \
+               .attr = __ATTR(_name, 0444, dmc620_pmu_event_show, NULL),       \
+               .clkdiv2 = _clkdiv2,                                            \
+               .eventid = _eventid,                                    \
+       }})[0].attr.attr)
+
+static struct attribute *dmc620_pmu_events_attrs[] = {
+       /* clkdiv2 events list */
+       DMC620_PMU_EVENT_ATTR(clkdiv2_cycle_count, 0x0, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_allocate, 0x1, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_queue_depth, 0x2, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_waiting_for_wr_data, 0x3, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_read_backlog, 0x4, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_waiting_for_mi, 0x5, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_hazard_resolution, 0x6, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_enqueue, 0x7, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_arbitrate, 0x8, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_lrank_turnaround_activate, 0x9, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_prank_turnaround_activate, 0xa, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_read_depth, 0xb, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_write_depth, 0xc, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_highigh_qos_depth, 0xd, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_high_qos_depth, 0xe, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_medium_qos_depth, 0xf, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_low_qos_depth, 0x10, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_activate, 0x11, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_rdwr, 0x12, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_refresh, 0x13, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_training_request, 0x14, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_t_mac_tracker, 0x15, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_bk_fsm_tracker, 0x16, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_bk_open_tracker, 0x17, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_ranks_in_pwr_down, 0x18, 1),
+       DMC620_PMU_EVENT_ATTR(clkdiv2_ranks_in_sref, 0x19, 1),
+
+       /* clk events list */
+       DMC620_PMU_EVENT_ATTR(clk_cycle_count, 0x0, 0),
+       DMC620_PMU_EVENT_ATTR(clk_request, 0x1, 0),
+       DMC620_PMU_EVENT_ATTR(clk_upload_stall, 0x2, 0),
+       NULL,
+};
+
+static struct attribute_group dmc620_pmu_events_attr_group = {
+       .name = "events",
+       .attrs = dmc620_pmu_events_attrs,
+};
+
+/* User ABI */
+#define ATTR_CFG_FLD_mask_CFG          config
+#define ATTR_CFG_FLD_mask_LO           0
+#define ATTR_CFG_FLD_mask_HI           44
+#define ATTR_CFG_FLD_match_CFG         config1
+#define ATTR_CFG_FLD_match_LO          0
+#define ATTR_CFG_FLD_match_HI          44
+#define ATTR_CFG_FLD_invert_CFG                config2
+#define ATTR_CFG_FLD_invert_LO         0
+#define ATTR_CFG_FLD_invert_HI         0
+#define ATTR_CFG_FLD_incr_CFG          config2
+#define ATTR_CFG_FLD_incr_LO           1
+#define ATTR_CFG_FLD_incr_HI           2
+#define ATTR_CFG_FLD_event_CFG         config2
+#define ATTR_CFG_FLD_event_LO          3
+#define ATTR_CFG_FLD_event_HI          8
+#define ATTR_CFG_FLD_clkdiv2_CFG       config2
+#define ATTR_CFG_FLD_clkdiv2_LO                9
+#define ATTR_CFG_FLD_clkdiv2_HI                9
+
+#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)                     \
+       (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi
+
+#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi)                      \
+       __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)
+
+#define GEN_PMU_FORMAT_ATTR(name)                              \
+       PMU_FORMAT_ATTR(name,                                   \
+       _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG,         \
+                            ATTR_CFG_FLD_##name##_LO,          \
+                            ATTR_CFG_FLD_##name##_HI))
+
+#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi)                   \
+       ((((attr)->cfg) >> lo) & GENMASK_ULL(hi - lo, 0))
+
+#define ATTR_CFG_GET_FLD(attr, name)                           \
+       _ATTR_CFG_GET_FLD(attr,                                 \
+                         ATTR_CFG_FLD_##name##_CFG,            \
+                         ATTR_CFG_FLD_##name##_LO,             \
+                         ATTR_CFG_FLD_##name##_HI)
+
+GEN_PMU_FORMAT_ATTR(mask);
+GEN_PMU_FORMAT_ATTR(match);
+GEN_PMU_FORMAT_ATTR(invert);
+GEN_PMU_FORMAT_ATTR(incr);
+GEN_PMU_FORMAT_ATTR(event);
+GEN_PMU_FORMAT_ATTR(clkdiv2);
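
A sketch of how these fields are unpacked from a perf attribute; the config2 value is made up purely for illustration. From userspace the same encoding is reached via the format strings above, e.g. something like perf stat -a -e arm_dmc620_<instance>/clkdiv2=1,event=0x3,incr=1/ (the instance suffix, derived from the controller's physical address, is hypothetical here).

static void example_decode_config(void)
{
        /* Assumed user-supplied value: 0x21a == 0b10_0001_1010. */
        struct perf_event_attr attr = { .config2 = 0x21a };

        unsigned int invert  = ATTR_CFG_GET_FLD(&attr, invert);   /* bit  0   -> 0   */
        unsigned int incr    = ATTR_CFG_GET_FLD(&attr, incr);     /* bits 2:1 -> 0x1 */
        unsigned int event   = ATTR_CFG_GET_FLD(&attr, event);    /* bits 8:3 -> 0x3 */
        unsigned int clkdiv2 = ATTR_CFG_GET_FLD(&attr, clkdiv2);  /* bit  9   -> 1   */
}
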
+
+static struct attribute *dmc620_pmu_formats_attrs[] = {
+       &format_attr_mask.attr,
+       &format_attr_match.attr,
+       &format_attr_invert.attr,
+       &format_attr_incr.attr,
+       &format_attr_event.attr,
+       &format_attr_clkdiv2.attr,
+       NULL,
+};
+
+static struct attribute_group dmc620_pmu_format_attr_group = {
+       .name   = "format",
+       .attrs  = dmc620_pmu_formats_attrs,
+};
+
+static const struct attribute_group *dmc620_pmu_attr_groups[] = {
+       &dmc620_pmu_events_attr_group,
+       &dmc620_pmu_format_attr_group,
+       NULL,
+};
+
+static inline
+u32 dmc620_pmu_creg_read(struct dmc620_pmu *dmc620_pmu,
+                       unsigned int idx, unsigned int reg)
+{
+       return readl(dmc620_pmu->base + DMC620_PMU_COUNTERn_OFFSET(idx) + reg);
+}
+
+static inline
+void dmc620_pmu_creg_write(struct dmc620_pmu *dmc620_pmu,
+                       unsigned int idx, unsigned int reg, u32 val)
+{
+       writel(val, dmc620_pmu->base + DMC620_PMU_COUNTERn_OFFSET(idx) + reg);
+}
+
+static
+unsigned int dmc620_event_to_counter_control(struct perf_event *event)
+{
+       struct perf_event_attr *attr = &event->attr;
+       unsigned int reg = 0;
+
+       reg |= FIELD_PREP(DMC620_PMU_COUNTERn_CONTROL_INVERT,
+                       ATTR_CFG_GET_FLD(attr, invert));
+       reg |= FIELD_PREP(DMC620_PMU_COUNTERn_CONTROL_EVENT_MUX,
+                       ATTR_CFG_GET_FLD(attr, event));
+       reg |= FIELD_PREP(DMC620_PMU_COUNTERn_CONTROL_INCR_MUX,
+                       ATTR_CFG_GET_FLD(attr, incr));
+
+       return reg;
+}
+
+static int dmc620_get_event_idx(struct perf_event *event)
+{
+       struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+       int idx, start_idx, end_idx;
+
+       if (ATTR_CFG_GET_FLD(&event->attr, clkdiv2)) {
+               start_idx = 0;
+               end_idx = DMC620_PMU_CLKDIV2_MAX_COUNTERS;
+       } else {
+               start_idx = DMC620_PMU_CLKDIV2_MAX_COUNTERS;
+               end_idx = DMC620_PMU_MAX_COUNTERS;
+       }
+
+       for (idx = start_idx; idx < end_idx; ++idx) {
+               if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
+                       return idx;
+       }
+
+       /* The counters are all in use. */
+       return -EAGAIN;
+}
+
+static inline
+u64 dmc620_pmu_read_counter(struct perf_event *event)
+{
+       struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+
+       return dmc620_pmu_creg_read(dmc620_pmu,
+                                   event->hw.idx, DMC620_PMU_COUNTERn_VALUE);
+}
+
+static void dmc620_pmu_event_update(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 delta, prev_count, new_count;
+
+       do {
+               /* We may also be called from the irq handler */
+               prev_count = local64_read(&hwc->prev_count);
+               new_count = dmc620_pmu_read_counter(event);
+       } while (local64_cmpxchg(&hwc->prev_count,
+                       prev_count, new_count) != prev_count);
+       delta = (new_count - prev_count) & DMC620_CNT_MAX_PERIOD;
+       local64_add(delta, &event->count);
+}
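
Masking the difference with DMC620_CNT_MAX_PERIOD makes the subtraction effectively modulo 2^32, so a counter that wrapped between two reads still produces the correct delta. A worked example with made-up readings:

static u64 example_wrapped_delta(void)
{
        u64 prev_count = 0xfffffff0;    /* hypothetical previous snapshot */
        u64 new_count  = 0x00000010;    /* counter wrapped since the last read */

        return (new_count - prev_count) & DMC620_CNT_MAX_PERIOD;       /* 0x20 */
}
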
+
+static void dmc620_pmu_event_set_period(struct perf_event *event)
+{
+       struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+
+       local64_set(&event->hw.prev_count, DMC620_CNT_INIT);
+       dmc620_pmu_creg_write(dmc620_pmu,
+                             event->hw.idx, DMC620_PMU_COUNTERn_VALUE, DMC620_CNT_INIT);
+}
+
+static void dmc620_pmu_enable_counter(struct perf_event *event)
+{
+       struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+       u32 reg;
+
+       reg = dmc620_event_to_counter_control(event) | DMC620_PMU_COUNTERn_CONTROL_ENABLE;
+       dmc620_pmu_creg_write(dmc620_pmu,
+                             event->hw.idx, DMC620_PMU_COUNTERn_CONTROL, reg);
+}
+
+static void dmc620_pmu_disable_counter(struct perf_event *event)
+{
+       struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+
+       dmc620_pmu_creg_write(dmc620_pmu,
+                             event->hw.idx, DMC620_PMU_COUNTERn_CONTROL, 0);
+}
+
+static irqreturn_t dmc620_pmu_handle_irq(int irq_num, void *data)
+{
+       struct dmc620_pmu_irq *irq = data;
+       struct dmc620_pmu *dmc620_pmu;
+       irqreturn_t ret = IRQ_NONE;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(dmc620_pmu, &irq->pmus_node, pmus_node) {
+               unsigned long status;
+               struct perf_event *event;
+               unsigned int idx;
+
+               /*
+                * The HW doesn't provide a control to atomically disable all counters.
+                * To prevent a race (an overflow occurring while the status register is
+                * being cleared), disable all events before continuing.
+                */
+               for (idx = 0; idx < DMC620_PMU_MAX_COUNTERS; idx++) {
+                       event = dmc620_pmu->events[idx];
+                       if (!event)
+                               continue;
+                       dmc620_pmu_disable_counter(event);
+               }
+
+               status = readl(dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLKDIV2);
+               status |= (readl(dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLK) <<
+                               DMC620_PMU_CLKDIV2_MAX_COUNTERS);
+               if (status) {
+                       for_each_set_bit(idx, &status,
+                                       DMC620_PMU_MAX_COUNTERS) {
+                               event = dmc620_pmu->events[idx];
+                               if (WARN_ON_ONCE(!event))
+                                       continue;
+                               dmc620_pmu_event_update(event);
+                               dmc620_pmu_event_set_period(event);
+                       }
+
+                       if (status & DMC620_PMU_OVERFLOW_STATUS_CLKDIV2_MASK)
+                               writel(0, dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLKDIV2);
+
+                       if ((status >> DMC620_PMU_CLKDIV2_MAX_COUNTERS) &
+                               DMC620_PMU_OVERFLOW_STATUS_CLK_MASK)
+                               writel(0, dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLK);
+               }
+
+               for (idx = 0; idx < DMC620_PMU_MAX_COUNTERS; idx++) {
+                       event = dmc620_pmu->events[idx];
+                       if (!event)
+                               continue;
+                       if (!(event->hw.state & PERF_HES_STOPPED))
+                               dmc620_pmu_enable_counter(event);
+               }
+
+               ret = IRQ_HANDLED;
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
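
The status word assembled in the handler packs the clkdiv2 overflow bits into bits [7:0] and the clk overflow bits into bits [9:8], matching the index split used by dmc620_get_event_idx(). A worked example with hypothetical register contents:

static void example_status_word(void)
{
        unsigned long clkdiv2_status = 0x81;    /* clkdiv2 counters 0 and 7 overflowed */
        unsigned long clk_status     = 0x2;     /* second clk counter overflowed */
        unsigned long status;

        status = clkdiv2_status |
                 (clk_status << DMC620_PMU_CLKDIV2_MAX_COUNTERS);
        /* status == 0x281: bits 0, 7 and 9 map to events[0], events[7], events[9]. */
}
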
+
+static struct dmc620_pmu_irq *__dmc620_pmu_get_irq(int irq_num)
+{
+       struct dmc620_pmu_irq *irq;
+       int ret;
+
+       list_for_each_entry(irq, &dmc620_pmu_irqs, irqs_node)
+               if (irq->irq_num == irq_num && refcount_inc_not_zero(&irq->refcount))
+                       return irq;
+
+       irq = kzalloc(sizeof(*irq), GFP_KERNEL);
+       if (!irq)
+               return ERR_PTR(-ENOMEM);
+
+       INIT_LIST_HEAD(&irq->pmus_node);
+
+       /* Pick one CPU to be the preferred one to use */
+       irq->cpu = raw_smp_processor_id();
+       refcount_set(&irq->refcount, 1);
+
+       ret = request_irq(irq_num, dmc620_pmu_handle_irq,
+                         IRQF_NOBALANCING | IRQF_NO_THREAD,
+                         "dmc620-pmu", irq);
+       if (ret)
+               goto out_free_aff;
+
+       ret = irq_set_affinity_hint(irq_num, cpumask_of(irq->cpu));
+       if (ret)
+               goto out_free_irq;
+
+       ret = cpuhp_state_add_instance_nocalls(cpuhp_state_num, &irq->node);
+       if (ret)
+               goto out_free_irq;
+
+       irq->irq_num = irq_num;
+       list_add(&irq->irqs_node, &dmc620_pmu_irqs);
+
+       return irq;
+
+out_free_irq:
+       free_irq(irq_num, irq);
+out_free_aff:
+       kfree(irq);
+       return ERR_PTR(ret);
+}
+
+static int dmc620_pmu_get_irq(struct dmc620_pmu *dmc620_pmu, int irq_num)
+{
+       struct dmc620_pmu_irq *irq;
+
+       mutex_lock(&dmc620_pmu_irqs_lock);
+       irq = __dmc620_pmu_get_irq(irq_num);
+       mutex_unlock(&dmc620_pmu_irqs_lock);
+
+       if (IS_ERR(irq))
+               return PTR_ERR(irq);
+
+       dmc620_pmu->irq = irq;
+       mutex_lock(&dmc620_pmu_irqs_lock);
+       list_add_rcu(&dmc620_pmu->pmus_node, &irq->pmus_node);
+       mutex_unlock(&dmc620_pmu_irqs_lock);
+
+       return 0;
+}
+
+static void dmc620_pmu_put_irq(struct dmc620_pmu *dmc620_pmu)
+{
+       struct dmc620_pmu_irq *irq = dmc620_pmu->irq;
+
+       mutex_lock(&dmc620_pmu_irqs_lock);
+       list_del_rcu(&dmc620_pmu->pmus_node);
+
+       if (!refcount_dec_and_test(&irq->refcount)) {
+               mutex_unlock(&dmc620_pmu_irqs_lock);
+               return;
+       }
+
+       list_del(&irq->irqs_node);
+       mutex_unlock(&dmc620_pmu_irqs_lock);
+
+       WARN_ON(irq_set_affinity_hint(irq->irq_num, NULL));
+       free_irq(irq->irq_num, irq);
+       cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &irq->node);
+       kfree(irq);
+}
+
+static int dmc620_pmu_event_init(struct perf_event *event)
+{
+       struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_event *sibling;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       /*
+        * DMC-620 PMUs are shared across all CPUs and cannot
+        * support task-bound or sampling events.
+        */
+       if (is_sampling_event(event) ||
+               event->attach_state & PERF_ATTACH_TASK) {
+               dev_dbg(dmc620_pmu->pmu.dev,
+                       "Can't support per-task counters\n");
+               return -EOPNOTSUPP;
+       }
+
+       /*
+        * Many perf core operations (e.g. event rotation) operate on a
+        * single CPU context. This is obvious for CPU PMUs, where one
+        * expects the same set of events to be observed on all CPUs,
+        * but can lead to issues for off-core PMUs, where each
+        * event could theoretically be assigned to a different CPU. To
+        * mitigate this, we enforce CPU assignment to one selected
+        * processor.
+        */
+       event->cpu = dmc620_pmu->irq->cpu;
+       if (event->cpu < 0)
+               return -EINVAL;
+
+       /*
+        * We can't atomically disable all HW counters, so only one hardware
+        * event is allowed per group, although software events are acceptable.
+        */
+       if (event->group_leader != event &&
+                       !is_software_event(event->group_leader))
+               return -EINVAL;
+
+       for_each_sibling_event(sibling, event->group_leader) {
+               if (sibling != event &&
+                               !is_software_event(sibling))
+                       return -EINVAL;
+       }
+
+       hwc->idx = -1;
+       return 0;
+}
+
+static void dmc620_pmu_read(struct perf_event *event)
+{
+       dmc620_pmu_event_update(event);
+}
+
+static void dmc620_pmu_start(struct perf_event *event, int flags)
+{
+       event->hw.state = 0;
+       dmc620_pmu_event_set_period(event);
+       dmc620_pmu_enable_counter(event);
+}
+
+static void dmc620_pmu_stop(struct perf_event *event, int flags)
+{
+       if (event->hw.state & PERF_HES_STOPPED)
+               return;
+
+       dmc620_pmu_disable_counter(event);
+       dmc620_pmu_event_update(event);
+       event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int dmc620_pmu_add(struct perf_event *event, int flags)
+{
+       struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+       struct perf_event_attr *attr = &event->attr;
+       struct hw_perf_event *hwc = &event->hw;
+       int idx;
+       u64 reg;
+
+       idx = dmc620_get_event_idx(event);
+       if (idx < 0)
+               return idx;
+
+       hwc->idx = idx;
+       dmc620_pmu->events[idx] = event;
+       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+       reg = ATTR_CFG_GET_FLD(attr, mask);
+       dmc620_pmu_creg_write(dmc620_pmu,
+                             idx, DMC620_PMU_COUNTERn_MASK_31_00, lower_32_bits(reg));
+       dmc620_pmu_creg_write(dmc620_pmu,
+                             idx, DMC620_PMU_COUNTERn_MASK_63_32, upper_32_bits(reg));
+
+       reg = ATTR_CFG_GET_FLD(attr, match);
+       dmc620_pmu_creg_write(dmc620_pmu,
+                             idx, DMC620_PMU_COUNTERn_MATCH_31_00, lower_32_bits(reg));
+       dmc620_pmu_creg_write(dmc620_pmu,
+                             idx, DMC620_PMU_COUNTERn_MATCH_63_32, upper_32_bits(reg));
+
+       if (flags & PERF_EF_START)
+               dmc620_pmu_start(event, PERF_EF_RELOAD);
+
+       perf_event_update_userpage(event);
+       return 0;
+}
+
+static void dmc620_pmu_del(struct perf_event *event, int flags)
+{
+       struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       dmc620_pmu_stop(event, PERF_EF_UPDATE);
+       dmc620_pmu->events[idx] = NULL;
+       clear_bit(idx, dmc620_pmu->used_mask);
+       perf_event_update_userpage(event);
+}
+
+static int dmc620_pmu_cpu_teardown(unsigned int cpu,
+                                  struct hlist_node *node)
+{
+       struct dmc620_pmu_irq *irq;
+       struct dmc620_pmu *dmc620_pmu;
+       unsigned int target;
+
+       irq = hlist_entry_safe(node, struct dmc620_pmu_irq, node);
+       if (cpu != irq->cpu)
+               return 0;
+
+       target = cpumask_any_but(cpu_online_mask, cpu);
+       if (target >= nr_cpu_ids)
+               return 0;
+
+       /* We're only reading, but this isn't the place to be involving RCU */
+       mutex_lock(&dmc620_pmu_irqs_lock);
+       list_for_each_entry(dmc620_pmu, &irq->pmus_node, pmus_node)
+               perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
+       mutex_unlock(&dmc620_pmu_irqs_lock);
+
+       WARN_ON(irq_set_affinity_hint(irq->irq_num, cpumask_of(target)));
+       irq->cpu = target;
+
+       return 0;
+}
+
+static int dmc620_pmu_device_probe(struct platform_device *pdev)
+{
+       struct dmc620_pmu *dmc620_pmu;
+       struct resource *res;
+       char *name;
+       int irq_num;
+       int i, ret;
+
+       dmc620_pmu = devm_kzalloc(&pdev->dev,
+                       sizeof(struct dmc620_pmu), GFP_KERNEL);
+       if (!dmc620_pmu)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, dmc620_pmu);
+
+       dmc620_pmu->pmu = (struct pmu) {
+               .module = THIS_MODULE,
+               .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
+               .task_ctx_nr    = perf_invalid_context,
+               .event_init     = dmc620_pmu_event_init,
+               .add            = dmc620_pmu_add,
+               .del            = dmc620_pmu_del,
+               .start          = dmc620_pmu_start,
+               .stop           = dmc620_pmu_stop,
+               .read           = dmc620_pmu_read,
+               .attr_groups    = dmc620_pmu_attr_groups,
+       };
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       dmc620_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(dmc620_pmu->base))
+               return PTR_ERR(dmc620_pmu->base);
+
+       /* Make sure device is reset before enabling interrupt */
+       for (i = 0; i < DMC620_PMU_MAX_COUNTERS; i++)
+               dmc620_pmu_creg_write(dmc620_pmu, i, DMC620_PMU_COUNTERn_CONTROL, 0);
+       writel(0, dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLKDIV2);
+       writel(0, dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLK);
+
+       irq_num = platform_get_irq(pdev, 0);
+       if (irq_num < 0)
+               return irq_num;
+
+       ret = dmc620_pmu_get_irq(dmc620_pmu, irq_num);
+       if (ret)
+               return ret;
+
+       name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+                                 "%s_%llx", DMC620_PMUNAME,
+                                 (u64)(res->start >> DMC620_PA_SHIFT));
+       if (!name) {
+               dev_err(&pdev->dev,
+                         "Create name failed, PMU @%pa\n", &res->start);
+               goto out_teardown_dev;
+       }
+
+       ret = perf_pmu_register(&dmc620_pmu->pmu, name, -1);
+       if (ret)
+               goto out_teardown_dev;
+
+       return 0;
+
+out_teardown_dev:
+       dmc620_pmu_put_irq(dmc620_pmu);
+       synchronize_rcu();
+       return ret;
+}
+
+static int dmc620_pmu_device_remove(struct platform_device *pdev)
+{
+       struct dmc620_pmu *dmc620_pmu = platform_get_drvdata(pdev);
+
+       dmc620_pmu_put_irq(dmc620_pmu);
+
+       /* perf will synchronise RCU before devres can free dmc620_pmu */
+       perf_pmu_unregister(&dmc620_pmu->pmu);
+
+       return 0;
+}
+
+static const struct acpi_device_id dmc620_acpi_match[] = {
+       { "ARMHD620", 0},
+       {},
+};
+MODULE_DEVICE_TABLE(acpi, dmc620_acpi_match);
+static struct platform_driver dmc620_pmu_driver = {
+       .driver = {
+               .name           = DMC620_DRVNAME,
+               .acpi_match_table = dmc620_acpi_match,
+       },
+       .probe  = dmc620_pmu_device_probe,
+       .remove = dmc620_pmu_device_remove,
+};
+
+static int __init dmc620_pmu_init(void)
+{
+       cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+                                     DMC620_DRVNAME,
+                                     NULL,
+                                     dmc620_pmu_cpu_teardown);
+       if (cpuhp_state_num < 0)
+               return cpuhp_state_num;
+
+       return platform_driver_register(&dmc620_pmu_driver);
+}
+
+static void __exit dmc620_pmu_exit(void)
+{
+       platform_driver_unregister(&dmc620_pmu_driver);
+       cpuhp_remove_multi_state(cpuhp_state_num);
+}
+
+module_init(dmc620_pmu_init);
+module_exit(dmc620_pmu_exit);
+
+MODULE_DESCRIPTION("Perf driver for the ARM DMC-620 memory controller");
+MODULE_AUTHOR("Tuan Phan <tuanphan@os.amperecomputing.com>");
+MODULE_LICENSE("GPL v2");
index 98e68ed..0459a34 100644 (file)
@@ -716,9 +716,6 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
        if (IS_ERR(dsu_pmu))
                return PTR_ERR(dsu_pmu);
 
-       if (IS_ERR_OR_NULL(fwnode))
-               return -ENOENT;
-
        if (is_of_node(fwnode))
                rc = dsu_pmu_dt_get_cpus(&pdev->dev, &dsu_pmu->associated_cpus);
        else if (is_acpi_device_node(fwnode))
index cb2f55f..794a37d 100644 (file)
@@ -726,6 +726,11 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
        return per_cpu(hw_events->irq, cpu);
 }
 
+bool arm_pmu_irq_is_nmi(void)
+{
+       return has_nmi;
+}
+
 /*
  * PMU hardware loses all context when a CPU goes offline.
  * When a CPU is hotplugged back in, since some hardware registers are
index 5274f7f..74474bb 100644 (file)
@@ -74,6 +74,7 @@
 #define SMMU_PMCG_CFGR_NCTR             GENMASK(5, 0)
 #define SMMU_PMCG_CR                    0xE04
 #define SMMU_PMCG_CR_ENABLE             BIT(0)
+#define SMMU_PMCG_IIDR                  0xE08
 #define SMMU_PMCG_CEID0                 0xE20
 #define SMMU_PMCG_CEID1                 0xE28
 #define SMMU_PMCG_IRQ_CTRL              0xE50
@@ -112,6 +113,7 @@ struct smmu_pmu {
        void __iomem *reloc_base;
        u64 counter_mask;
        u32 options;
+       u32 iidr;
        bool global_filter;
 };
 
@@ -552,6 +554,40 @@ static struct attribute_group smmu_pmu_events_group = {
        .is_visible = smmu_pmu_event_is_visible,
 };
 
+static ssize_t smmu_pmu_identifier_attr_show(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *page)
+{
+       struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
+
+       return snprintf(page, PAGE_SIZE, "0x%08x\n", smmu_pmu->iidr);
+}
+
+static umode_t smmu_pmu_identifier_attr_visible(struct kobject *kobj,
+                                               struct attribute *attr,
+                                               int n)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
+
+       if (!smmu_pmu->iidr)
+               return 0;
+       return attr->mode;
+}
+
+static struct device_attribute smmu_pmu_identifier_attr =
+       __ATTR(identifier, 0444, smmu_pmu_identifier_attr_show, NULL);
+
+static struct attribute *smmu_pmu_identifier_attrs[] = {
+       &smmu_pmu_identifier_attr.attr,
+       NULL
+};
+
+static struct attribute_group smmu_pmu_identifier_group = {
+       .attrs = smmu_pmu_identifier_attrs,
+       .is_visible = smmu_pmu_identifier_attr_visible,
+};
+
 /* Formats */
 PMU_FORMAT_ATTR(event,            "config:0-15");
 PMU_FORMAT_ATTR(filter_stream_id,  "config1:0-31");
@@ -575,6 +611,7 @@ static const struct attribute_group *smmu_pmu_attr_grps[] = {
        &smmu_pmu_cpumask_group,
        &smmu_pmu_events_group,
        &smmu_pmu_format_group,
+       &smmu_pmu_identifier_group,
        NULL
 };
 
@@ -795,6 +832,8 @@ static int smmu_pmu_probe(struct platform_device *pdev)
                return err;
        }
 
+       smmu_pmu->iidr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_IIDR);
+
        name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "smmuv3_pmcg_%llx",
                              (res_0->start) >> SMMU_PMCG_PA_SHIFT);
        if (!name) {
index 397540a..a11bfd8 100644 (file)
@@ -50,6 +50,7 @@ static DEFINE_IDA(ddr_ida);
 
 struct fsl_ddr_devtype_data {
        unsigned int quirks;    /* quirks needed for different DDR Perf core */
+       const char *identifier; /* system PMU identifier for userspace */
 };
 
 static const struct fsl_ddr_devtype_data imx8_devtype_data;
@@ -58,13 +59,32 @@ static const struct fsl_ddr_devtype_data imx8m_devtype_data = {
        .quirks = DDR_CAP_AXI_ID_FILTER,
 };
 
+static const struct fsl_ddr_devtype_data imx8mq_devtype_data = {
+       .quirks = DDR_CAP_AXI_ID_FILTER,
+       .identifier = "i.MX8MQ",
+};
+
+static const struct fsl_ddr_devtype_data imx8mm_devtype_data = {
+       .quirks = DDR_CAP_AXI_ID_FILTER,
+       .identifier = "i.MX8MM",
+};
+
+static const struct fsl_ddr_devtype_data imx8mn_devtype_data = {
+       .quirks = DDR_CAP_AXI_ID_FILTER,
+       .identifier = "i.MX8MN",
+};
+
 static const struct fsl_ddr_devtype_data imx8mp_devtype_data = {
        .quirks = DDR_CAP_AXI_ID_FILTER_ENHANCED,
+       .identifier = "i.MX8MP",
 };
 
 static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
        { .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
        { .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},
+       { .compatible = "fsl,imx8mq-ddr-pmu", .data = &imx8mq_devtype_data},
+       { .compatible = "fsl,imx8mm-ddr-pmu", .data = &imx8mm_devtype_data},
+       { .compatible = "fsl,imx8mn-ddr-pmu", .data = &imx8mn_devtype_data},
        { .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data},
        { /* sentinel */ }
 };
@@ -84,6 +104,40 @@ struct ddr_pmu {
        int id;
 };
 
+static ssize_t ddr_perf_identifier_show(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *page)
+{
+       struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+       return sprintf(page, "%s\n", pmu->devtype_data->identifier);
+}
+
+static umode_t ddr_perf_identifier_attr_visible(struct kobject *kobj,
+                                               struct attribute *attr,
+                                               int n)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+       if (!pmu->devtype_data->identifier)
+               return 0;
+       return attr->mode;
+};
+
+static struct device_attribute ddr_perf_identifier_attr =
+       __ATTR(identifier, 0444, ddr_perf_identifier_show, NULL);
+
+static struct attribute *ddr_perf_identifier_attrs[] = {
+       &ddr_perf_identifier_attr.attr,
+       NULL,
+};
+
+static struct attribute_group ddr_perf_identifier_attr_group = {
+       .attrs = ddr_perf_identifier_attrs,
+       .is_visible = ddr_perf_identifier_attr_visible,
+};
+
 enum ddr_perf_filter_capabilities {
        PERF_CAP_AXI_ID_FILTER = 0,
        PERF_CAP_AXI_ID_FILTER_ENHANCED,
@@ -237,6 +291,7 @@ static const struct attribute_group *attr_groups[] = {
        &ddr_perf_format_attr_group,
        &ddr_perf_cpumask_attr_group,
        &ddr_perf_filter_cap_attr_group,
+       &ddr_perf_identifier_attr_group,
        NULL,
 };
 
@@ -361,25 +416,6 @@ static int ddr_perf_event_init(struct perf_event *event)
        return 0;
 }
 
-
-static void ddr_perf_event_update(struct perf_event *event)
-{
-       struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
-       struct hw_perf_event *hwc = &event->hw;
-       u64 delta, prev_raw_count, new_raw_count;
-       int counter = hwc->idx;
-
-       do {
-               prev_raw_count = local64_read(&hwc->prev_count);
-               new_raw_count = ddr_perf_read_counter(pmu, counter);
-       } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                       new_raw_count) != prev_raw_count);
-
-       delta = (new_raw_count - prev_raw_count) & 0xFFFFFFFF;
-
-       local64_add(delta, &event->count);
-}
-
 static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
                                  int counter, bool enable)
 {
@@ -404,6 +440,56 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
        }
 }
 
+static bool ddr_perf_counter_overflow(struct ddr_pmu *pmu, int counter)
+{
+       int val;
+
+       val = readl_relaxed(pmu->base + counter * 4 + COUNTER_CNTL);
+
+       return val & CNTL_OVER;
+}
+
+static void ddr_perf_counter_clear(struct ddr_pmu *pmu, int counter)
+{
+       u8 reg = counter * 4 + COUNTER_CNTL;
+       int val;
+
+       val = readl_relaxed(pmu->base + reg);
+       val &= ~CNTL_CLEAR;
+       writel(val, pmu->base + reg);
+
+       val |= CNTL_CLEAR;
+       writel(val, pmu->base + reg);
+}
+
+static void ddr_perf_event_update(struct perf_event *event)
+{
+       struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       u64 new_raw_count;
+       int counter = hwc->idx;
+       int ret;
+
+       new_raw_count = ddr_perf_read_counter(pmu, counter);
+       local64_add(new_raw_count, &event->count);
+
+       /*
+        * Legacy SoCs: an event counter keeps counting after it overflows,
+        *              so there is no need to clear it.
+        * Newer SoCs:  an event counter stops counting when it overflows
+        *              and must be cleared before it can count again.
+        */
+       if (counter != EVENT_CYCLES_COUNTER) {
+               ret = ddr_perf_counter_overflow(pmu, counter);
+               if (ret)
+                       dev_warn_ratelimited(pmu->dev,  "events lost due to counter overflow (config 0x%llx)\n",
+                                            event->attr.config);
+       }
+
+       /* clear counter every time for both cycle counter and event counter */
+       ddr_perf_counter_clear(pmu, counter);
+}
+
 static void ddr_perf_event_start(struct perf_event *event, int flags)
 {
        struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
@@ -537,7 +623,7 @@ static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
 {
        int i;
        struct ddr_pmu *pmu = (struct ddr_pmu *) p;
-       struct perf_event *event, *cycle_event = NULL;
+       struct perf_event *event;
 
        /* all counter will stop if cycle counter disabled */
        ddr_perf_counter_enable(pmu,
@@ -547,7 +633,9 @@ static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
        /*
         * When the cycle counter overflows, all counters are stopped,
         * and an IRQ is raised. If any other counter overflows, it
-        * continues counting, and no IRQ is raised.
+        * continues counting, and no IRQ is raised. On newer SoCs, such
+        * as i.MX8MP, an event counter stops when it overflows, so the
+        * cycle counter IRQ is used to update and clear the event counters.
         *
         * Cycles occur at least 4 times as often as other events, so we
         * can update all events on a cycle counter overflow and not
@@ -562,17 +650,12 @@ static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
                event = pmu->events[i];
 
                ddr_perf_event_update(event);
-
-               if (event->hw.idx == EVENT_CYCLES_COUNTER)
-                       cycle_event = event;
        }
 
        ddr_perf_counter_enable(pmu,
                              EVENT_CYCLES_ID,
                              EVENT_CYCLES_COUNTER,
                              true);
-       if (cycle_event)
-               ddr_perf_event_update(cycle_event);
 
        return IRQ_HANDLED;
 }
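Two things change in this driver's counting model: ddr_perf_event_update() now clears the hardware counter on every read and simply adds the raw value to the perf count (instead of the old wrap-aware cmpxchg delta loop), and on newer SoCs such as i.MX8MP the cycle-counter interrupt is what drains event counters that stop at overflow. An illustrative contrast of the two accumulation models (hypothetical helper names, kernel-style types assumed, not driver code):

/* Illustrative sketch only: accumulation for a 32-bit hardware counter. */
static u64 sw_total;

/* Before: free-running counter, accumulate the wrap-safe delta. */
static void accumulate_delta(u32 prev, u32 now)
{
	sw_total += now - prev;		/* unsigned subtraction handles 32-bit wrap */
}

/* After: the counter is cleared on every read, so the raw value is the delta. */
static void accumulate_clear_on_read(u32 now)
{
	sw_total += now;
}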
index 5e3645c..5ac6c91 100644 (file)
@@ -33,6 +33,7 @@
 #define DDRC_INT_MASK          0x6c8
 #define DDRC_INT_STATUS                0x6cc
 #define DDRC_INT_CLEAR         0x6d0
+#define DDRC_VERSION           0x710
 
 /* DDRC has 8-counters */
 #define DDRC_NR_COUNTERS       0x8
@@ -267,6 +268,8 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
                return PTR_ERR(ddrc_pmu->base);
        }
 
+       ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION);
+
        return 0;
 }
 
@@ -308,10 +311,23 @@ static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = {
        .attrs = hisi_ddrc_pmu_cpumask_attrs,
 };
 
+static struct device_attribute hisi_ddrc_pmu_identifier_attr =
+       __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = {
+       &hisi_ddrc_pmu_identifier_attr.attr,
+       NULL
+};
+
+static struct attribute_group hisi_ddrc_pmu_identifier_group = {
+       .attrs = hisi_ddrc_pmu_identifier_attrs,
+};
+
 static const struct attribute_group *hisi_ddrc_pmu_attr_groups[] = {
        &hisi_ddrc_pmu_format_group,
        &hisi_ddrc_pmu_events_group,
        &hisi_ddrc_pmu_cpumask_attr_group,
+       &hisi_ddrc_pmu_identifier_group,
        NULL,
 };
 
index 5eb8168..41b2dce 100644 (file)
@@ -23,6 +23,7 @@
 #define HHA_INT_MASK           0x0804
 #define HHA_INT_STATUS         0x0808
 #define HHA_INT_CLEAR          0x080C
+#define HHA_VERSION            0x1cf0
 #define HHA_PERF_CTRL          0x1E00
 #define HHA_EVENT_CTRL         0x1E04
 #define HHA_EVENT_TYPE0                0x1E80
@@ -261,6 +262,8 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
                return PTR_ERR(hha_pmu->base);
        }
 
+       hha_pmu->identifier = readl(hha_pmu->base + HHA_VERSION);
+
        return 0;
 }
 
@@ -320,10 +323,23 @@ static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = {
        .attrs = hisi_hha_pmu_cpumask_attrs,
 };
 
+static struct device_attribute hisi_hha_pmu_identifier_attr =
+       __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_hha_pmu_identifier_attrs[] = {
+       &hisi_hha_pmu_identifier_attr.attr,
+       NULL
+};
+
+static struct attribute_group hisi_hha_pmu_identifier_group = {
+       .attrs = hisi_hha_pmu_identifier_attrs,
+};
+
 static const struct attribute_group *hisi_hha_pmu_attr_groups[] = {
        &hisi_hha_pmu_format_group,
        &hisi_hha_pmu_events_group,
        &hisi_hha_pmu_cpumask_attr_group,
+       &hisi_hha_pmu_identifier_group,
        NULL,
 };
 
index 3e8b5ea..705501d 100644 (file)
@@ -25,6 +25,7 @@
 #define L3C_INT_STATUS         0x0808
 #define L3C_INT_CLEAR          0x080c
 #define L3C_EVENT_CTRL         0x1c00
+#define L3C_VERSION            0x1cf0
 #define L3C_EVENT_TYPE0                0x1d00
 /*
  * Each counter is 48-bits and [48:63] are reserved
@@ -264,6 +265,8 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
                return PTR_ERR(l3c_pmu->base);
        }
 
+       l3c_pmu->identifier = readl(l3c_pmu->base + L3C_VERSION);
+
        return 0;
 }
 
@@ -310,10 +313,23 @@ static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = {
        .attrs = hisi_l3c_pmu_cpumask_attrs,
 };
 
+static struct device_attribute hisi_l3c_pmu_identifier_attr =
+       __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_l3c_pmu_identifier_attrs[] = {
+       &hisi_l3c_pmu_identifier_attr.attr,
+       NULL
+};
+
+static struct attribute_group hisi_l3c_pmu_identifier_group = {
+       .attrs = hisi_l3c_pmu_identifier_attrs,
+};
+
 static const struct attribute_group *hisi_l3c_pmu_attr_groups[] = {
        &hisi_l3c_pmu_format_group,
        &hisi_l3c_pmu_events_group,
        &hisi_l3c_pmu_cpumask_attr_group,
+       &hisi_l3c_pmu_identifier_group,
        NULL,
 };
 
index 97aff87..9dbdc3f 100644 (file)
@@ -119,6 +119,16 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx);
 
+ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
+                                            struct device_attribute *attr,
+                                            char *page)
+{
+       struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
+
+       return snprintf(page, PAGE_SIZE, "0x%08x\n", hisi_pmu->identifier);
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show);
+
 static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
 {
        if (!hisi_uncore_pmu_counter_valid(hisi_pmu, idx)) {
index b59ec22..25b7cbe 100644 (file)
@@ -75,6 +75,7 @@ struct hisi_pmu {
        int counter_bits;
        /* check event code range */
        int check_event;
+       u32 identifier;
 };
 
 int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx);
@@ -97,4 +98,10 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev,
                                struct device_attribute *attr, char *buf);
 int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node);
 int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node);
+
+ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
+                                            struct device_attribute *attr,
+                                            char *page);
+
+
 #endif /* __HISI_UNCORE_PMU_H__ */
index 20a3212..1a12baa 100644 (file)
@@ -38,6 +38,7 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
 const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
                                                const u32 *id_in);
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
+phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
 #else
 static inline void acpi_iort_init(void) { }
 static inline u32 iort_msi_map_id(struct device *dev, u32 id)
@@ -55,6 +56,9 @@ static inline const struct iommu_ops *iort_iommu_configure_id(
 static inline
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
 { return 0; }
+
+static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void)
+{ return PHYS_ADDR_MAX; }
 #endif
 
 #endif /* __ACPI_IORT_H__ */
index fb3bf69..9d0c454 100644 (file)
@@ -354,26 +354,6 @@ enum zone_type {
         * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
         * platforms may need both zones as they support peripherals with
         * different DMA addressing limitations.
-        *
-        * Some examples:
-        *
-        *  - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the
-        *    rest of the lower 4G.
-        *
-        *  - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on
-        *    the specific device.
-        *
-        *  - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the
-        *    lower 4G.
-        *
-        *  - powerpc only uses ZONE_DMA, the size, up to 2G, may vary
-        *    depending on the specific device.
-        *
-        *  - s390 uses ZONE_DMA fixed to the lower 2G.
-        *
-        *  - ia64 and riscv only use ZONE_DMA32.
-        *
-        *  - parisc uses neither.
         */
 #ifdef CONFIG_ZONE_DMA
        ZONE_DMA,
index 5d51891..9ed5b85 100644 (file)
@@ -558,6 +558,8 @@ int of_map_id(struct device_node *np, u32 id,
               const char *map_name, const char *map_mask_name,
               struct device_node **target, u32 *id_out);
 
+phys_addr_t of_dma_get_max_cpu_address(struct device_node *np);
+
 #else /* CONFIG_OF */
 
 static inline void of_core_init(void)
@@ -995,6 +997,11 @@ static inline int of_map_id(struct device_node *np, u32 id,
        return -EINVAL;
 }
 
+static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np)
+{
+       return PHYS_ADDR_MAX;
+}
+
 #define of_match_ptr(_ptr)     NULL
 #define of_match_node(_matches, _node) NULL
 #endif /* CONFIG_OF */
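This stub pairs with the acpi_iort_dma_get_max_cpu_address() one above: both return PHYS_ADDR_MAX when firmware describes no limit, so callers can take the minimum unconditionally. A hedged sketch of how the two helpers can be combined when sizing ZONE_DMA, roughly what the arm64 side of this series does (the function name is made up; assumes <linux/of.h>, <linux/acpi_iort.h>, <linux/bitops.h> and <linux/minmax.h>):

/*
 * Hedged sketch: derive the ZONE_DMA width from the firmware-described
 * DMA limits. fls64(PHYS_ADDR_MAX) is well above 32, so platforms with
 * no narrow limit fall back to a 32-bit ZONE_DMA.
 */
static unsigned int __init firmware_zone_dma_bits(void)
{
	unsigned int dt_bits   = fls64(of_dma_get_max_cpu_address(NULL));
	unsigned int acpi_bits = fls64(acpi_iort_dma_get_max_cpu_address());

	return min3(32U, dt_bits, acpi_bits);
}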
index 5054802..bf79667 100644 (file)
@@ -163,6 +163,8 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
 static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
 #endif
 
+bool arm_pmu_irq_is_nmi(void);
+
 /* Internal functions only for core arm_pmu code */
 struct arm_pmu *armpmu_alloc(void);
 struct arm_pmu *armpmu_alloc_atomic(void);
index 6dec390..18122d9 100644 (file)
 
 #ifdef CONFIG_SHADOW_CALL_STACK
 
-/*
- * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
- * architecture) provided ~40% safety margin on stack usage while keeping
- * memory allocation overhead reasonable.
- */
-#define SCS_SIZE               SZ_1K
+#define SCS_ORDER              0
+#define SCS_SIZE               (PAGE_SIZE << SCS_ORDER)
 #define GFP_SCS                        (GFP_KERNEL | __GFP_ZERO)
 
 /* An illegal pointer value to mark the end of the shadow stack. */
 #define SCS_END_MAGIC          (0x5f6UL + POISON_POINTER_DELTA)
 
-/* Allocate a static per-CPU shadow stack */
-#define DEFINE_SCS(name)                                               \
-       DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)     \
-
 #define task_scs(tsk)          (task_thread_info(tsk)->scs_base)
 #define task_scs_sp(tsk)       (task_thread_info(tsk)->scs_sp)
 
+void *scs_alloc(int node);
+void scs_free(void *s);
 void scs_init(void);
 int scs_prepare(struct task_struct *tsk, int node);
 void scs_release(struct task_struct *tsk);
@@ -61,6 +55,8 @@ static inline bool task_scs_end_corrupted(struct task_struct *tsk)
 
 #else /* CONFIG_SHADOW_CALL_STACK */
 
+static inline void *scs_alloc(int node) { return NULL; }
+static inline void scs_free(void *s) {}
 static inline void scs_init(void) {}
 static inline void scs_task_reset(struct task_struct *tsk) {}
 static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
index b256f9c..205526c 100644 (file)
@@ -469,4 +469,18 @@ struct seq_file;
 extern void render_sigset_t(struct seq_file *, const char *, sigset_t *);
 #endif
 
+#ifndef arch_untagged_si_addr
+/*
+ * Given a fault address and a signal and si_code which correspond to the
+ * _sigfault union member, returns the address that must appear in si_addr if
+ * the signal handler does not have SA_EXPOSE_TAGBITS enabled in sa_flags.
+ */
+static inline void __user *arch_untagged_si_addr(void __user *addr,
+                                                unsigned long sig,
+                                                unsigned long si_code)
+{
+       return addr;
+}
+#endif
+
 #endif /* _LINUX_SIGNAL_H */
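The default keeps si_addr untouched; an architecture opts in by providing its own definition (and the matching macro) in asm/signal.h, which makes the #ifndef above skip this stub. A purely hypothetical, simplified override for an architecture that keeps pointer tags in bits 63:56 (the real arm64 implementation is more involved and signal/si_code aware):

#define arch_untagged_si_addr arch_untagged_si_addr
static inline void __user *arch_untagged_si_addr(void __user *addr,
						 unsigned long sig,
						 unsigned long si_code)
{
	/* Hypothetical: strip a top-byte tag unless SA_EXPOSE_TAGBITS asked for it. */
	return (void __user *)((unsigned long)addr & ~(0xffUL << 56));
}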
index f8a90ae..68e06c7 100644 (file)
@@ -68,4 +68,16 @@ struct ksignal {
        int sig;
 };
 
+#ifndef __ARCH_UAPI_SA_FLAGS
+#ifdef SA_RESTORER
+#define __ARCH_UAPI_SA_FLAGS   SA_RESTORER
+#else
+#define __ARCH_UAPI_SA_FLAGS   0
+#endif
+#endif
+
+#define UAPI_SA_FLAGS                                                          \
+       (SA_NOCLDSTOP | SA_NOCLDWAIT | SA_SIGINFO | SA_ONSTACK | SA_RESTART |  \
+        SA_NODEFER | SA_RESETHAND | SA_EXPOSE_TAGBITS | __ARCH_UAPI_SA_FLAGS)
+
 #endif /* _LINUX_SIGNAL_TYPES_H */
index e9304c9..fe929e7 100644 (file)
@@ -4,6 +4,69 @@
 
 #include <linux/compiler.h>
 
+/*
+ * SA_FLAGS values:
+ *
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_SIGINFO delivers the signal with SIGINFO structs.
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_UNSUPPORTED is a flag bit that will never be supported. Kernels from
+ * before the introduction of SA_UNSUPPORTED did not clear unknown bits from
+ * sa_flags when read using the oldact argument to sigaction and rt_sigaction,
+ * so this bit allows flag bit support to be detected from userspace while
+ * allowing an old kernel to be distinguished from a kernel that supports every
+ * flag bit.
+ * SA_EXPOSE_TAGBITS exposes an architecture-defined set of tag bits in
+ * siginfo.si_addr.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#ifndef SA_NOCLDSTOP
+#define SA_NOCLDSTOP   0x00000001
+#endif
+#ifndef SA_NOCLDWAIT
+#define SA_NOCLDWAIT   0x00000002
+#endif
+#ifndef SA_SIGINFO
+#define SA_SIGINFO     0x00000004
+#endif
+/* 0x00000008 used on alpha, mips, parisc */
+/* 0x00000010 used on alpha, parisc */
+/* 0x00000020 used on alpha, parisc, sparc */
+/* 0x00000040 used on alpha, parisc */
+/* 0x00000080 used on parisc */
+/* 0x00000100 used on sparc */
+/* 0x00000200 used on sparc */
+#define SA_UNSUPPORTED 0x00000400
+#define SA_EXPOSE_TAGBITS      0x00000800
+/* 0x00010000 used on mips */
+/* 0x01000000 used on x86 */
+/* 0x02000000 used on x86 */
+/*
+ * New architectures should not define the obsolete
+ *     SA_RESTORER     0x04000000
+ */
+#ifndef SA_ONSTACK
+#define SA_ONSTACK     0x08000000
+#endif
+#ifndef SA_RESTART
+#define SA_RESTART     0x10000000
+#endif
+#ifndef SA_NODEFER
+#define SA_NODEFER     0x40000000
+#endif
+#ifndef SA_RESETHAND
+#define SA_RESETHAND   0x80000000
+#endif
+
+#define SA_NOMASK      SA_NODEFER
+#define SA_ONESHOT     SA_RESETHAND
+
 #ifndef SIG_BLOCK
 #define SIG_BLOCK          0   /* for blocking signals */
 #endif
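SA_UNSUPPORTED enables userspace feature probing: because the kernel now clears unknown sa_flags bits on read-back, an application can tell "flag not supported" apart from "old kernel that echoes every bit back". A hedged userspace sketch of that probe (sa_flag_supported() is a made-up helper; the #defines only cover libcs that lack the new constants):

#include <signal.h>

#ifndef SA_UNSUPPORTED
#define SA_UNSUPPORTED		0x00000400
#endif
#ifndef SA_EXPOSE_TAGBITS
#define SA_EXPOSE_TAGBITS	0x00000800
#endif

static int sa_flag_supported(int sig, unsigned int flag)
{
	struct sigaction act = { 0 }, old = { 0 }, chk = { 0 };

	act.sa_handler = SIG_DFL;
	act.sa_flags = SA_UNSUPPORTED | flag;

	if (sigaction(sig, &act, &old))		/* install the probe disposition */
		return 0;
	sigaction(sig, NULL, &chk);		/* read back what the kernel kept */
	sigaction(sig, &old, NULL);		/* restore the original handler */

	/*
	 * A kernel with this series clears the unknown SA_UNSUPPORTED bit, so
	 * the probed flag is supported iff it survived the round trip; if
	 * SA_UNSUPPORTED is still set, the kernel is too old to tell.
	 */
	return !(chk.sa_flags & SA_UNSUPPORTED) && (chk.sa_flags & flag);
}

For example, sa_flag_supported(SIGSEGV, SA_EXPOSE_TAGBITS) tells a program whether fault addresses delivered to its handlers can carry tag bits.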
index 5c716a9..f634822 100644 (file)
 #define SIGRTMAX       _NSIG
 #endif
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002
-#define SA_SIGINFO     0x00000004
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
-/*
- * New architectures should not define the obsolete
- *     SA_RESTORER     0x04000000
- */
-
 #if !defined MINSIGSTKSZ || !defined SIGSTKSZ
 #define MINSIGSTKSZ    2048
 #define SIGSTKSZ       8192
index 4ff4a7b..e2a71fc 100644 (file)
@@ -5,26 +5,49 @@
  * Copyright (C) 2019 Google LLC
  */
 
+#include <linux/cpuhotplug.h>
 #include <linux/kasan.h>
 #include <linux/mm.h>
 #include <linux/scs.h>
-#include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <linux/vmstat.h>
 
-static struct kmem_cache *scs_cache;
-
 static void __scs_account(void *s, int account)
 {
-       struct page *scs_page = virt_to_page(s);
+       struct page *scs_page = vmalloc_to_page(s);
 
        mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB,
                            account * (SCS_SIZE / SZ_1K));
 }
 
-static void *scs_alloc(int node)
+/* Matches NR_CACHED_STACKS for VMAP_STACK */
+#define NR_CACHED_SCS 2
+static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
+
+static void *__scs_alloc(int node)
 {
-       void *s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+       int i;
+       void *s;
+
+       for (i = 0; i < NR_CACHED_SCS; i++) {
+               s = this_cpu_xchg(scs_cache[i], NULL);
+               if (s) {
+                       kasan_unpoison_vmalloc(s, SCS_SIZE);
+                       memset(s, 0, SCS_SIZE);
+                       return s;
+               }
+       }
+
+       return __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
+                                   GFP_SCS, PAGE_KERNEL, 0, node,
+                                   __builtin_return_address(0));
+}
 
+void *scs_alloc(int node)
+{
+       void *s;
+
+       s = __scs_alloc(node);
        if (!s)
                return NULL;
 
@@ -34,21 +57,47 @@ static void *scs_alloc(int node)
         * Poison the allocation to catch unintentional accesses to
         * the shadow stack when KASAN is enabled.
         */
-       kasan_poison_object_data(scs_cache, s);
+       kasan_poison_vmalloc(s, SCS_SIZE);
        __scs_account(s, 1);
        return s;
 }
 
-static void scs_free(void *s)
+void scs_free(void *s)
 {
+       int i;
+
        __scs_account(s, -1);
-       kasan_unpoison_object_data(scs_cache, s);
-       kmem_cache_free(scs_cache, s);
+
+       /*
+        * We cannot sleep as this can be called in interrupt context,
+        * so use this_cpu_cmpxchg to update the cache, and vfree_atomic
+        * to free the stack.
+        */
+
+       for (i = 0; i < NR_CACHED_SCS; i++)
+               if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
+                       return;
+
+       vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+       int i;
+       void **cache = per_cpu_ptr(scs_cache, cpu);
+
+       for (i = 0; i < NR_CACHED_SCS; i++) {
+               vfree(cache[i]);
+               cache[i] = NULL;
+       }
+
+       return 0;
 }
 
 void __init scs_init(void)
 {
-       scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, 0, 0, NULL);
+       cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+                         scs_cleanup);
 }
 
 int scs_prepare(struct task_struct *tsk, int node)
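With scs_alloc()/scs_free() now visible outside kernel/scs.c (presumably so architecture code can allocate its own shadow stacks the same way), the per-task flow looks roughly like the sketch below. The example_* names are made up; this mirrors, rather than quotes, the existing scs_prepare()/scs_release() callers, and assumes <linux/scs.h> and <linux/sched.h>.

static int example_task_scs_setup(struct task_struct *tsk, int node)
{
	void *s = scs_alloc(node);	/* vmapped, cached per CPU when possible */

	if (!s)
		return -ENOMEM;

	/* Base and current SCS pointer both start at the new allocation. */
	task_scs(tsk) = task_scs_sp(tsk) = s;
	return 0;
}

static void example_task_scs_teardown(struct task_struct *tsk)
{
	void *s = task_scs(tsk);

	if (!s)
		return;

	task_scs(tsk) = NULL;
	scs_free(s);			/* recycled into the per-CPU cache or vfree_atomic'd */
}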
index ef8f2a2..26018c5 100644 (file)
@@ -2524,6 +2524,26 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info)
        return signr;
 }
 
+static void hide_si_addr_tag_bits(struct ksignal *ksig)
+{
+       switch (siginfo_layout(ksig->sig, ksig->info.si_code)) {
+       case SIL_FAULT:
+       case SIL_FAULT_MCEERR:
+       case SIL_FAULT_BNDERR:
+       case SIL_FAULT_PKUERR:
+               ksig->info.si_addr = arch_untagged_si_addr(
+                       ksig->info.si_addr, ksig->sig, ksig->info.si_code);
+               break;
+       case SIL_KILL:
+       case SIL_TIMER:
+       case SIL_POLL:
+       case SIL_CHLD:
+       case SIL_RT:
+       case SIL_SYS:
+               break;
+       }
+}
+
 bool get_signal(struct ksignal *ksig)
 {
        struct sighand_struct *sighand = current->sighand;
@@ -2761,6 +2781,10 @@ relock:
        spin_unlock_irq(&sighand->siglock);
 
        ksig->sig = signr;
+
+       if (!(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS))
+               hide_si_addr_tag_bits(ksig);
+
        return ksig->sig > 0;
 }
 
@@ -3985,6 +4009,22 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
        if (oact)
                *oact = *k;
 
+       /*
+        * Make sure that we never accidentally claim to support SA_UNSUPPORTED,
+        * e.g. by having an architecture use the bit in their uapi.
+        */
+       BUILD_BUG_ON(UAPI_SA_FLAGS & SA_UNSUPPORTED);
+
+       /*
+        * Clear unknown flag bits in order to allow userspace to detect missing
+        * support for flag bits and to allow the kernel to use non-uapi bits
+        * internally.
+        */
+       if (act)
+               act->sa.sa_flags &= UAPI_SA_FLAGS;
+       if (oact)
+               oact->sa.sa_flags &= UAPI_SA_FLAGS;
+
        sigaction_compat_abi(act, oact);
 
        if (act) {
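get_signal() now strips tag bits from si_addr unless the handler was registered with SA_EXPOSE_TAGBITS. A hedged userspace sketch of a handler that opts in and inspects the tag (illustration only; the handler name is made up, the #define covers libcs without the new constant, and fprintf in a signal handler is not async-signal-safe):

#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#ifndef SA_EXPOSE_TAGBITS
#define SA_EXPOSE_TAGBITS 0x00000800
#endif

static void segv_handler(int sig, siginfo_t *info, void *ctx)
{
	uintptr_t addr = (uintptr_t)info->si_addr;

	(void)sig;
	(void)ctx;
	/* Demo-only output; with SA_EXPOSE_TAGBITS, bits 63:56 carry the tag. */
	fprintf(stderr, "fault at %p, tag 0x%02x\n",
		info->si_addr, (unsigned int)(addr >> 56));
	_exit(1);
}

static void install_tag_aware_handler(void)
{
	struct sigaction sa = { 0 };

	sa.sa_sigaction = segv_handler;
	sa.sa_flags = SA_SIGINFO | SA_EXPOSE_TAGBITS;
	sigaction(SIGSEGV, &sa, NULL);
}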