Merge tag 'powerpc-5.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 30 Apr 2021 19:22:28 +0000 (12:22 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 30 Apr 2021 19:22:28 +0000 (12:22 -0700)
Pull powerpc updates from Michael Ellerman:

 - Enable KFENCE for 32-bit.

 - Implement EBPF for 32-bit.

 - Convert 32-bit to do interrupt entry/exit in C.

 - Convert 64-bit BookE to do interrupt entry/exit in C.

 - Changes to our signal handling code to use user_access_begin/end()
   more extensively.

 - Add support for time namespaces (CONFIG_TIME_NS).

 - A series of fixes that allow us to reenable STRICT_KERNEL_RWX.

 - Other smaller features, fixes & cleanups.

Thanks to Alexey Kardashevskiy, Andreas Schwab, Andrew Donnellan, Aneesh
Kumar K.V, Athira Rajeev, Bhaskar Chowdhury, Bixuan Cui, Cédric Le
Goater, Chen Huang, Chris Packham, Christophe Leroy, Christopher M.
Riedl, Colin Ian King, Dan Carpenter, Daniel Axtens, Daniel Henrique
Barboza, David Gibson, Davidlohr Bueso, Denis Efremov, dingsenjie,
Dmitry Safonov, Dominic DeMarco, Fabiano Rosas, Ganesh Goudar, Geert
Uytterhoeven, Geetika Moolchandani, Greg Kurz, Guenter Roeck, Haren
Myneni, He Ying, Jiapeng Chong, Jordan Niethe, Laurent Dufour, Lee
Jones, Leonardo Bras, Li Huafei, Madhavan Srinivasan, Mahesh Salgaonkar,
Masahiro Yamada, Nathan Chancellor, Nathan Lynch, Nicholas Piggin,
Oliver O'Halloran, Paul Menzel, Pu Lehui, Randy Dunlap, Ravi Bangoria,
Rosen Penev, Russell Currey, Santosh Sivaraj, Sebastian Andrzej Siewior,
Segher Boessenkool, Shivaprasad G Bhat, Srikar Dronamraju, Stephen
Rothwell, Thadeu Lima de Souza Cascardo, Thomas Gleixner, Tony Ambardar,
Tyrel Datwyler, Vaibhav Jain, Vincenzo Frascino, Xiongwei Song, Yang Li,
Yu Kuai, and Zhang Yunkai.

* tag 'powerpc-5.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (302 commits)
  powerpc/signal32: Fix erroneous SIGSEGV on RT signal return
  powerpc: Avoid clang uninitialized warning in __get_user_size_allowed
  powerpc/papr_scm: Mark nvdimm as unarmed if needed during probe
  powerpc/kvm: Fix build error when PPC_MEM_KEYS/PPC_PSERIES=n
  powerpc/kasan: Fix shadow start address with modules
  powerpc/kernel/iommu: Use largepool as a last resort when !largealloc
  powerpc/kernel/iommu: Align size for IOMMU_PAGE_SIZE() to save TCEs
  powerpc/44x: fix spelling mistake in Kconfig "varients" -> "variants"
  powerpc/iommu: Annotate nested lock for lockdep
  powerpc/iommu: Do not immediately panic when failed IOMMU table allocation
  powerpc/iommu: Allocate it_map by vmalloc
  selftests/powerpc: remove unneeded semicolon
  powerpc/64s: remove unneeded semicolon
  powerpc/eeh: remove unneeded semicolon
  powerpc/selftests: Add selftest to test concurrent perf/ptrace events
  powerpc/selftests/perf-hwbreak: Add testcases for 2nd DAWR
  powerpc/selftests/perf-hwbreak: Coalesce event creation code
  powerpc/selftests/ptrace-hwbreak: Add testcases for 2nd DAWR
  powerpc/configs: Add IBMVNIC to some 64-bit configs
  selftests/powerpc: Add uaccess flush test
  ...

234 files changed:
Documentation/admin-guide/sysctl/net.rst
Documentation/features/debug/debug-vm-pgtable/arch-support.txt
Documentation/powerpc/papr_hcalls.rst
Documentation/powerpc/vas-api.rst
arch/arm64/include/asm/vdso/compat_gettimeofday.h
arch/arm64/include/asm/vdso/gettimeofday.h
arch/powerpc/Kconfig
arch/powerpc/Kconfig.debug
arch/powerpc/Makefile
arch/powerpc/configs/ppc64_defconfig
arch/powerpc/configs/pseries_defconfig
arch/powerpc/include/asm/Kbuild
arch/powerpc/include/asm/asm-prototypes.h
arch/powerpc/include/asm/barrier.h
arch/powerpc/include/asm/book3s/32/kup.h
arch/powerpc/include/asm/book3s/32/pgtable.h
arch/powerpc/include/asm/book3s/32/tlbflush.h
arch/powerpc/include/asm/book3s/64/kup.h
arch/powerpc/include/asm/book3s/64/mmu-hash.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/book3s/64/radix.h
arch/powerpc/include/asm/bug.h
arch/powerpc/include/asm/cacheflush.h
arch/powerpc/include/asm/cpm2.h
arch/powerpc/include/asm/fixmap.h
arch/powerpc/include/asm/futex.h
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/hvconsole.h
arch/powerpc/include/asm/hydra.h
arch/powerpc/include/asm/inst.h
arch/powerpc/include/asm/interrupt.h
arch/powerpc/include/asm/irq.h
arch/powerpc/include/asm/jump_label.h
arch/powerpc/include/asm/kasan.h
arch/powerpc/include/asm/kfence.h [new file with mode: 0644]
arch/powerpc/include/asm/kup.h
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/include/asm/nohash/32/kup-8xx.h
arch/powerpc/include/asm/nohash/32/mmu-8xx.h
arch/powerpc/include/asm/nohash/64/pgtable.h
arch/powerpc/include/asm/opal.h
arch/powerpc/include/asm/perf_event_server.h
arch/powerpc/include/asm/pgtable.h
arch/powerpc/include/asm/ppc-opcode.h
arch/powerpc/include/asm/ppc_asm.h
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/ptrace.h
arch/powerpc/include/asm/qspinlock.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/rtas.h
arch/powerpc/include/asm/simple_spinlock.h
arch/powerpc/include/asm/smp.h
arch/powerpc/include/asm/spinlock.h
arch/powerpc/include/asm/thread_info.h
arch/powerpc/include/asm/topology.h
arch/powerpc/include/asm/uaccess.h
arch/powerpc/include/asm/unistd.h
arch/powerpc/include/asm/vdso/gettimeofday.h
arch/powerpc/include/asm/vdso_datapage.h
arch/powerpc/include/asm/vio.h
arch/powerpc/include/asm/xive.h
arch/powerpc/include/uapi/asm/errno.h
arch/powerpc/include/uapi/asm/posix_types.h
arch/powerpc/kernel/align.c
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/entry_32.S
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/exceptions-64e.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/fadump.c
arch/powerpc/kernel/fpu.S
arch/powerpc/kernel/head_32.h
arch/powerpc/kernel/head_40x.S
arch/powerpc/kernel/head_44x.S
arch/powerpc/kernel/head_8xx.S
arch/powerpc/kernel/head_book3s_32.S
arch/powerpc/kernel/head_booke.h
arch/powerpc/kernel/head_fsl_booke.S
arch/powerpc/kernel/hw_breakpoint_constraints.c
arch/powerpc/kernel/idle_6xx.S
arch/powerpc/kernel/idle_book3s.S
arch/powerpc/kernel/idle_e500.S
arch/powerpc/kernel/interrupt.c
arch/powerpc/kernel/iommu.c
arch/powerpc/kernel/irq.c
arch/powerpc/kernel/jump_label.c
arch/powerpc/kernel/kgdb.c
arch/powerpc/kernel/legacy_serial.c
arch/powerpc/kernel/mce.c
arch/powerpc/kernel/misc_32.S
arch/powerpc/kernel/misc_64.S
arch/powerpc/kernel/module.c
arch/powerpc/kernel/optprobes.c
arch/powerpc/kernel/optprobes_head.S
arch/powerpc/kernel/process.c
arch/powerpc/kernel/prom.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/ptrace/ptrace-view.c
arch/powerpc/kernel/ptrace/ptrace.c
arch/powerpc/kernel/ptrace/ptrace32.c
arch/powerpc/kernel/rtas-proc.c
arch/powerpc/kernel/rtas.c
arch/powerpc/kernel/security.c
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/setup_32.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/signal.h
arch/powerpc/kernel/signal_32.c
arch/powerpc/kernel/signal_64.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/stacktrace.c
arch/powerpc/kernel/syscalls.c
arch/powerpc/kernel/syscalls/Makefile
arch/powerpc/kernel/syscalls/syscallhdr.sh [deleted file]
arch/powerpc/kernel/syscalls/syscalltbl.sh [deleted file]
arch/powerpc/kernel/systbl.S
arch/powerpc/kernel/trace/ftrace.c
arch/powerpc/kernel/traps.c
arch/powerpc/kernel/uprobes.c
arch/powerpc/kernel/vdso.c
arch/powerpc/kernel/vdso32/vdso32.lds.S
arch/powerpc/kernel/vdso64/vdso64.lds.S
arch/powerpc/kernel/vector.S
arch/powerpc/kexec/crash.c
arch/powerpc/kvm/book3s_64_mmu_host.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_nested.c
arch/powerpc/kvm/book3s_hv_rm_mmu.c
arch/powerpc/lib/Makefile
arch/powerpc/lib/checksum_wrappers.c
arch/powerpc/lib/code-patching.c
arch/powerpc/lib/inst.c [deleted file]
arch/powerpc/lib/sstep.c
arch/powerpc/math-emu/math.c
arch/powerpc/mm/Makefile
arch/powerpc/mm/book3s32/Makefile
arch/powerpc/mm/book3s32/hash_low.S
arch/powerpc/mm/book3s32/kuep.c [new file with mode: 0644]
arch/powerpc/mm/book3s32/mmu.c
arch/powerpc/mm/book3s64/hash_pgtable.c
arch/powerpc/mm/book3s64/hash_utils.c
arch/powerpc/mm/book3s64/mmu_context.c
arch/powerpc/mm/book3s64/pkeys.c
arch/powerpc/mm/book3s64/radix_pgtable.c
arch/powerpc/mm/cacheflush.c [new file with mode: 0644]
arch/powerpc/mm/fault.c
arch/powerpc/mm/init_32.c
arch/powerpc/mm/maccess.c
arch/powerpc/mm/mem.c
arch/powerpc/mm/mmu_context.c
arch/powerpc/mm/mmu_decl.h
arch/powerpc/mm/nohash/8xx.c
arch/powerpc/net/Makefile
arch/powerpc/net/bpf_jit.h
arch/powerpc/net/bpf_jit32.h [deleted file]
arch/powerpc/net/bpf_jit64.h
arch/powerpc/net/bpf_jit_asm.S [deleted file]
arch/powerpc/net/bpf_jit_comp.c
arch/powerpc/net/bpf_jit_comp32.c [new file with mode: 0644]
arch/powerpc/net/bpf_jit_comp64.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/hv-24x7.c
arch/powerpc/perf/isa207-common.c
arch/powerpc/perf/isa207-common.h
arch/powerpc/perf/power10-events-list.h
arch/powerpc/perf/power10-pmu.c
arch/powerpc/perf/power9-pmu.c
arch/powerpc/platforms/44x/Kconfig
arch/powerpc/platforms/52xx/lite5200_sleep.S
arch/powerpc/platforms/Kconfig.cputype
arch/powerpc/platforms/cell/iommu.c
arch/powerpc/platforms/cell/spu_callbacks.c
arch/powerpc/platforms/chrp/pci.c
arch/powerpc/platforms/embedded6xx/Kconfig
arch/powerpc/platforms/maple/pci.c
arch/powerpc/platforms/pasemi/iommu.c
arch/powerpc/platforms/powernv/memtrace.c
arch/powerpc/platforms/powernv/opal-core.c
arch/powerpc/platforms/powernv/opal-prd.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/setup.c
arch/powerpc/platforms/pseries/dlpar.c
arch/powerpc/platforms/pseries/hotplug-cpu.c
arch/powerpc/platforms/pseries/hvCall_inst.c
arch/powerpc/platforms/pseries/iommu.c
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/platforms/pseries/lparcfg.c
arch/powerpc/platforms/pseries/papr_scm.c
arch/powerpc/platforms/pseries/pci_dlpar.c
arch/powerpc/platforms/pseries/pmem.c
arch/powerpc/platforms/pseries/pseries.h
arch/powerpc/platforms/pseries/ras.c
arch/powerpc/platforms/pseries/rtas-fadump.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/platforms/pseries/vio.c
arch/powerpc/purgatory/trampoline_64.S
arch/powerpc/sysdev/dart_iommu.c
arch/powerpc/sysdev/fsl_pci.c
arch/powerpc/sysdev/xive/common.c
arch/powerpc/sysdev/xive/native.c
arch/powerpc/sysdev/xive/spapr.c
arch/powerpc/sysdev/xive/xive-internal.h
arch/powerpc/xmon/xmon.c
arch/s390/include/asm/vdso/gettimeofday.h
arch/x86/include/asm/vdso/gettimeofday.h
drivers/i2c/busses/Kconfig
drivers/macintosh/via-pmu.c
drivers/macintosh/windfarm_core.c
drivers/macintosh/windfarm_pm121.c
drivers/macintosh/windfarm_smu_controls.c
include/linux/compat.h
include/linux/uaccess.h
lib/vdso/gettimeofday.c
tools/testing/selftests/powerpc/alignment/alignment_handler.c
tools/testing/selftests/powerpc/mm/Makefile
tools/testing/selftests/powerpc/mm/stress_code_patching.sh [new file with mode: 0755]
tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
tools/testing/selftests/powerpc/ptrace/.gitignore
tools/testing/selftests/powerpc/ptrace/Makefile
tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c [new file with mode: 0644]
tools/testing/selftests/powerpc/security/Makefile
tools/testing/selftests/powerpc/security/entry_flush.c
tools/testing/selftests/powerpc/security/flush_utils.c
tools/testing/selftests/powerpc/security/flush_utils.h
tools/testing/selftests/powerpc/security/rfi_flush.c
tools/testing/selftests/powerpc/security/uaccess_flush.c [new file with mode: 0644]
tools/testing/selftests/powerpc/tm/tm-trap.c
tools/testing/selftests/timens/gettime_perf.c

index c941b21..4150f74 100644 (file)
@@ -64,6 +64,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on:
   - arm64
   - arm32
   - ppc64
+  - ppc32
   - sparc64
   - mips64
   - s390x
@@ -73,7 +74,6 @@ two flavors of JITs, the newer eBPF JIT currently supported on:
 And the older cBPF JIT supported on the following archs:
 
   - mips
-  - ppc
   - sparc
 
 eBPF JITs are a superset of cBPF JITs, meaning the kernel will
index 7aff505..fa83403 100644 (file)
@@ -21,7 +21,7 @@
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
-    |     powerpc: | TODO |
+    |     powerpc: |  ok  |
     |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: | TODO |
index 3d553e8..fce8bc7 100644 (file)
@@ -275,6 +275,20 @@ Health Bitmap Flags:
 Given a DRC Index collect the performance statistics for NVDIMM and copy them
 to the resultBuffer.
 
+**H_SCM_FLUSH**
+
+| Input: *drcIndex, continue-token*
+| Out: *continue-token*
+| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
+
+Given a DRC Index Flush the data to backend NVDIMM device.
+
+The hcall returns H_BUSY when the flush takes longer time and the hcall needs
+to be issued multiple times in order to be completely serviced. The
+*continue-token* from the output to be passed in the argument list of
+subsequent hcalls to the hypervisor until the hcall is completely serviced
+at which point H_SUCCESS or other error is returned by the hypervisor.
+
 References
 ==========
 .. [1] "Power Architecture Platform Reference"
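
For orientation only (this sketch is not part of the commit): a consumer of the new H_SCM_FLUSH hcall would typically loop while the hypervisor returns H_BUSY, feeding the returned *continue-token* back in, roughly as below. The function name, drc_index variable and long-busy handling are illustrative assumptions; plpar_hcall(), H_BUSY/H_SUCCESS, H_IS_LONG_BUSY() and get_longbusy_msecs() are existing pseries helpers:

    /* Illustrative sketch of driving H_SCM_FLUSH until completion. */
    #include <linux/delay.h>
    #include <linux/errno.h>
    #include <linux/sched.h>
    #include <asm/hvcall.h>
    #include <asm/plpar_wrappers.h>

    static int scm_flush(u32 drc_index)
    {
            unsigned long ret_buf[PLPAR_HCALL_BUFSIZE];
            u64 token = 0;
            long rc;

            do {
                    /* Input: drcIndex, continue-token; Output: continue-token */
                    rc = plpar_hcall(H_SCM_FLUSH, ret_buf, drc_index, token);
                    token = ret_buf[0];

                    if (H_IS_LONG_BUSY(rc)) {
                            msleep(get_longbusy_msecs(rc));
                            rc = H_BUSY;
                    } else if (rc == H_BUSY) {
                            cond_resched();
                    }
            } while (rc == H_BUSY);

            return rc == H_SUCCESS ? 0 : -EIO;
    }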
index 90c50ed..bdb50fe 100644 (file)
@@ -254,7 +254,7 @@ using this window. the signal will be issued to the thread group leader
 signals.
 
 NX-GZIP User's Manual:
-https://github.com/libnxz/power-gzip/blob/master/power_nx_gzip_um.pdf
+https://github.com/libnxz/power-gzip/blob/master/doc/power_nx_gzip_um.pdf
 
 Simple example
 ==============
@@ -301,5 +301,5 @@ Simple example
                        close(fd) or window can be closed upon process exit
                }
 
-       Refer https://github.com/abalib/power-gzip for tests or more
+       Refer https://github.com/libnxz/power-gzip for tests or more
        use cases.
index 7508b0a..ecb6fd4 100644 (file)
@@ -155,7 +155,8 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
 }
 
 #ifdef CONFIG_TIME_NS
-static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
        const struct vdso_data *ret;
 
index 4b4c0da..4f7a629 100644 (file)
@@ -96,7 +96,7 @@ const struct vdso_data *__arch_get_vdso_data(void)
 
 #ifdef CONFIG_TIME_NS
 static __always_inline
-const struct vdso_data *__arch_get_timens_vdso_data(void)
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
        return _timens_data;
 }
index 3b34c44..c52b0a4 100644 (file)
@@ -119,6 +119,7 @@ config PPC
        #
        select ARCH_32BIT_OFF_T if PPC32
        select ARCH_HAS_DEBUG_VIRTUAL
+       select ARCH_HAS_DEBUG_VM_PGTABLE
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_FORTIFY_SOURCE
@@ -135,7 +136,7 @@ config PPC
        select ARCH_HAS_MEMBARRIER_CALLBACKS
        select ARCH_HAS_MEMBARRIER_SYNC_CORE
        select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
-       select ARCH_HAS_STRICT_KERNEL_RWX       if (PPC32 && !HIBERNATION)
+       select ARCH_HAS_STRICT_KERNEL_RWX       if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION)
        select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_HAS_UACCESS_FLUSHCACHE
        select ARCH_HAS_COPY_MC                 if PPC64
@@ -145,6 +146,7 @@ config PPC
        select ARCH_MIGHT_HAVE_PC_PARPORT
        select ARCH_MIGHT_HAVE_PC_SERIO
        select ARCH_OPTIONAL_KERNEL_RWX         if ARCH_HAS_STRICT_KERNEL_RWX
+       select ARCH_STACKWALK
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC    if PPC32 || PPC_BOOK3S_64
        select ARCH_USE_BUILTIN_BSWAP
@@ -171,6 +173,7 @@ config PPC
        select GENERIC_CPU_AUTOPROBE
        select GENERIC_CPU_VULNERABILITIES      if PPC_BARRIER_NOSPEC
        select GENERIC_EARLY_IOREMAP
+       select GENERIC_GETTIMEOFDAY
        select GENERIC_IRQ_SHOW
        select GENERIC_IRQ_SHOW_LEVEL
        select GENERIC_PCI_IOMAP                if PCI
@@ -178,13 +181,15 @@ config PPC
        select GENERIC_STRNCPY_FROM_USER
        select GENERIC_STRNLEN_USER
        select GENERIC_TIME_VSYSCALL
-       select GENERIC_GETTIMEOFDAY
+       select GENERIC_VDSO_TIME_NS
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_HUGE_VMAP              if PPC_BOOK3S_64 && PPC_RADIX_MMU
        select HAVE_ARCH_JUMP_LABEL
+       select HAVE_ARCH_JUMP_LABEL_RELATIVE
        select HAVE_ARCH_KASAN                  if PPC32 && PPC_PAGE_SHIFT <= 14
        select HAVE_ARCH_KASAN_VMALLOC          if PPC32 && PPC_PAGE_SHIFT <= 14
        select HAVE_ARCH_KGDB
+       select HAVE_ARCH_KFENCE                 if PPC32
        select HAVE_ARCH_MMAP_RND_BITS
        select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if COMPAT
        select HAVE_ARCH_NVRAM_OPS
@@ -192,7 +197,6 @@ config PPC
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ASM_MODVERSIONS
        select HAVE_C_RECORDMCOUNT
-       select HAVE_CBPF_JIT                    if !PPC64
        select HAVE_STACKPROTECTOR              if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
        select HAVE_STACKPROTECTOR              if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
        select HAVE_CONTEXT_TRACKING            if PPC64
@@ -200,7 +204,7 @@ config PPC
        select HAVE_DEBUG_STACKOVERFLOW
        select HAVE_DYNAMIC_FTRACE
        select HAVE_DYNAMIC_FTRACE_WITH_REGS    if MPROFILE_KERNEL
-       select HAVE_EBPF_JIT                    if PPC64
+       select HAVE_EBPF_JIT
        select HAVE_EFFICIENT_UNALIGNED_ACCESS  if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
        select HAVE_FAST_GUP
        select HAVE_FTRACE_MCOUNT_RECORD
@@ -224,8 +228,8 @@ config PPC
        select HAVE_LIVEPATCH                   if HAVE_DYNAMIC_FTRACE_WITH_REGS
        select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_NMI                         if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
-       select HAVE_HARDLOCKUP_DETECTOR_ARCH    if (PPC64 && PPC_BOOK3S)
-       select HAVE_OPTPROBES                   if PPC64
+       select HAVE_HARDLOCKUP_DETECTOR_ARCH    if PPC64 && PPC_BOOK3S && SMP
+       select HAVE_OPTPROBES
        select HAVE_PERF_EVENTS
        select HAVE_PERF_EVENTS_NMI             if PPC64
        select HAVE_HARDLOCKUP_DETECTOR_PERF    if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
@@ -234,7 +238,7 @@ config PPC
        select MMU_GATHER_RCU_TABLE_FREE
        select MMU_GATHER_PAGE_SIZE
        select HAVE_REGS_AND_STACK_ACCESS_API
-       select HAVE_RELIABLE_STACKTRACE         if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
+       select HAVE_RELIABLE_STACKTRACE
        select HAVE_SOFTIRQ_ON_OWN_STACK
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_VIRT_CPU_ACCOUNTING
@@ -786,7 +790,7 @@ config THREAD_SHIFT
 config DATA_SHIFT_BOOL
        bool "Set custom data alignment"
        depends on ADVANCED_OPTIONS
-       depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC
+       depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE
        depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX)
        help
          This option allows you to set the kernel data alignment. When
@@ -798,13 +802,13 @@ config DATA_SHIFT_BOOL
 config DATA_SHIFT
        int "Data shift" if DATA_SHIFT_BOOL
        default 24 if STRICT_KERNEL_RWX && PPC64
-       range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32
-       range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx
+       range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
+       range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
        default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
-       default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32
+       default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
        default 23 if STRICT_KERNEL_RWX && PPC_8xx
-       default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA
-       default 19 if DEBUG_PAGEALLOC && PPC_8xx
+       default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA
+       default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
        default PPC_PAGE_SHIFT
        help
          On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
@@ -1217,7 +1221,7 @@ config TASK_SIZE_BOOL
 config TASK_SIZE
        hex "Size of user task space" if TASK_SIZE_BOOL
        default "0x80000000" if PPC_8xx
-       default "0xb0000000" if PPC_BOOK3S_32 && STRICT_KERNEL_RWX
+       default "0xb0000000" if PPC_BOOK3S_32
        default "0xc0000000"
 endmenu
 
index ae08435..6342f9d 100644 (file)
@@ -353,6 +353,7 @@ config PPC_EARLY_DEBUG_CPM_ADDR
 config FAIL_IOMMU
        bool "Fault-injection capability for IOMMU"
        depends on FAULT_INJECTION
+       depends on PCI || IBMVIO
        help
          Provide fault-injection capability for IOMMU. Each device can
          be selectively enabled via the fail_iommu property.
index 5f8544c..3212d07 100644 (file)
@@ -181,12 +181,6 @@ CC_FLAGS_FTRACE := -pg
 ifdef CONFIG_MPROFILE_KERNEL
 CC_FLAGS_FTRACE += -mprofile-kernel
 endif
-# Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828
-ifndef CONFIG_CC_IS_CLANG
-CC_FLAGS_FTRACE        += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog)
-endif
 endif
 
 CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU))
@@ -444,12 +438,15 @@ endif
 endif
 
 ifdef CONFIG_SMP
+ifdef CONFIG_PPC32
 prepare: task_cpu_prepare
 
 PHONY += task_cpu_prepare
 task_cpu_prepare: prepare0
        $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TASK_CPU") print $$3;}' include/generated/asm-offsets.h))
-endif
+
+endif # CONFIG_PPC32
+endif # CONFIG_SMP
 
 PHONY += checkbin
 # Check toolchain versions:
index 4f05a66..701811c 100644 (file)
@@ -50,6 +50,7 @@ CONFIG_PPC_TRANSACTIONAL_MEM=y
 CONFIG_KEXEC=y
 CONFIG_KEXEC_FILE=y
 CONFIG_CRASH_DUMP=y
+CONFIG_FA_DUMP=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_PPC_64K_PAGES=y
 CONFIG_SCHED_SMT=y
@@ -177,6 +178,7 @@ CONFIG_CHELSIO_T1=m
 CONFIG_BE2NET=m
 CONFIG_IBMVETH=m
 CONFIG_EHEA=m
+CONFIG_IBMVNIC=m
 CONFIG_E100=y
 CONFIG_E1000=y
 CONFIG_E1000E=y
index 7772217..50168dd 100644 (file)
@@ -41,6 +41,7 @@ CONFIG_DTL=y
 CONFIG_SCANLOG=m
 CONFIG_PPC_SMLPAR=y
 CONFIG_IBMEBUS=y
+CONFIG_PAPR_SCM=m
 CONFIG_PPC_SVM=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_RTAS_FLASH=m
@@ -159,6 +160,7 @@ CONFIG_BE2NET=m
 CONFIG_S2IO=m
 CONFIG_IBMVETH=y
 CONFIG_EHEA=y
+CONFIG_IBMVNIC=y
 CONFIG_E100=y
 CONFIG_E1000=y
 CONFIG_E1000E=y
index e1f9b4e..bcf95ce 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table_32.h
 generated-y += syscall_table_64.h
-generated-y += syscall_table_c32.h
 generated-y += syscall_table_spu.h
 generic-y += export.h
 generic-y += kvm_types.h
index 939f3c9..1c7b758 100644 (file)
@@ -77,8 +77,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
                      u32 len_high, u32 len_low);
 long sys_switch_endian(void);
-notrace unsigned int __check_irq_replay(void);
-void notrace restore_interrupts(void);
 
 /* prom_init (OpenFirmware) */
 unsigned long __init prom_init(unsigned long r3, unsigned long r4,
index aecfde8..7ae29cf 100644 (file)
@@ -80,22 +80,6 @@ do {                                                                 \
        ___p1;                                                          \
 })
 
-#ifdef CONFIG_PPC64
-#define smp_cond_load_relaxed(ptr, cond_expr) ({               \
-       typeof(ptr) __PTR = (ptr);                              \
-       __unqual_scalar_typeof(*ptr) VAL;                       \
-       VAL = READ_ONCE(*__PTR);                                \
-       if (unlikely(!(cond_expr))) {                           \
-               spin_begin();                                   \
-               do {                                            \
-                       VAL = READ_ONCE(*__PTR);                \
-               } while (!(cond_expr));                         \
-               spin_end();                                     \
-       }                                                       \
-       (typeof(*ptr))VAL;                                      \
-})
-#endif
-
 #ifdef CONFIG_PPC_BOOK3S_64
 #define NOSPEC_BARRIER_SLOT   nop
 #elif defined(CONFIG_PPC_FSL_BOOK3E)
index 73bc5d2..1670dfe 100644 (file)
@@ -5,86 +5,7 @@
 #include <asm/bug.h>
 #include <asm/book3s/32/mmu-hash.h>
 
-#ifdef __ASSEMBLY__
-
-.macro kuep_update_sr  gpr1, gpr2              /* NEVER use r0 as gpr2 due to addis */
-101:   mtsrin  \gpr1, \gpr2
-       addi    \gpr1, \gpr1, 0x111             /* next VSID */
-       rlwinm  \gpr1, \gpr1, 0, 0xf0ffffff     /* clear VSID overflow */
-       addis   \gpr2, \gpr2, 0x1000            /* address of next segment */
-       bdnz    101b
-       isync
-.endm
-
-.macro kuep_lock       gpr1, gpr2
-#ifdef CONFIG_PPC_KUEP
-       li      \gpr1, NUM_USER_SEGMENTS
-       li      \gpr2, 0
-       mtctr   \gpr1
-       mfsrin  \gpr1, \gpr2
-       oris    \gpr1, \gpr1, SR_NX@h           /* set Nx */
-       kuep_update_sr \gpr1, \gpr2
-#endif
-.endm
-
-.macro kuep_unlock     gpr1, gpr2
-#ifdef CONFIG_PPC_KUEP
-       li      \gpr1, NUM_USER_SEGMENTS
-       li      \gpr2, 0
-       mtctr   \gpr1
-       mfsrin  \gpr1, \gpr2
-       rlwinm  \gpr1, \gpr1, 0, ~SR_NX         /* Clear Nx */
-       kuep_update_sr \gpr1, \gpr2
-#endif
-.endm
-
-#ifdef CONFIG_PPC_KUAP
-
-.macro kuap_update_sr  gpr1, gpr2, gpr3        /* NEVER use r0 as gpr2 due to addis */
-101:   mtsrin  \gpr1, \gpr2
-       addi    \gpr1, \gpr1, 0x111             /* next VSID */
-       rlwinm  \gpr1, \gpr1, 0, 0xf0ffffff     /* clear VSID overflow */
-       addis   \gpr2, \gpr2, 0x1000            /* address of next segment */
-       cmplw   \gpr2, \gpr3
-       blt-    101b
-       isync
-.endm
-
-.macro kuap_save_and_lock      sp, thread, gpr1, gpr2, gpr3
-       lwz     \gpr2, KUAP(\thread)
-       rlwinm. \gpr3, \gpr2, 28, 0xf0000000
-       stw     \gpr2, STACK_REGS_KUAP(\sp)
-       beq+    102f
-       li      \gpr1, 0
-       stw     \gpr1, KUAP(\thread)
-       mfsrin  \gpr1, \gpr2
-       oris    \gpr1, \gpr1, SR_KS@h   /* set Ks */
-       kuap_update_sr  \gpr1, \gpr2, \gpr3
-102:
-.endm
-
-.macro kuap_restore    sp, current, gpr1, gpr2, gpr3
-       lwz     \gpr2, STACK_REGS_KUAP(\sp)
-       rlwinm. \gpr3, \gpr2, 28, 0xf0000000
-       stw     \gpr2, THREAD + KUAP(\current)
-       beq+    102f
-       mfsrin  \gpr1, \gpr2
-       rlwinm  \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */
-       kuap_update_sr  \gpr1, \gpr2, \gpr3
-102:
-.endm
-
-.macro kuap_check      current, gpr
-#ifdef CONFIG_PPC_KUAP_DEBUG
-       lwz     \gpr, THREAD + KUAP(\current)
-999:   twnei   \gpr, 0
-       EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
-#endif
-.endm
-
-#endif /* CONFIG_PPC_KUAP */
-
-#else /* !__ASSEMBLY__ */
+#ifndef __ASSEMBLY__
 
 #ifdef CONFIG_PPC_KUAP
 
@@ -103,6 +24,51 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
        isync();        /* Context sync required after mtsr() */
 }
 
+static inline void kuap_save_and_lock(struct pt_regs *regs)
+{
+       unsigned long kuap = current->thread.kuap;
+       u32 addr = kuap & 0xf0000000;
+       u32 end = kuap << 28;
+
+       regs->kuap = kuap;
+       if (unlikely(!kuap))
+               return;
+
+       current->thread.kuap = 0;
+       kuap_update_sr(mfsr(addr) | SR_KS, addr, end);  /* Set Ks */
+}
+
+static inline void kuap_user_restore(struct pt_regs *regs)
+{
+}
+
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+       u32 addr = regs->kuap & 0xf0000000;
+       u32 end = regs->kuap << 28;
+
+       current->thread.kuap = regs->kuap;
+
+       if (unlikely(regs->kuap == kuap))
+               return;
+
+       kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */
+}
+
+static inline unsigned long kuap_get_and_assert_locked(void)
+{
+       unsigned long kuap = current->thread.kuap;
+
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0);
+
+       return kuap;
+}
+
+static inline void kuap_assert_locked(void)
+{
+       kuap_get_and_assert_locked();
+}
+
 static __always_inline void allow_user_access(void __user *to, const void __user *from,
                                              u32 size, unsigned long dir)
 {
index 415ae29..83c6584 100644 (file)
@@ -194,10 +194,8 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
 #define VMALLOC_END    ioremap_bot
 #endif
 
-#ifdef CONFIG_STRICT_KERNEL_RWX
 #define MODULES_END    ALIGN_DOWN(PAGE_OFFSET, SZ_256M)
 #define MODULES_VADDR  (MODULES_END - SZ_256M)
-#endif
 
 #ifndef __ASSEMBLY__
 #include <linux/sched.h>
index d941c06..ba1743c 100644 (file)
@@ -79,4 +79,4 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
        flush_tlb_mm(mm);
 }
 
-#endif /* _ASM_POWERPC_TLBFLUSH_H */
+#endif /* _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H */
index 8bd9050..9700da3 100644 (file)
@@ -287,7 +287,7 @@ static inline void kuap_kernel_restore(struct pt_regs *regs,
         */
 }
 
-static inline unsigned long kuap_get_and_check_amr(void)
+static inline unsigned long kuap_get_and_assert_locked(void)
 {
        if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
                unsigned long amr = mfspr(SPRN_AMR);
@@ -298,27 +298,7 @@ static inline unsigned long kuap_get_and_check_amr(void)
        return 0;
 }
 
-#else /* CONFIG_PPC_PKEY */
-
-static inline void kuap_user_restore(struct pt_regs *regs)
-{
-}
-
-static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
-{
-}
-
-static inline unsigned long kuap_get_and_check_amr(void)
-{
-       return 0;
-}
-
-#endif /* CONFIG_PPC_PKEY */
-
-
-#ifdef CONFIG_PPC_KUAP
-
-static inline void kuap_check_amr(void)
+static inline void kuap_assert_locked(void)
 {
        if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
                WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
index f911bdb..3004f33 100644 (file)
@@ -18,7 +18,6 @@
  * complete pgtable.h but only a portion of it.
  */
 #include <asm/book3s/64/pgtable.h>
-#include <asm/bug.h>
 #include <asm/task_size_64.h>
 #include <asm/cpu_has_feature.h>
 
index 058601e..a666d56 100644 (file)
@@ -7,6 +7,7 @@
 #ifndef __ASSEMBLY__
 #include <linux/mmdebug.h>
 #include <linux/bug.h>
+#include <linux/sizes.h>
 #endif
 
 /*
  */
 #define _PAGE_KERNEL_RW                (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
 #define _PAGE_KERNEL_RO                 (_PAGE_PRIVILEGED | _PAGE_READ)
+#define _PAGE_KERNEL_ROX        (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC)
 #define _PAGE_KERNEL_RWX       (_PAGE_PRIVILEGED | _PAGE_DIRTY |       \
                                 _PAGE_RW | _PAGE_EXEC)
 /*
@@ -323,7 +325,8 @@ extern unsigned long pci_io_base;
 #define  PHB_IO_END    (KERN_IO_START + FULL_IO_SIZE)
 #define IOREMAP_BASE   (PHB_IO_END)
 #define IOREMAP_START  (ioremap_bot)
-#define IOREMAP_END    (KERN_IO_END)
+#define IOREMAP_END    (KERN_IO_END - FIXADDR_SIZE)
+#define FIXADDR_SIZE   SZ_32M
 
 /* Advertise special mapping type for AGP */
 #define HAVE_PAGE_AGP
index c7813dc..59cab55 100644 (file)
@@ -222,8 +222,10 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
         * from ptesync, it should probably go into update_mmu_cache, rather
         * than set_pte_at (which is used to set ptes unrelated to faults).
         *
-        * Spurious faults to vmalloc region are not tolerated, so there is
-        * a ptesync in flush_cache_vmap.
+        * Spurious faults from the kernel memory are not tolerated, so there
+        * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows
+        * the pte update sequence from ISA Book III 6.10 Translation Table
+        * Update Synchronization Requirements.
         */
 }
 
index d1635ff..0b21628 100644 (file)
 #ifndef __ASSEMBLY__
 
 struct pt_regs;
-long do_page_fault(struct pt_regs *);
-long hash__do_page_fault(struct pt_regs *);
+void hash__do_page_fault(struct pt_regs *);
 void bad_page_fault(struct pt_regs *, int);
-void __bad_page_fault(struct pt_regs *regs, int sig);
-void do_bad_page_fault_segv(struct pt_regs *regs);
 extern void _exception(int, struct pt_regs *, int, unsigned long);
 extern void _exception_pkey(struct pt_regs *, unsigned long, int);
 extern void die(const char *, struct pt_regs *, long);
index f634951..7564dd4 100644 (file)
@@ -30,7 +30,19 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *page);
+/*
+ * This is called when a page has been modified by the kernel.
+ * It just marks the page as not i-cache clean.  We do the i-cache
+ * flush later when the page is given to a user process, if necessary.
+ */
+static inline void flush_dcache_page(struct page *page)
+{
+       if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+               return;
+       /* avoid an atomic op if possible */
+       if (test_bit(PG_dcache_clean, &page->flags))
+               clear_bit(PG_dcache_clean, &page->flags);
+}
 
 void flush_icache_range(unsigned long start, unsigned long stop);
 #define flush_icache_range flush_icache_range
@@ -40,7 +52,6 @@ void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
 #define flush_icache_user_page flush_icache_user_page
 
 void flush_dcache_icache_page(struct page *page);
-void __flush_dcache_icache(void *page);
 
 /**
  * flush_dcache_range(): Write any modified data cache blocks out to memory and
index 2211b93..bda4578 100644 (file)
@@ -594,7 +594,7 @@ typedef struct fcc_enet {
        uint    fen_p256c;      /* Total packets 256 < bytes <= 511 */
        uint    fen_p512c;      /* Total packets 512 < bytes <= 1023 */
        uint    fen_p1024c;     /* Total packets 1024 < bytes <= 1518 */
-       uint    fen_cambuf;     /* Internal CAM buffer poiner */
+       uint    fen_cambuf;     /* Internal CAM buffer pointer */
        ushort  fen_rfthr;      /* Received frames threshold */
        ushort  fen_rfcnt;      /* Received frames count */
 } fcc_enet_t;
index 8d03c16..947b5b9 100644 (file)
 #include <asm/kmap_size.h>
 #endif
 
+#ifdef CONFIG_PPC64
+#define FIXADDR_TOP    (IOREMAP_END + FIXADDR_SIZE)
+#else
+#define FIXADDR_SIZE   0
 #ifdef CONFIG_KASAN
 #include <asm/kasan.h>
 #define FIXADDR_TOP    (KASAN_SHADOW_START - PAGE_SIZE)
 #else
 #define FIXADDR_TOP    ((unsigned long)(-PAGE_SIZE))
 #endif
+#endif
 
 /*
  * Here we define all the compile-time 'special' virtual
@@ -50,6 +55,7 @@
  */
 enum fixed_addresses {
        FIX_HOLE,
+#ifdef CONFIG_PPC32
        /* reserve the top 128K for early debugging purposes */
        FIX_EARLY_DEBUG_TOP = FIX_HOLE,
        FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1,
@@ -72,6 +78,7 @@ enum fixed_addresses {
                       FIX_IMMR_SIZE,
 #endif
        /* FIX_PCIE_MCFG, */
+#endif /* CONFIG_PPC32 */
        __end_of_permanent_fixed_addresses,
 
 #define NR_FIX_BTMAPS          (SZ_256K / PAGE_SIZE)
@@ -98,6 +105,8 @@ enum fixed_addresses {
 static inline void __set_fixmap(enum fixed_addresses idx,
                                phys_addr_t phys, pgprot_t flags)
 {
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && __FIXADDR_SIZE > FIXADDR_SIZE);
+
        if (__builtin_constant_p(idx))
                BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
        else if (WARN_ON(idx >= __end_of_fixed_addresses))
index e93ee32..b3001f8 100644 (file)
@@ -33,9 +33,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
 {
        int oldval = 0, ret;
 
-       if (!access_ok(uaddr, sizeof(u32)))
+       if (!user_access_begin(uaddr, sizeof(u32)))
                return -EFAULT;
-       allow_read_write_user(uaddr, uaddr, sizeof(*uaddr));
 
        switch (op) {
        case FUTEX_OP_SET:
@@ -56,10 +55,10 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
        default:
                ret = -ENOSYS;
        }
+       user_access_end();
 
        *oval = oldval;
 
-       prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr));
        return ret;
 }
 
@@ -70,11 +69,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
        int ret = 0;
        u32 prev;
 
-       if (!access_ok(uaddr, sizeof(u32)))
+       if (!user_access_begin(uaddr, sizeof(u32)))
                return -EFAULT;
 
-       allow_read_write_user(uaddr, uaddr, sizeof(*uaddr));
-
         __asm__ __volatile__ (
         PPC_ATOMIC_ENTRY_BARRIER
 "1:     lwarx   %1,0,%3         # futex_atomic_cmpxchg_inatomic\n\
@@ -93,8 +90,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
         : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)
         : "cc", "memory");
 
+       user_access_end();
+
        *uval = prev;
-       prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr));
 
         return ret;
 }
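
The futex conversion just above illustrates the pattern this series applies more widely (notably in the signal code): open a user-access window with user_access_begin(), perform the accesses, then close it with user_access_end(). A minimal sketch of that pattern, with an illustrative helper name:

    /* Minimal sketch of the user_access_begin()/user_access_end() pattern. */
    #include <linux/errno.h>
    #include <linux/uaccess.h>

    static int put_user_flag(u32 __user *uaddr, u32 val)
    {
            if (!user_access_begin(uaddr, sizeof(*uaddr)))
                    return -EFAULT;

            /* unsafe_*_user() may only be used inside an open access window */
            unsafe_put_user(val, uaddr, efault);

            user_access_end();
            return 0;

    efault:
            user_access_end();
            return -EFAULT;
    }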
index ed6086d..4430509 100644 (file)
 #define H_SCM_HEALTH            0x400
 #define H_SCM_PERFORMANCE_STATS 0x418
 #define H_RPT_INVALIDATE       0x448
-#define MAX_HCALL_OPCODE       H_RPT_INVALIDATE
+#define H_SCM_FLUSH            0x44C
+#define MAX_HCALL_OPCODE       H_SCM_FLUSH
 
 /* Scope args for H_SCM_UNBIND_ALL */
 #define H_UNBIND_SCOPE_ALL (0x1)
 #define H_CPU_BEHAV_FAVOUR_SECURITY    (1ull << 63) // IBM bit 0
 #define H_CPU_BEHAV_L1D_FLUSH_PR       (1ull << 62) // IBM bit 1
 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR  (1ull << 61) // IBM bit 2
+#define H_CPU_BEHAV_FAVOUR_SECURITY_H  (1ull << 60) // IBM bit 3
 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE  (1ull << 58) // IBM bit 5
 #define H_CPU_BEHAV_FLUSH_LINK_STACK   (1ull << 57) // IBM bit 6
 
index 999ed5a..ccb2034 100644 (file)
@@ -24,5 +24,8 @@
 extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
 extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
 
+/* Provided by HVC VIO */
+void hvc_vio_init_early(void);
+
 #endif /* __KERNEL__ */
 #endif /* _PPC64_HVCONSOLE_H */
index ae02eb5..d024447 100644 (file)
@@ -94,8 +94,6 @@ extern volatile struct Hydra __iomem *Hydra;
 #define HYDRA_INT_EXT7         18      /* Power Off Request */
 #define HYDRA_INT_SPARE                19
 
-extern int hydra_init(void);
-
 #endif /* __KERNEL__ */
 
 #endif /* _ASMPPC_HYDRA_H */
index cc73c12..268d3bd 100644 (file)
@@ -4,6 +4,40 @@
 
 #include <asm/ppc-opcode.h>
 
+#ifdef CONFIG_PPC64
+
+#define ___get_user_instr(gu_op, dest, ptr)                            \
+({                                                                     \
+       long __gui_ret = 0;                                             \
+       unsigned long __gui_ptr = (unsigned long)ptr;                   \
+       struct ppc_inst __gui_inst;                                     \
+       unsigned int __prefix, __suffix;                                \
+       __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr);  \
+       if (__gui_ret == 0) {                                           \
+               if ((__prefix >> 26) == OP_PREFIX) {                    \
+                       __gui_ret = gu_op(__suffix,                     \
+                               (unsigned int __user *)__gui_ptr + 1);  \
+                       __gui_inst = ppc_inst_prefix(__prefix,          \
+                                                    __suffix);         \
+               } else {                                                \
+                       __gui_inst = ppc_inst(__prefix);                \
+               }                                                       \
+               if (__gui_ret == 0)                                     \
+                       (dest) = __gui_inst;                            \
+       }                                                               \
+       __gui_ret;                                                      \
+})
+#else /* !CONFIG_PPC64 */
+#define ___get_user_instr(gu_op, dest, ptr)                            \
+       gu_op((dest).val, (u32 __user *)(ptr))
+#endif /* CONFIG_PPC64 */
+
+#define get_user_instr(x, ptr) \
+       ___get_user_instr(get_user, x, ptr)
+
+#define __get_user_instr(x, ptr) \
+       ___get_user_instr(__get_user, x, ptr)
+
 /*
  * Instruction data type for POWER
  */
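
A short, hedged usage sketch for the new get_user_instr() helpers above (the wrapper function is illustrative; the helpers themselves take care of fetching the suffix word of a prefixed instruction on 64-bit):

    /* Illustrative only: fetch the (possibly prefixed) instruction at regs->nip. */
    static int read_user_instr(struct pt_regs *regs, struct ppc_inst *instr)
    {
            if (unlikely(__get_user_instr(*instr, (void __user *)regs->nip)))
                    return -EFAULT;

            return 0;
    }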
@@ -68,6 +102,8 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
 
 #define ppc_inst(x) ((struct ppc_inst){ .val = x })
 
+#define ppc_inst_prefix(x, y) ppc_inst(x)
+
 static inline bool ppc_inst_prefixed(struct ppc_inst x)
 {
        return false;
@@ -113,13 +149,14 @@ static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *va
        return location + ppc_inst_len(tmp);
 }
 
-static inline u64 ppc_inst_as_u64(struct ppc_inst x)
+static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x)
 {
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-       return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x);
-#else
-       return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x);
-#endif
+       if (IS_ENABLED(CONFIG_PPC32))
+               return ppc_inst_val(x);
+       else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+               return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x);
+       else
+               return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x);
 }
 
 #define PPC_INST_STR_LEN sizeof("00000000 00000000")
@@ -141,10 +178,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins
        __str;                          \
 })
 
-int probe_user_read_inst(struct ppc_inst *inst,
-                        struct ppc_inst __user *nip);
-
-int probe_kernel_read_inst(struct ppc_inst *inst,
-                          struct ppc_inst *src);
+int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src);
 
 #endif /* _ASM_POWERPC_INST_H */
index e8d09a8..44cde2e 100644 (file)
@@ -2,6 +2,70 @@
 #ifndef _ASM_POWERPC_INTERRUPT_H
 #define _ASM_POWERPC_INTERRUPT_H
 
+/* BookE/4xx */
+#define INTERRUPT_CRITICAL_INPUT  0x100
+
+/* BookE */
+#define INTERRUPT_DEBUG           0xd00
+#ifdef CONFIG_BOOKE
+#define INTERRUPT_PERFMON         0x260
+#define INTERRUPT_DOORBELL        0x280
+#endif
+
+/* BookS/4xx/8xx */
+#define INTERRUPT_MACHINE_CHECK   0x200
+
+/* BookS/8xx */
+#define INTERRUPT_SYSTEM_RESET    0x100
+
+/* BookS */
+#define INTERRUPT_DATA_SEGMENT    0x380
+#define INTERRUPT_INST_SEGMENT    0x480
+#define INTERRUPT_TRACE           0xd00
+#define INTERRUPT_H_DATA_STORAGE  0xe00
+#define INTERRUPT_HMI                  0xe60
+#define INTERRUPT_H_FAC_UNAVAIL   0xf80
+#ifdef CONFIG_PPC_BOOK3S
+#define INTERRUPT_DOORBELL        0xa00
+#define INTERRUPT_PERFMON         0xf00
+#define INTERRUPT_ALTIVEC_UNAVAIL      0xf20
+#endif
+
+/* BookE/BookS/4xx/8xx */
+#define INTERRUPT_DATA_STORAGE    0x300
+#define INTERRUPT_INST_STORAGE    0x400
+#define INTERRUPT_EXTERNAL             0x500
+#define INTERRUPT_ALIGNMENT       0x600
+#define INTERRUPT_PROGRAM         0x700
+#define INTERRUPT_SYSCALL         0xc00
+#define INTERRUPT_TRACE                        0xd00
+
+/* BookE/BookS/44x */
+#define INTERRUPT_FP_UNAVAIL      0x800
+
+/* BookE/BookS/44x/8xx */
+#define INTERRUPT_DECREMENTER     0x900
+
+#ifndef INTERRUPT_PERFMON
+#define INTERRUPT_PERFMON         0x0
+#endif
+
+/* 8xx */
+#define INTERRUPT_SOFT_EMU_8xx         0x1000
+#define INTERRUPT_INST_TLB_MISS_8xx    0x1100
+#define INTERRUPT_DATA_TLB_MISS_8xx    0x1200
+#define INTERRUPT_INST_TLB_ERROR_8xx   0x1300
+#define INTERRUPT_DATA_TLB_ERROR_8xx   0x1400
+#define INTERRUPT_DATA_BREAKPOINT_8xx  0x1c00
+#define INTERRUPT_INST_BREAKPOINT_8xx  0x1d00
+
+/* 603 */
+#define INTERRUPT_INST_TLB_MISS_603            0x1000
+#define INTERRUPT_DATA_LOAD_TLB_MISS_603       0x1100
+#define INTERRUPT_DATA_STORE_TLB_MISS_603      0x1200
+
+#ifndef __ASSEMBLY__
+
 #include <linux/context_tracking.h>
 #include <linux/hardirq.h>
 #include <asm/cputime.h>
 #include <asm/kprobes.h>
 #include <asm/runlatch.h>
 
-struct interrupt_state {
-#ifdef CONFIG_PPC_BOOK3E_64
-       enum ctx_state ctx_state;
+static inline void nap_adjust_return(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_970_NAP
+       if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
+               /* Can avoid a test-and-clear because NMIs do not call this */
+               clear_thread_local_flags(_TLF_NAPPING);
+               regs->nip = (unsigned long)power4_idle_nap_return;
+       }
 #endif
+}
+
+struct interrupt_state {
 };
 
 static inline void booke_restore_dbcr0(void)
@@ -29,10 +101,19 @@ static inline void booke_restore_dbcr0(void)
 
 static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
-       /*
-        * Book3E reconciles irq soft mask in asm
-        */
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC32
+       if (!arch_irq_disabled_regs(regs))
+               trace_hardirqs_off();
+
+       if (user_mode(regs)) {
+               kuep_lock();
+               account_cpu_user_entry();
+       } else {
+               kuap_save_and_lock(regs);
+       }
+#endif
+
+#ifdef CONFIG_PPC64
        if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
                trace_hardirqs_off();
        local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
@@ -48,16 +129,12 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
                 * CT_WARN_ON comes here via program_check_exception,
                 * so avoid recursion.
                 */
-               if (TRAP(regs) != 0x700)
+               if (TRAP(regs) != INTERRUPT_PROGRAM)
                        CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
        }
 #endif
 
-#ifdef CONFIG_PPC_BOOK3E_64
-       state->ctx_state = exception_enter();
-       if (user_mode(regs))
-               account_cpu_user_entry();
-#endif
+       booke_restore_dbcr0();
 }
 
 /*
@@ -76,23 +153,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
  */
 static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
-#ifdef CONFIG_PPC_BOOK3E_64
-       exception_exit(state->ctx_state);
-#endif
-
-       /*
-        * Book3S exits to user via interrupt_exit_user_prepare(), which does
-        * context tracking, which is a cleaner way to handle PREEMPT=y
-        * and avoid context entry/exit in e.g., preempt_schedule_irq()),
-        * which is likely to be where the core code wants to end up.
-        *
-        * The above comment explains why we can't do the
-        *
-        *     if (user_mode(regs))
-        *         user_exit_irqoff();
-        *
-        * sequence here.
-        */
+       if (user_mode(regs))
+               kuep_unlock();
 }
 
 static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
@@ -109,24 +171,46 @@ static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct in
 
 static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
+       /*
+        * Adjust at exit so the main handler sees the true NIA. This must
+        * come before irq_exit() because irq_exit can enable interrupts, and
+        * if another interrupt is taken before nap_adjust_return has run
+        * here, then that interrupt would return directly to idle nap return.
+        */
+       nap_adjust_return(regs);
+
        irq_exit();
        interrupt_exit_prepare(regs, state);
 }
 
 struct interrupt_nmi_state {
 #ifdef CONFIG_PPC64
-#ifdef CONFIG_PPC_BOOK3S_64
        u8 irq_soft_mask;
        u8 irq_happened;
-#endif
        u8 ftrace_enabled;
 #endif
 };
 
+static inline bool nmi_disables_ftrace(struct pt_regs *regs)
+{
+       /* Allow DEC and PMI to be traced when they are soft-NMI */
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
+               if (TRAP(regs) == INTERRUPT_DECREMENTER)
+                      return false;
+               if (TRAP(regs) == INTERRUPT_PERFMON)
+                      return false;
+       }
+       if (IS_ENABLED(CONFIG_PPC_BOOK3E)) {
+               if (TRAP(regs) == INTERRUPT_PERFMON)
+                       return false;
+       }
+
+       return true;
+}
+
 static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
 {
 #ifdef CONFIG_PPC64
-#ifdef CONFIG_PPC_BOOK3S_64
        state->irq_soft_mask = local_paca->irq_soft_mask;
        state->irq_happened = local_paca->irq_happened;
 
@@ -139,9 +223,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
        local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 
        /* Don't do any per-CPU operations until interrupt state is fixed */
-#endif
-       /* Allow DEC and PMI to be traced when they are soft-NMI */
-       if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) {
+
+       if (nmi_disables_ftrace(regs)) {
                state->ftrace_enabled = this_cpu_get_ftrace_enabled();
                this_cpu_set_ftrace_enabled(0);
        }
@@ -164,17 +247,20 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
                        radix_enabled() || (mfmsr() & MSR_DR))
                nmi_exit();
 
+       /*
+        * nmi does not call nap_adjust_return because nmi should not create
+        * new work to do (must use irq_work for that).
+        */
+
 #ifdef CONFIG_PPC64
-       if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260)
+       if (nmi_disables_ftrace(regs))
                this_cpu_set_ftrace_enabled(state->ftrace_enabled);
 
-#ifdef CONFIG_PPC_BOOK3S_64
        /* Check we didn't change the pending interrupt mask. */
        WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
        local_paca->irq_happened = state->irq_happened;
        local_paca->irq_soft_mask = state->irq_soft_mask;
 #endif
-#endif
 }
 
 /*
@@ -387,6 +473,7 @@ DECLARE_INTERRUPT_HANDLER(SMIException);
 DECLARE_INTERRUPT_HANDLER(handle_hmi_exception);
 DECLARE_INTERRUPT_HANDLER(unknown_exception);
 DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception);
+DECLARE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception);
 DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception);
 DECLARE_INTERRUPT_HANDLER(RunModeException);
 DECLARE_INTERRUPT_HANDLER(single_step_exception);
@@ -410,7 +497,7 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception);
 DECLARE_INTERRUPT_HANDLER(CacheLockingException);
 DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException);
 DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException);
-DECLARE_INTERRUPT_HANDLER(WatchdogException);
+DECLARE_INTERRUPT_HANDLER_NMI(WatchdogException);
 DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
 
 /* slb.c */
@@ -421,7 +508,7 @@ DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault);
 DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault);
 
 /* fault.c */
-DECLARE_INTERRUPT_HANDLER_RET(do_page_fault);
+DECLARE_INTERRUPT_HANDLER(do_page_fault);
 DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv);
 
 /* process.c */
@@ -436,7 +523,7 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode);
 
 DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException);
 
-void unrecoverable_exception(struct pt_regs *regs);
+void __noreturn unrecoverable_exception(struct pt_regs *regs);
 
 void replay_system_reset(void);
 void replay_soft_interrupts(void);
@@ -447,4 +534,6 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
                local_irq_enable();
 }
 
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ASM_POWERPC_INTERRUPT_H */
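
For reference, the DECLARE_INTERRUPT_HANDLER*/DEFINE_INTERRUPT_HANDLER* family used above wraps each C handler body between interrupt_enter_prepare() and interrupt_exit_prepare(); a simplified sketch of the shape of the generated wrapper (not the exact macro body in this tree):

    /* Simplified sketch of what DEFINE_INTERRUPT_HANDLER(func) expands to. */
    #define DEFINE_INTERRUPT_HANDLER(func)                               \
    static __always_inline void ____##func(struct pt_regs *regs);       \
                                                                         \
    void func(struct pt_regs *regs)                                      \
    {                                                                    \
            struct interrupt_state state;                                \
                                                                         \
            interrupt_enter_prepare(regs, &state);                       \
            ____##func(regs);                                            \
            interrupt_exit_prepare(regs, &state);                        \
    }                                                                    \
                                                                         \
    static __always_inline void ____##func(struct pt_regs *regs)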
index f3f264e..b2bd588 100644 (file)
@@ -53,8 +53,6 @@ extern void *mcheckirq_ctx[NR_CPUS];
 extern void *hardirq_ctx[NR_CPUS];
 extern void *softirq_ctx[NR_CPUS];
 
-void call_do_softirq(void *sp);
-void call_do_irq(struct pt_regs *regs, void *sp);
 extern void do_IRQ(struct pt_regs *regs);
 extern void __init init_IRQ(void);
 extern void __do_irq(struct pt_regs *regs);
index 09297ec..2d5c6be 100644 (file)
@@ -20,7 +20,8 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
        asm_volatile_goto("1:\n\t"
                 "nop # arch_static_branch\n\t"
                 ".pushsection __jump_table,  \"aw\"\n\t"
-                JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
+                ".long 1b - ., %l[l_yes] - .\n\t"
+                JUMP_ENTRY_TYPE "%c0 - .\n\t"
                 ".popsection \n\t"
                 : :  "i" (&((char *)key)[branch]) : : l_yes);
 
@@ -34,7 +35,8 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
        asm_volatile_goto("1:\n\t"
                 "b %l[l_yes] # arch_static_branch_jump\n\t"
                 ".pushsection __jump_table,  \"aw\"\n\t"
-                JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
+                ".long 1b - ., %l[l_yes] - .\n\t"
+                JUMP_ENTRY_TYPE "%c0 - .\n\t"
                 ".popsection \n\t"
                 : :  "i" (&((char *)key)[branch]) : : l_yes);
 
@@ -43,23 +45,12 @@ l_yes:
        return true;
 }
 
-#ifdef CONFIG_PPC64
-typedef u64 jump_label_t;
-#else
-typedef u32 jump_label_t;
-#endif
-
-struct jump_entry {
-       jump_label_t code;
-       jump_label_t target;
-       jump_label_t key;
-};
-
 #else
 #define ARCH_STATIC_BRANCH(LABEL, KEY)         \
 1098:  nop;                                    \
        .pushsection __jump_table, "aw";        \
-       FTR_ENTRY_LONG 1098b, LABEL, KEY;       \
+       .long 1098b - ., LABEL - .;             \
+       FTR_ENTRY_LONG KEY;                     \
        .popsection
 #endif
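
With HAVE_ARCH_JUMP_LABEL_RELATIVE now selected, the arch-specific struct jump_entry above is dropped and the generic relative form takes over; for reference, the generic layout and accessors that consume the "1b - ." / "%l[l_yes] - ." / "%c0 - ." offsets look roughly like this (sketch, not part of this diff):

    /* Generic relative jump_entry layout, as in linux/jump_label.h. */
    struct jump_entry {
            s32 code;       /* branch/nop location, relative to &code */
            s32 target;     /* branch target, relative to &target     */
            long key;       /* static_key reference, relative to &key */
    };

    static inline unsigned long jump_entry_code(const struct jump_entry *entry)
    {
            return (unsigned long)&entry->code + entry->code;
    }

    static inline unsigned long jump_entry_target(const struct jump_entry *entry)
    {
            return (unsigned long)&entry->target + entry->target;
    }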
 
index 7355ed0..3c478e5 100644 (file)
@@ -19,7 +19,7 @@
 
 #define KASAN_SHADOW_SCALE_SHIFT       3
 
-#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX)
+#ifdef CONFIG_MODULES
 #define KASAN_KERN_START       ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
 #else
 #define KASAN_KERN_START       PAGE_OFFSET
diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
new file mode 100644 (file)
index 0000000..a9846b6
--- /dev/null
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * powerpc KFENCE support.
+ *
+ * Copyright (C) 2020 CS GROUP France
+ */
+
+#ifndef __ASM_POWERPC_KFENCE_H
+#define __ASM_POWERPC_KFENCE_H
+
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+
+static inline bool arch_kfence_init_pool(void)
+{
+       return true;
+}
+
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+       pte_t *kpte = virt_to_kpte(addr);
+
+       if (protect) {
+               pte_update(&init_mm, addr, kpte, _PAGE_PRESENT, 0, 0);
+               flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+       } else {
+               pte_update(&init_mm, addr, kpte, 0, _PAGE_PRESENT, 0);
+       }
+
+       return true;
+}
+
+#endif /* __ASM_POWERPC_KFENCE_H */
index 7ec21af..ec96232 100644 (file)
 
 #ifdef __ASSEMBLY__
 #ifndef CONFIG_PPC_KUAP
-.macro kuap_save_and_lock      sp, thread, gpr1, gpr2, gpr3
-.endm
-
-.macro kuap_restore    sp, current, gpr1, gpr2, gpr3
-.endm
-
-.macro kuap_check      current, gpr
-.endm
-
 .macro kuap_check_amr  gpr1, gpr2
 .endm
 
@@ -55,6 +46,14 @@ void setup_kuep(bool disabled);
 static inline void setup_kuep(bool disabled) { }
 #endif /* CONFIG_PPC_KUEP */
 
+#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
+void kuep_lock(void);
+void kuep_unlock(void);
+#else
+static inline void kuep_lock(void) { }
+static inline void kuep_unlock(void) { }
+#endif
+
 #ifdef CONFIG_PPC_KUAP
 void setup_kuap(bool disabled);
 #else
@@ -66,7 +65,15 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
        return false;
 }
 
-static inline void kuap_check_amr(void) { }
+static inline void kuap_assert_locked(void) { }
+static inline void kuap_save_and_lock(struct pt_regs *regs) { }
+static inline void kuap_user_restore(struct pt_regs *regs) { }
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { }
+
+static inline unsigned long kuap_get_and_assert_locked(void)
+{
+       return 0;
+}
 
 /*
  * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
index 2f5f919..c581215 100644 (file)
@@ -258,6 +258,8 @@ extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
 extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
                        struct kvm_memory_slot *memslot,
                        unsigned long *map);
+extern unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm,
+                       unsigned long lpcr);
 extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
                        unsigned long mask);
 extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
index 8aacd76..9531b1c 100644 (file)
@@ -767,8 +767,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
                      unsigned long pte_index, unsigned long avpn);
 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu);
 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
-                      unsigned long pte_index, unsigned long avpn,
-                      unsigned long va);
+                      unsigned long pte_index, unsigned long avpn);
 long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
                    unsigned long pte_index);
 long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
index 652ce85..4bc45d3 100644 (file)
@@ -263,7 +263,7 @@ extern void arch_exit_mmap(struct mm_struct *mm);
 static inline void arch_unmap(struct mm_struct *mm,
                              unsigned long start, unsigned long end)
 {
-       unsigned long vdso_base = (unsigned long)mm->context.vdso - PAGE_SIZE;
+       unsigned long vdso_base = (unsigned long)mm->context.vdso;
 
        if (start <= vdso_base && vdso_base < end)
                mm->context.vdso = NULL;
index 17a4a61..295ef56 100644 (file)
@@ -7,33 +7,41 @@
 
 #ifdef CONFIG_PPC_KUAP
 
-#ifdef __ASSEMBLY__
-
-.macro kuap_save_and_lock      sp, thread, gpr1, gpr2, gpr3
-       lis     \gpr2, MD_APG_KUAP@h    /* only APG0 and APG1 are used */
-       mfspr   \gpr1, SPRN_MD_AP
-       mtspr   SPRN_MD_AP, \gpr2
-       stw     \gpr1, STACK_REGS_KUAP(\sp)
-.endm
-
-.macro kuap_restore    sp, current, gpr1, gpr2, gpr3
-       lwz     \gpr1, STACK_REGS_KUAP(\sp)
-       mtspr   SPRN_MD_AP, \gpr1
-.endm
-
-.macro kuap_check      current, gpr
-#ifdef CONFIG_PPC_KUAP_DEBUG
-       mfspr   \gpr, SPRN_MD_AP
-       rlwinm  \gpr, \gpr, 16, 0xffff
-999:   twnei   \gpr, MD_APG_KUAP@h
-       EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
-#endif
-.endm
-
-#else /* !__ASSEMBLY__ */
+#ifndef __ASSEMBLY__
 
 #include <asm/reg.h>
 
+static inline void kuap_save_and_lock(struct pt_regs *regs)
+{
+       regs->kuap = mfspr(SPRN_MD_AP);
+       mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+
+static inline void kuap_user_restore(struct pt_regs *regs)
+{
+}
+
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+       mtspr(SPRN_MD_AP, regs->kuap);
+}
+
+static inline unsigned long kuap_get_and_assert_locked(void)
+{
+       unsigned long kuap = mfspr(SPRN_MD_AP);
+
+       if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+               WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16);
+
+       return kuap;
+}
+
+static inline void kuap_assert_locked(void)
+{
+       if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+               kuap_get_and_assert_locked();
+}
+
 static inline void allow_user_access(void __user *to, const void __user *from,
                                     unsigned long size, unsigned long dir)
 {
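
With interrupt entry/exit moving to C, the 8xx KUAP hooks above become ordinary inline functions around SPRN_MD_AP instead of assembly macros. Roughly how an entry/exit path is expected to pair them (the wrapper below is made up; the real callers are the C interrupt wrappers):

	static void example_interrupt_handler(struct pt_regs *regs)
	{
		kuap_save_and_lock(regs);	/* save MD_AP into regs->kuap, lock user access */

		/* ... handle the exception with user access blocked ... */

		kuap_kernel_restore(regs, regs->kuap);	/* write the saved value back */
	}
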
index 4782499..6e4faa0 100644 (file)
 
 #define mmu_linear_psize       MMU_PAGE_8M
 
+#define MODULES_VADDR  (PAGE_OFFSET - SZ_256M)
+#define MODULES_END    PAGE_OFFSET
+
 #ifndef __ASSEMBLY__
 
 #include <linux/mmdebug.h>
index 6cb8aa3..57cd389 100644 (file)
@@ -6,6 +6,8 @@
  * the ppc64 non-hashed page table.
  */
 
+#include <linux/sizes.h>
+
 #include <asm/nohash/64/pgtable-4k.h>
 #include <asm/barrier.h>
 #include <asm/asm-const.h>
@@ -54,7 +56,8 @@
 #define  PHB_IO_END    (KERN_IO_START + FULL_IO_SIZE)
 #define IOREMAP_BASE   (PHB_IO_END)
 #define IOREMAP_START  (ioremap_bot)
-#define IOREMAP_END    (KERN_VIRT_START + KERN_VIRT_SIZE)
+#define IOREMAP_END    (KERN_VIRT_START + KERN_VIRT_SIZE - FIXADDR_SIZE)
+#define FIXADDR_SIZE   SZ_32M
 
 
 /*
index 9986ac3..c761572 100644 (file)
@@ -307,7 +307,7 @@ int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data,
 
 s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size);
 s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr);
-s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr);
+s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, __be64 *addr);
 
 s64 opal_signal_system_reset(s32 cpu);
 s64 opal_quiesce(u64 shutdown_type, s32 cpu);
index 00e7e67..f4c3428 100644 (file)
@@ -43,7 +43,7 @@ struct power_pmu {
                                u64 alt[]);
        void            (*get_mem_data_src)(union perf_mem_data_src *dsrc,
                                u32 flags, struct pt_regs *regs);
-       void            (*get_mem_weight)(u64 *weight);
+       void            (*get_mem_weight)(u64 *weight, u64 type);
        unsigned long   group_constraint_mask;
        unsigned long   group_constraint_val;
        u64             (*bhrb_filter_map)(u64 branch_sample_type);
@@ -67,6 +67,12 @@ struct power_pmu {
         * the pmu supports extended perf regs capability
         */
        int             capabilities;
+       /*
+        * Function to check event code for values which are
+        * reserved. Function takes struct perf_event as input,
+        * since event code could be spread in attr.config*
+        */
+       int             (*check_attr_config)(struct perf_event *ev);
 };
 
 /*
index 4eed821..c6a6767 100644 (file)
@@ -41,8 +41,6 @@ struct mm_struct;
 
 #ifndef __ASSEMBLY__
 
-#include <asm/tlbflush.h>
-
 /* Keep these as a macros to avoid include dependency mess */
 #define pte_page(x)            pfn_to_page(pte_pfn(x))
 #define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
index ed161ef..ac41776 100644 (file)
 #define PPC_INST_ORI                   0x60000000
 #define PPC_INST_ORIS                  0x64000000
 #define PPC_INST_BRANCH                        0x48000000
+#define PPC_INST_BL                    0x48000001
 #define PPC_INST_BRANCH_COND           0x40800000
 
 /* Prefixes */
 #define PPC_RAW_STFDX(s, a, b)         (0x7c0005ae | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_LVX(t, a, b)           (0x7c0000ce | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_STVX(s, a, b)          (0x7c0001ce | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDE(t, a, b)          (0x7c000114 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDZE(t, a)            (0x7c000194 | ___PPC_RT(t) | ___PPC_RA(a))
+#define PPC_RAW_ADDME(t, a)            (0x7c0001d4 | ___PPC_RT(t) | ___PPC_RA(a))
 #define PPC_RAW_ADD(t, a, b)           (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_ADD_DOT(t, a, b)       (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
 #define PPC_RAW_ADDC(t, a, b)          (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_BLR()                  (PPC_INST_BLR)
 #define PPC_RAW_BLRL()                 (0x4e800021)
 #define PPC_RAW_MTLR(r)                        (0x7c0803a6 | ___PPC_RT(r))
+#define PPC_RAW_MFLR(t)                        (PPC_INST_MFLR | ___PPC_RT(t))
 #define PPC_RAW_BCTR()                 (PPC_INST_BCTR)
 #define PPC_RAW_MTCTR(r)               (PPC_INST_MTCTR | ___PPC_RT(r))
 #define PPC_RAW_ADDI(d, a, i)          (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
 #define PPC_RAW_LI(r, i)               PPC_RAW_ADDI(r, 0, i)
 #define PPC_RAW_ADDIS(d, a, i)         (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIC(d, a, i)         (0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIC_DOT(d, a, i)     (0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
 #define PPC_RAW_LIS(r, i)              PPC_RAW_ADDIS(r, 0, i)
 #define PPC_RAW_STDX(r, base, b)       (0x7c00012a | ___PPC_RS(r) | ___PPC_RA(base) | ___PPC_RB(b))
 #define PPC_RAW_STDU(r, base, i)       (0xf8000001 | ___PPC_RS(r) | ___PPC_RA(base) | ((i) & 0xfffc))
 #define PPC_RAW_CMPLW(a, b)            (0x7c000040 | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_CMPLD(a, b)            (0x7c200040 | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_SUB(d, a, b)           (0x7c000050 | ___PPC_RT(d) | ___PPC_RB(a) | ___PPC_RA(b))
+#define PPC_RAW_SUBFC(d, a, b)         (0x7c000010 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUBFE(d, a, b)         (0x7c000110 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUBFIC(d, a, i)                (0x20000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_SUBFZE(d, a)           (0x7c000190 | ___PPC_RT(d) | ___PPC_RA(a))
 #define PPC_RAW_MULD(d, a, b)          (0x7c0001d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_MULW(d, a, b)          (0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_MULHWU(d, a, b)                (0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_DIVDEU_DOT(t, a, b)    (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
 #define PPC_RAW_AND(d, a, b)           (0x7c000038 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_ANDI(d, a, i)          (0x70000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ANDIS(d, a, i)         (0x74000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
 #define PPC_RAW_AND_DOT(d, a, b)       (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_OR(d, a, b)            (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_MR(d, a)               PPC_RAW_OR(d, a, a)
 #define PPC_RAW_ORI(d, a, i)           (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
 #define PPC_RAW_ORIS(d, a, i)          (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_NOR(d, a, b)           (0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_XOR(d, a, b)           (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_XORI(d, a, i)          (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
 #define PPC_RAW_XORIS(d, a, i)         (0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
index 3dceb64..d6739d7 100644 (file)
 
 #define SZL                    (BITS_PER_LONG/8)
 
-/*
- * Stuff for accurate CPU time accounting.
- * These macros handle transitions between user and system state
- * in exception entry and exit and accumulate time to the
- * user_time and system_time fields in the paca.
- */
-
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)
-#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb)
-#else
-#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)                            \
-       MFTB(ra);                       /* get timebase */              \
-       PPC_LL  rb, ACCOUNT_STARTTIME_USER(ptr);                        \
-       PPC_STL ra, ACCOUNT_STARTTIME(ptr);                             \
-       subf    rb,rb,ra;               /* subtract start value */      \
-       PPC_LL  ra, ACCOUNT_USER_TIME(ptr);                             \
-       add     ra,ra,rb;               /* add on to user time */       \
-       PPC_STL ra, ACCOUNT_USER_TIME(ptr);                             \
-
-#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb)                             \
-       MFTB(ra);                       /* get timebase */              \
-       PPC_LL  rb, ACCOUNT_STARTTIME(ptr);                             \
-       PPC_STL ra, ACCOUNT_STARTTIME_USER(ptr);                        \
-       subf    rb,rb,ra;               /* subtract start value */      \
-       PPC_LL  ra, ACCOUNT_SYSTEM_TIME(ptr);                           \
-       add     ra,ra,rb;               /* add on to system time */     \
-       PPC_STL ra, ACCOUNT_SYSTEM_TIME(ptr)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-
 /*
  * Macros for storing registers into and loading registers from
  * exception frames.
index 8acc359..7bf8a15 100644 (file)
@@ -144,15 +144,12 @@ struct thread_struct {
 #endif
 #ifdef CONFIG_PPC32
        void            *pgdir;         /* root of page-table tree */
-       unsigned long   ksp_limit;      /* if ksp <= ksp_limit stack overflow */
 #ifdef CONFIG_PPC_RTAS
        unsigned long   rtas_sp;        /* stack pointer for when in RTAS */
 #endif
-#endif
 #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
        unsigned long   kuap;           /* opened segments for user access */
 #endif
-#ifdef CONFIG_VMAP_STACK
        unsigned long   srr0;
        unsigned long   srr1;
        unsigned long   dar;
@@ -161,7 +158,7 @@ struct thread_struct {
        unsigned long   r0, r3, r4, r5, r6, r8, r9, r11;
        unsigned long   lr, ctr;
 #endif
-#endif
+#endif /* CONFIG_PPC32 */
        /* Debug Registers */
        struct debug_reg debug;
 #ifdef CONFIG_PPC_FPU_REGS
@@ -282,7 +279,6 @@ struct thread_struct {
 #ifdef CONFIG_PPC32
 #define INIT_THREAD { \
        .ksp = INIT_SP, \
-       .ksp_limit = INIT_SP_LIMIT, \
        .pgdir = swapper_pg_dir, \
        .fpexc_mode = MSR_FE0 | MSR_FE1, \
        SPEFSCR_INIT \
@@ -393,6 +389,7 @@ extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
 extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
 #ifdef CONFIG_PPC_970_NAP
 extern void power4_idle_nap(void);
+void power4_idle_nap_return(void);
 #endif
 
 extern unsigned long cpuidle_disable;
@@ -417,6 +414,8 @@ extern int fix_alignment(struct pt_regs *);
 #define NET_IP_ALIGN   0
 #endif
 
+int do_mathemu(struct pt_regs *regs);
+
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_PROCESSOR_H */
index 1499e92..9c9ab27 100644 (file)
@@ -185,44 +185,27 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
 #define current_pt_regs() \
        ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1)
 
+/*
+ * The 4 low bits (0xf) are available as flags to overload the trap word,
+ * because interrupt vectors have minimum alignment of 0x10. TRAP_FLAGS_MASK
+ * must cover the bits used as flags, including bit 0 which is used as the
+ * "norestart" bit.
+ */
 #ifdef __powerpc64__
-#ifdef CONFIG_PPC_BOOK3S
-#define TRAP_FLAGS_MASK                0x10
-#define TRAP(regs)             ((regs)->trap & ~TRAP_FLAGS_MASK)
-#define FULL_REGS(regs)                true
-#define SET_FULL_REGS(regs)    do { } while (0)
-#else
-#define TRAP_FLAGS_MASK                0x11
-#define TRAP(regs)             ((regs)->trap & ~TRAP_FLAGS_MASK)
-#define FULL_REGS(regs)                (((regs)->trap & 1) == 0)
-#define SET_FULL_REGS(regs)    ((regs)->trap &= ~1)
-#endif
-#define CHECK_FULL_REGS(regs)  BUG_ON(!FULL_REGS(regs))
-#define NV_REG_POISON          0xdeadbeefdeadbeefUL
+#define TRAP_FLAGS_MASK                0x1
 #else
 /*
- * We use the least-significant bit of the trap field to indicate
- * whether we have saved the full set of registers, or only a
- * partial set.  A 1 there means the partial set.
- * On 4xx we use the next bit to indicate whether the exception
+ * On 4xx we use bit 1 in the trap word to indicate whether the exception
  * is a critical exception (1 means it is).
  */
-#define TRAP_FLAGS_MASK                0x1F
-#define TRAP(regs)             ((regs)->trap & ~TRAP_FLAGS_MASK)
-#define FULL_REGS(regs)                (((regs)->trap & 1) == 0)
-#define SET_FULL_REGS(regs)    ((regs)->trap &= ~1)
+#define TRAP_FLAGS_MASK                0xf
 #define IS_CRITICAL_EXC(regs)  (((regs)->trap & 2) != 0)
 #define IS_MCHECK_EXC(regs)    (((regs)->trap & 4) != 0)
 #define IS_DEBUG_EXC(regs)     (((regs)->trap & 8) != 0)
-#define NV_REG_POISON          0xdeadbeef
-#define CHECK_FULL_REGS(regs)                                                \
-do {                                                                         \
-       if ((regs)->trap & 1)                                                 \
-               printk(KERN_CRIT "%s: partial register set\n", __func__); \
-} while (0)
 #endif /* __powerpc64__ */
+#define TRAP(regs)             ((regs)->trap & ~TRAP_FLAGS_MASK)
 
-static inline void set_trap(struct pt_regs *regs, unsigned long val)
+static __always_inline void set_trap(struct pt_regs *regs, unsigned long val)
 {
        regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK);
 }
@@ -244,12 +227,12 @@ static inline bool trap_is_syscall(struct pt_regs *regs)
 
 static inline bool trap_norestart(struct pt_regs *regs)
 {
-       return regs->trap & 0x10;
+       return regs->trap & 0x1;
 }
 
-static inline void set_trap_norestart(struct pt_regs *regs)
+static __always_inline void set_trap_norestart(struct pt_regs *regs)
 {
-       regs->trap |= 0x10;
+       regs->trap |= 0x1;
 }
 
 #define arch_has_single_step() (1)
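
Because every interrupt vector is at least 0x10-aligned, masking off TRAP_FLAGS_MASK can never lose the vector number. An illustrative check (not part of the patch), given some struct pt_regs *regs:

	regs->trap = 0xc00;		/* system call vector */
	set_trap_norestart(regs);	/* sets bit 0, regs->trap is now 0xc01 */

	BUG_ON(TRAP(regs) != 0xc00);	/* flag bits are masked off */
	BUG_ON(!trap_norestart(regs));
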
index b752d34..07318bc 100644 (file)
@@ -44,20 +44,6 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
 }
 #define queued_spin_lock queued_spin_lock
 
-#define smp_mb__after_spinlock()   smp_mb()
-
-static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
-{
-       /*
-        * This barrier was added to simple spinlocks by commit 51d7d5205d338,
-        * but it should now be possible to remove it, asm arm64 has done with
-        * commit c6f5d02b6a0f.
-        */
-       smp_mb();
-       return atomic_read(&lock->val);
-}
-#define queued_spin_is_locked queued_spin_is_locked
-
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 #define SPIN_THRESHOLD (1<<15) /* not tuned */
 
@@ -86,6 +72,13 @@ static inline void pv_spinlocks_init(void)
 
 #endif
 
+/*
+ * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait,
+ * which was found to have performance problems if implemented with
+ * the preferred spin_begin()/spin_end() SMT priority pattern. Use the
+ * generic version instead.
+ */
+
 #include <asm-generic/qspinlock.h>
 
 #endif /* _ASM_POWERPC_QSPINLOCK_H */
index da103e9..7c81d3e 100644 (file)
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 #define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
 #else
-#define MSR_TM_ACTIVE(x) 0
+#define MSR_TM_ACTIVE(x) ((void)(x), 0)
 #endif
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 #define   LPCR_VRMA_LP1                ASM_CONST(0x0000800000000000)
 #define   LPCR_RMLS            0x1C000000      /* Implementation dependent RMO limit sel */
 #define   LPCR_RMLS_SH         26
+#define   LPCR_HAIL            ASM_CONST(0x0000000004000000)   /* HV AIL (ISAv3.1) */
 #define   LPCR_ILE             ASM_CONST(0x0000000002000000)   /* !HV irqs set MSR:LE */
 #define   LPCR_AIL             ASM_CONST(0x0000000001800000)   /* Alternate interrupt location */
 #define   LPCR_AIL_0           ASM_CONST(0x0000000000000000)   /* MMU off exception offset 0x0 */
@@ -1393,8 +1394,7 @@ static inline void mtmsr_isync(unsigned long val)
                                     : "r" ((unsigned long)(v)) \
                                     : "memory")
 #endif
-#define wrtspr(rn)     asm volatile("mtspr " __stringify(rn) ",0" : \
-                                    : : "memory")
+#define wrtspr(rn)     asm volatile("mtspr " __stringify(rn) ",2" : : : "memory")
 
 static inline void wrtee(unsigned long val)
 {
index 658448c..9dc97d2 100644 (file)
@@ -19,8 +19,8 @@
 #define RTAS_UNKNOWN_SERVICE (-1)
 #define RTAS_INSTANTIATE_MAX (1ULL<<30) /* Don't instantiate rtas at/above this value */
 
-/* Buffer size for ppc_rtas system call. */
-#define RTAS_RMOBUF_MAX (64 * 1024)
+/* Memory set aside for sys_rtas to use with calls that need a work area. */
+#define RTAS_USER_REGION_SIZE (64 * 1024)
 
 /* RTAS return status codes */
 #define RTAS_BUSY              -2    /* RTAS Busy */
@@ -357,7 +357,7 @@ extern void rtas_take_timebase(void);
 static inline int page_is_rtas_user_buf(unsigned long pfn)
 {
        unsigned long paddr = (pfn << PAGE_SHIFT);
-       if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_RMOBUF_MAX))
+       if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_USER_REGION_SIZE))
                return 1;
        return 0;
 }
index 5b862de..552f325 100644 (file)
@@ -38,8 +38,7 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-       smp_mb();
-       return !arch_spin_value_unlocked(*lock);
+       return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
 /*
@@ -282,7 +281,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 #define arch_read_relax(lock)  rw_yield(lock)
 #define arch_write_relax(lock) rw_yield(lock)
 
-/* See include/linux/spinlock.h */
-#define smp_mb__after_spinlock()   smp_mb()
-
 #endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */
index 7a13bc2..03b3d01 100644 (file)
@@ -31,6 +31,7 @@ extern u32 *cpu_to_phys_id;
 extern bool coregroup_enabled;
 
 extern int cpu_to_chip_id(int cpu);
+extern int *chip_id_lookup_table;
 
 #ifdef CONFIG_SMP
 
@@ -121,6 +122,11 @@ static inline struct cpumask *cpu_sibling_mask(int cpu)
        return per_cpu(cpu_sibling_map, cpu);
 }
 
+static inline struct cpumask *cpu_core_mask(int cpu)
+{
+       return per_cpu(cpu_core_map, cpu);
+}
+
 static inline struct cpumask *cpu_l2_cache_mask(int cpu)
 {
        return per_cpu(cpu_l2_cache_map, cpu);
index 6ec7228..bd75872 100644 (file)
@@ -10,6 +10,9 @@
 #include <asm/simple_spinlock.h>
 #endif
 
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock()       smp_mb()
+
 #ifndef CONFIG_PARAVIRT_SPINLOCKS
 static inline void pv_spinlocks_init(void) { }
 #endif
index 386d576..b4ec6c7 100644 (file)
@@ -38,7 +38,6 @@
 #ifndef __ASSEMBLY__
 #include <linux/cache.h>
 #include <asm/processor.h>
-#include <asm/page.h>
 #include <asm/accounting.h>
 
 #define SLB_PRELOAD_NR 16U
@@ -152,6 +151,12 @@ void arch_setup_new_exec(void);
 
 #ifndef __ASSEMBLY__
 
+static inline void clear_thread_local_flags(unsigned int flags)
+{
+       struct thread_info *ti = current_thread_info();
+       ti->local_flags &= ~flags;
+}
+
 static inline bool test_thread_local_flags(unsigned int flags)
 {
        struct thread_info *ti = current_thread_info();
index 3beeb03..e4db64c 100644 (file)
@@ -126,7 +126,7 @@ static inline int cpu_to_coregroup_id(int cpu)
 #define topology_physical_package_id(cpu)      (cpu_to_chip_id(cpu))
 
 #define topology_sibling_cpumask(cpu)  (per_cpu(cpu_sibling_map, cpu))
-#define topology_core_cpumask(cpu)     (cpu_cpu_mask(cpu))
+#define topology_core_cpumask(cpu)     (per_cpu(cpu_core_map, cpu))
 #define topology_core_id(cpu)          (cpu_to_core_id(cpu))
 
 #endif
index 78e2a39..a09e424 100644 (file)
@@ -43,129 +43,39 @@ static inline bool __access_ok(unsigned long addr, unsigned long size)
  * exception handling means that it's no longer "just"...)
  *
  */
-#define get_user(x, ptr) \
-       __get_user_check((x), (ptr), sizeof(*(ptr)))
-#define put_user(x, ptr) \
-       __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
-#define __get_user(x, ptr) \
-       __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true)
-#define __put_user(x, ptr) \
-       __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
-#define __get_user_allowed(x, ptr) \
-       __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false)
-
-#define __get_user_inatomic(x, ptr) \
-       __get_user_nosleep((x), (ptr), sizeof(*(ptr)))
-#define __put_user_inatomic(x, ptr) \
-       __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
-#ifdef CONFIG_PPC64
-
-#define ___get_user_instr(gu_op, dest, ptr)                            \
-({                                                                     \
-       long __gui_ret = 0;                                             \
-       unsigned long __gui_ptr = (unsigned long)ptr;                   \
-       struct ppc_inst __gui_inst;                                     \
-       unsigned int __prefix, __suffix;                                \
-       __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr);  \
-       if (__gui_ret == 0) {                                           \
-               if ((__prefix >> 26) == OP_PREFIX) {                    \
-                       __gui_ret = gu_op(__suffix,                     \
-                               (unsigned int __user *)__gui_ptr + 1);  \
-                       __gui_inst = ppc_inst_prefix(__prefix,          \
-                                                    __suffix);         \
-               } else {                                                \
-                       __gui_inst = ppc_inst(__prefix);                \
-               }                                                       \
-               if (__gui_ret == 0)                                     \
-                       (dest) = __gui_inst;                            \
-       }                                                               \
-       __gui_ret;                                                      \
-})
-
-#define get_user_instr(x, ptr) \
-       ___get_user_instr(get_user, x, ptr)
-
-#define __get_user_instr(x, ptr) \
-       ___get_user_instr(__get_user, x, ptr)
-
-#define __get_user_instr_inatomic(x, ptr) \
-       ___get_user_instr(__get_user_inatomic, x, ptr)
-
-#else /* !CONFIG_PPC64 */
-#define get_user_instr(x, ptr) \
-       get_user((x).val, (u32 __user *)(ptr))
-
-#define __get_user_instr(x, ptr) \
-       __get_user_nocheck((x).val, (u32 __user *)(ptr), sizeof(u32), true)
-
-#define __get_user_instr_inatomic(x, ptr) \
-       __get_user_nosleep((x).val, (u32 __user *)(ptr), sizeof(u32))
-
-#endif /* CONFIG_PPC64 */
-
-extern long __put_user_bad(void);
-
-#define __put_user_size(x, ptr, size, retval)                  \
-do {                                                           \
-       __label__ __pu_failed;                                  \
-                                                               \
-       retval = 0;                                             \
-       allow_write_to_user(ptr, size);                         \
-       __put_user_size_goto(x, ptr, size, __pu_failed);        \
-       prevent_write_to_user(ptr, size);                       \
-       break;                                                  \
-                                                               \
-__pu_failed:                                                   \
-       retval = -EFAULT;                                       \
-       prevent_write_to_user(ptr, size);                       \
-} while (0)
-
-#define __put_user_nocheck(x, ptr, size)                       \
+#define __put_user(x, ptr)                                     \
 ({                                                             \
        long __pu_err;                                          \
        __typeof__(*(ptr)) __user *__pu_addr = (ptr);           \
-       __typeof__(*(ptr)) __pu_val = (x);                      \
-       __typeof__(size) __pu_size = (size);                    \
+       __typeof__(*(ptr)) __pu_val = (__typeof__(*(ptr)))(x);  \
+       __typeof__(sizeof(*(ptr))) __pu_size = sizeof(*(ptr));  \
                                                                \
-       if (!is_kernel_addr((unsigned long)__pu_addr))          \
-               might_fault();                                  \
-       __chk_user_ptr(__pu_addr);                              \
-       __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err);      \
+       might_fault();                                          \
+       do {                                                    \
+               __label__ __pu_failed;                          \
+                                                               \
+               allow_write_to_user(__pu_addr, __pu_size);      \
+               __put_user_size_goto(__pu_val, __pu_addr, __pu_size, __pu_failed);      \
+               prevent_write_to_user(__pu_addr, __pu_size);    \
+               __pu_err = 0;                                   \
+               break;                                          \
+                                                               \
+__pu_failed:                                                   \
+               prevent_write_to_user(__pu_addr, __pu_size);    \
+               __pu_err = -EFAULT;                             \
+       } while (0);                                            \
                                                                \
        __pu_err;                                               \
 })
 
-#define __put_user_check(x, ptr, size)                                 \
+#define put_user(x, ptr)                                               \
 ({                                                                     \
-       long __pu_err = -EFAULT;                                        \
-       __typeof__(*(ptr)) __user *__pu_addr = (ptr);                   \
-       __typeof__(*(ptr)) __pu_val = (x);                              \
-       __typeof__(size) __pu_size = (size);                            \
-                                                                       \
-       might_fault();                                                  \
-       if (access_ok(__pu_addr, __pu_size))                            \
-               __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \
+       __typeof__(*(ptr)) __user *_pu_addr = (ptr);                    \
                                                                        \
-       __pu_err;                                                       \
+       access_ok(_pu_addr, sizeof(*(ptr))) ?                           \
+                 __put_user(x, _pu_addr) : -EFAULT;                    \
 })
 
-#define __put_user_nosleep(x, ptr, size)                       \
-({                                                             \
-       long __pu_err;                                          \
-       __typeof__(*(ptr)) __user *__pu_addr = (ptr);           \
-       __typeof__(*(ptr)) __pu_val = (x);                      \
-       __typeof__(size) __pu_size = (size);                    \
-                                                               \
-       __chk_user_ptr(__pu_addr);                              \
-       __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \
-                                                               \
-       __pu_err;                                               \
-})
-
-
 /*
  * We don't tell gcc that we are accessing memory, but this is OK
  * because we do not write to any memory gcc knows about, so there
@@ -198,25 +108,17 @@ __pu_failed:                                                      \
 
 #define __put_user_size_goto(x, ptr, size, label)              \
 do {                                                           \
+       __typeof__(*(ptr)) __user *__pus_addr = (ptr);          \
+                                                               \
        switch (size) {                                         \
-       case 1: __put_user_asm_goto(x, ptr, label, "stb"); break;       \
-       case 2: __put_user_asm_goto(x, ptr, label, "sth"); break;       \
-       case 4: __put_user_asm_goto(x, ptr, label, "stw"); break;       \
-       case 8: __put_user_asm2_goto(x, ptr, label); break;     \
-       default: __put_user_bad();                              \
+       case 1: __put_user_asm_goto(x, __pus_addr, label, "stb"); break;        \
+       case 2: __put_user_asm_goto(x, __pus_addr, label, "sth"); break;        \
+       case 4: __put_user_asm_goto(x, __pus_addr, label, "stw"); break;        \
+       case 8: __put_user_asm2_goto(x, __pus_addr, label); break;              \
+       default: BUILD_BUG();                                   \
        }                                                       \
 } while (0)
 
-#define __unsafe_put_user_goto(x, ptr, size, label)            \
-do {                                                           \
-       __typeof__(*(ptr)) __user *__pu_addr = (ptr);           \
-       __chk_user_ptr(ptr);                                    \
-       __put_user_size_goto((x), __pu_addr, (size), label);    \
-} while (0)
-
-
-extern long __get_user_bad(void);
-
 /*
  * This does an atomic 128 byte aligned load from userspace.
  * Upto caller to do enable_kernel_vmx() before calling!
@@ -234,6 +136,59 @@ extern long __get_user_bad(void);
                : "=r" (err)                    \
                : "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err))
 
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define __get_user_asm_goto(x, addr, label, op)                        \
+       asm_volatile_goto(                                      \
+               "1:     "op"%U1%X1 %0, %1       # get_user\n"   \
+               EX_TABLE(1b, %l2)                               \
+               : "=r" (x)                                      \
+               : "m"UPD_CONSTR (*addr)                         \
+               :                                               \
+               : label)
+
+#ifdef __powerpc64__
+#define __get_user_asm2_goto(x, addr, label)                   \
+       __get_user_asm_goto(x, addr, label, "ld")
+#else /* __powerpc64__ */
+#define __get_user_asm2_goto(x, addr, label)                   \
+       asm_volatile_goto(                                      \
+               "1:     lwz%X1 %0, %1\n"                        \
+               "2:     lwz%X1 %L0, %L1\n"                      \
+               EX_TABLE(1b, %l2)                               \
+               EX_TABLE(2b, %l2)                               \
+               : "=r" (x)                                      \
+               : "m" (*addr)                                   \
+               :                                               \
+               : label)
+#endif /* __powerpc64__ */
+
+#define __get_user_size_goto(x, ptr, size, label)                              \
+do {                                                                           \
+       BUILD_BUG_ON(size > sizeof(x));                                         \
+       switch (size) {                                                         \
+       case 1: __get_user_asm_goto(x, (u8 __user *)ptr, label, "lbz"); break;  \
+       case 2: __get_user_asm_goto(x, (u16 __user *)ptr, label, "lhz"); break; \
+       case 4: __get_user_asm_goto(x, (u32 __user *)ptr, label, "lwz"); break; \
+       case 8: __get_user_asm2_goto(x, (u64 __user *)ptr, label);  break;      \
+       default: x = 0; BUILD_BUG();                                            \
+       }                                                                       \
+} while (0)
+
+#define __get_user_size_allowed(x, ptr, size, retval)                  \
+do {                                                                   \
+               __label__ __gus_failed;                                 \
+                                                                       \
+               __get_user_size_goto(x, ptr, size, __gus_failed);       \
+               retval = 0;                                             \
+               break;                                                  \
+__gus_failed:                                                          \
+               x = 0;                                                  \
+               retval = -EFAULT;                                       \
+} while (0)
+
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
 #define __get_user_asm(x, addr, err, op)               \
        __asm__ __volatile__(                           \
                "1:     "op"%U2%X2 %1, %2       # get_user\n"   \
@@ -271,25 +226,27 @@ extern long __get_user_bad(void);
 #define __get_user_size_allowed(x, ptr, size, retval)          \
 do {                                                           \
        retval = 0;                                             \
-       __chk_user_ptr(ptr);                                    \
-       if (size > sizeof(x))                                   \
-               (x) = __get_user_bad();                         \
+       BUILD_BUG_ON(size > sizeof(x));                         \
        switch (size) {                                         \
        case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break;      \
        case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break;     \
        case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break;     \
        case 8: __get_user_asm2(x, (u64 __user *)ptr, retval);  break;  \
-       default: (x) = __get_user_bad();                        \
+       default: x = 0; BUILD_BUG();                            \
        }                                                       \
 } while (0)
 
-#define __get_user_size(x, ptr, size, retval)                  \
+#define __get_user_size_goto(x, ptr, size, label)              \
 do {                                                           \
-       allow_read_from_user(ptr, size);                        \
-       __get_user_size_allowed(x, ptr, size, retval);          \
-       prevent_read_from_user(ptr, size);                      \
+       long __gus_retval;                                      \
+                                                               \
+       __get_user_size_allowed(x, ptr, size, __gus_retval);    \
+       if (__gus_retval)                                       \
+               goto label;                                     \
 } while (0)
 
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
 /*
  * This is a type: either unsigned long, if the argument fits into
  * that type, or otherwise unsigned long long.
@@ -297,86 +254,36 @@ do {                                                              \
 #define __long_type(x) \
        __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 
-#define __get_user_nocheck(x, ptr, size, do_allow)                     \
+#define __get_user(x, ptr)                                     \
 ({                                                             \
        long __gu_err;                                          \
        __long_type(*(ptr)) __gu_val;                           \
        __typeof__(*(ptr)) __user *__gu_addr = (ptr);   \
-       __typeof__(size) __gu_size = (size);                    \
+       __typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr));  \
                                                                \
-       __chk_user_ptr(__gu_addr);                              \
-       if (do_allow && !is_kernel_addr((unsigned long)__gu_addr)) \
-               might_fault();                                  \
-       if (do_allow)                                                           \
-               __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err);      \
-       else                                                                    \
-               __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \
+       might_fault();                                  \
+       allow_read_from_user(__gu_addr, __gu_size);             \
+       __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err);      \
+       prevent_read_from_user(__gu_addr, __gu_size);           \
        (x) = (__typeof__(*(ptr)))__gu_val;                     \
                                                                \
        __gu_err;                                               \
 })
 
-#define __get_user_check(x, ptr, size)                                 \
+#define get_user(x, ptr)                                               \
 ({                                                                     \
-       long __gu_err = -EFAULT;                                        \
-       __long_type(*(ptr)) __gu_val = 0;                               \
-       __typeof__(*(ptr)) __user *__gu_addr = (ptr);           \
-       __typeof__(size) __gu_size = (size);                            \
-                                                                       \
-       might_fault();                                                  \
-       if (access_ok(__gu_addr, __gu_size))                            \
-               __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \
-       (x) = (__force __typeof__(*(ptr)))__gu_val;                             \
+       __typeof__(*(ptr)) __user *_gu_addr = (ptr);                    \
                                                                        \
-       __gu_err;                                                       \
+       access_ok(_gu_addr, sizeof(*(ptr))) ?                           \
+                 __get_user(x, _gu_addr) :                             \
+                 ((x) = (__force __typeof__(*(ptr)))0, -EFAULT);       \
 })
 
-#define __get_user_nosleep(x, ptr, size)                       \
-({                                                             \
-       long __gu_err;                                          \
-       __long_type(*(ptr)) __gu_val;                           \
-       __typeof__(*(ptr)) __user *__gu_addr = (ptr);   \
-       __typeof__(size) __gu_size = (size);                    \
-                                                               \
-       __chk_user_ptr(__gu_addr);                              \
-       __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \
-       (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
-                                                               \
-       __gu_err;                                               \
-})
-
-
 /* more complex routines */
 
 extern unsigned long __copy_tofrom_user(void __user *to,
                const void __user *from, unsigned long size);
 
-#ifdef CONFIG_ARCH_HAS_COPY_MC
-unsigned long __must_check
-copy_mc_generic(void *to, const void *from, unsigned long size);
-
-static inline unsigned long __must_check
-copy_mc_to_kernel(void *to, const void *from, unsigned long size)
-{
-       return copy_mc_generic(to, from, size);
-}
-#define copy_mc_to_kernel copy_mc_to_kernel
-
-static inline unsigned long __must_check
-copy_mc_to_user(void __user *to, const void *from, unsigned long n)
-{
-       if (likely(check_copy_size(from, n, true))) {
-               if (access_ok(to, n)) {
-                       allow_write_to_user(to, n);
-                       n = copy_mc_generic((void *)to, from, n);
-                       prevent_write_to_user(to, n);
-               }
-       }
-
-       return n;
-}
-#endif
-
 #ifdef __powerpc64__
 static inline unsigned long
 raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
@@ -414,26 +321,51 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 
 unsigned long __arch_clear_user(void __user *addr, unsigned long size);
 
-static inline unsigned long clear_user(void __user *addr, unsigned long size)
+static inline unsigned long __clear_user(void __user *addr, unsigned long size)
 {
-       unsigned long ret = size;
+       unsigned long ret;
+
        might_fault();
-       if (likely(access_ok(addr, size))) {
-               allow_write_to_user(addr, size);
-               ret = __arch_clear_user(addr, size);
-               prevent_write_to_user(addr, size);
-       }
+       allow_write_to_user(addr, size);
+       ret = __arch_clear_user(addr, size);
+       prevent_write_to_user(addr, size);
        return ret;
 }
 
-static inline unsigned long __clear_user(void __user *addr, unsigned long size)
+static inline unsigned long clear_user(void __user *addr, unsigned long size)
 {
-       return clear_user(addr, size);
+       return likely(access_ok(addr, size)) ? __clear_user(addr, size) : size;
 }
 
 extern long strncpy_from_user(char *dst, const char __user *src, long count);
 extern __must_check long strnlen_user(const char __user *str, long n);
 
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_generic(void *to, const void *from, unsigned long size);
+
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned long size)
+{
+       return copy_mc_generic(to, from, size);
+}
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+static inline unsigned long __must_check
+copy_mc_to_user(void __user *to, const void *from, unsigned long n)
+{
+       if (likely(check_copy_size(from, n, true))) {
+               if (access_ok(to, n)) {
+                       allow_write_to_user(to, n);
+                       n = copy_mc_generic((void *)to, from, n);
+                       prevent_write_to_user(to, n);
+               }
+       }
+
+       return n;
+}
+#endif
+
 extern long __copy_from_user_flushcache(void *dst, const void __user *src,
                unsigned size);
 extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
@@ -482,10 +414,37 @@ user_write_access_begin(const void __user *ptr, size_t len)
 #define user_write_access_begin        user_write_access_begin
 #define user_write_access_end          prevent_current_write_to_user
 
-#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
-#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
+#define unsafe_get_user(x, p, e) do {                                  \
+       __long_type(*(p)) __gu_val;                             \
+       __typeof__(*(p)) __user *__gu_addr = (p);               \
+                                                               \
+       __get_user_size_goto(__gu_val, __gu_addr, sizeof(*(p)), e); \
+       (x) = (__typeof__(*(p)))__gu_val;                       \
+} while (0)
+
 #define unsafe_put_user(x, p, e) \
-       __unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e)
+       __put_user_size_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e)
+
+#define unsafe_copy_from_user(d, s, l, e) \
+do {                                                                                   \
+       u8 *_dst = (u8 *)(d);                                                           \
+       const u8 __user *_src = (const u8 __user *)(s);                                 \
+       size_t _len = (l);                                                              \
+       int _i;                                                                         \
+                                                                                       \
+       for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64))               \
+               unsafe_get_user(*(u64 *)(_dst + _i), (u64 __user *)(_src + _i), e);     \
+       if (_len & 4) {                                                                 \
+               unsafe_get_user(*(u32 *)(_dst + _i), (u32 __user *)(_src + _i), e);     \
+               _i += 4;                                                                \
+       }                                                                               \
+       if (_len & 2) {                                                                 \
+               unsafe_get_user(*(u16 *)(_dst + _i), (u16 __user *)(_src + _i), e);     \
+               _i += 2;                                                                \
+       }                                                                               \
+       if (_len & 1)                                                                   \
+               unsafe_get_user(*(u8 *)(_dst + _i), (u8 __user *)(_src + _i), e);       \
+} while (0)
 
 #define unsafe_copy_to_user(d, s, l, e) \
 do {                                                                   \
@@ -494,9 +453,9 @@ do {                                                                        \
        size_t _len = (l);                                              \
        int _i;                                                         \
                                                                        \
-       for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long))             \
-               unsafe_put_user(*(long*)(_src + _i), (long __user *)(_dst + _i), e); \
-       if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) {                   \
+       for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64))       \
+               unsafe_put_user(*(u64 *)(_src + _i), (u64 __user *)(_dst + _i), e); \
+       if (_len & 4) {                                                 \
                unsafe_put_user(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \
                _i += 4;                                                \
        }                                                               \
@@ -511,14 +470,8 @@ do {                                                                       \
 #define HAVE_GET_KERNEL_NOFAULT
 
 #define __get_kernel_nofault(dst, src, type, err_label)                        \
-do {                                                                   \
-       int __kr_err;                                                   \
-                                                                       \
-       __get_user_size_allowed(*((type *)(dst)), (__force type __user *)(src),\
-                       sizeof(type), __kr_err);                        \
-       if (unlikely(__kr_err))                                         \
-               goto err_label;                                         \
-} while (0)
+       __get_user_size_goto(*((type *)(dst)),                          \
+               (__force type __user *)(src), sizeof(type), err_label)
 
 #define __put_kernel_nofault(dst, src, type, err_label)                        \
        __put_user_size_goto(*((type *)(src)),                          \
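
After this rework every user access either goes through get_user()/put_user(), which perform the access_ok() check themselves, or sits inside an explicit user_read_access_begin()/user_write_access_begin() window using the unsafe_*() accessors, which branch to a label on fault. A condensed sketch of the read-side pattern (the helper name is made up; the same shape appears in the align.c conversion further down):

	static int read_words_example(u32 *dst, const u32 __user *src, unsigned int n)
	{
		unsigned int i;

		if (!user_read_access_begin(src, n * sizeof(u32)))
			return -EFAULT;

		for (i = 0; i < n; i++)
			unsafe_get_user(dst[i], &src[i], Efault);

		user_read_access_end();
		return 0;

	Efault:
		user_read_access_end();
		return -EFAULT;
	}
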
index 700fcda..b541c69 100644 (file)
@@ -40,6 +40,7 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #ifdef CONFIG_PPC32
 #define __ARCH_WANT_OLD_STAT
+#define __ARCH_WANT_SYS_OLD_SELECT
 #endif
 #ifdef CONFIG_PPC64
 #define __ARCH_WANT_SYS_TIME
index 77c635c..1faff0b 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H
 #define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H
 
+#include <asm/page.h>
+
 #ifdef __ASSEMBLY__
 
 #include <asm/ppc_asm.h>
@@ -154,6 +156,14 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
 
 const struct vdso_data *__arch_get_vdso_data(void);
 
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+       return (void *)vd + PAGE_SIZE;
+}
+#endif
+
 static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
 {
        return true;
index 3f958ec..a585c8e 100644 (file)
@@ -107,9 +107,7 @@ extern struct vdso_arch_data *vdso_data;
        bcl     20, 31, .+4
 999:
        mflr    \ptr
-#if CONFIG_PPC_PAGE_SHIFT > 14
        addis   \ptr, \ptr, (_vdso_datapage - 999b)@ha
-#endif
        addi    \ptr, \ptr, (_vdso_datapage - 999b)@l
 .endm
 
index 721c0d6..e7479a4 100644 (file)
@@ -114,6 +114,7 @@ struct vio_driver {
        const struct vio_device_id *id_table;
        int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
        void (*remove)(struct vio_dev *dev);
+       void (*shutdown)(struct vio_dev *dev);
        /* A driver must have a get_desired_dma() function to
         * be loaded in a CMO environment if it uses DMA.
         */
index 9a312b9..aa094a8 100644 (file)
@@ -102,6 +102,7 @@ void xive_flush_interrupt(void);
 /* xmon hook */
 void xmon_xive_do_dump(int cpu);
 int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d);
+void xmon_xive_get_irq_all(void);
 
 /* APIs used by KVM */
 u32 xive_native_default_eq_shift(void);
index cc79856..4ba87de 100644 (file)
@@ -2,6 +2,7 @@
 #ifndef _ASM_POWERPC_ERRNO_H
 #define _ASM_POWERPC_ERRNO_H
 
+#undef EDEADLOCK
 #include <asm-generic/errno.h>
 
 #undef EDEADLOCK
index f698400..9c03423 100644 (file)
 typedef unsigned long  __kernel_old_dev_t;
 #define __kernel_old_dev_t __kernel_old_dev_t
 #else
-typedef unsigned int   __kernel_size_t;
-typedef int            __kernel_ssize_t;
-typedef long           __kernel_ptrdiff_t;
-#define __kernel_size_t __kernel_size_t
-
 typedef short          __kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 #endif
index c7797eb..bbb4181 100644 (file)
@@ -107,7 +107,6 @@ static struct aligninfo spe_aligninfo[32] = {
 static int emulate_spe(struct pt_regs *regs, unsigned int reg,
                       struct ppc_inst ppc_instr)
 {
-       int ret;
        union {
                u64 ll;
                u32 w[2];
@@ -127,11 +126,6 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
        nb = spe_aligninfo[instr].len;
        flags = spe_aligninfo[instr].flags;
 
-       /* Verify the address of the operand */
-       if (unlikely(user_mode(regs) &&
-                    !access_ok(addr, nb)))
-               return -EFAULT;
-
        /* userland only */
        if (unlikely(!user_mode(regs)))
                return 0;
@@ -169,26 +163,27 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
                }
        } else {
                temp.ll = data.ll = 0;
-               ret = 0;
                p = addr;
 
+               if (!user_read_access_begin(addr, nb))
+                       return -EFAULT;
+
                switch (nb) {
                case 8:
-                       ret |= __get_user_inatomic(temp.v[0], p++);
-                       ret |= __get_user_inatomic(temp.v[1], p++);
-                       ret |= __get_user_inatomic(temp.v[2], p++);
-                       ret |= __get_user_inatomic(temp.v[3], p++);
+                       unsafe_get_user(temp.v[0], p++, Efault_read);
+                       unsafe_get_user(temp.v[1], p++, Efault_read);
+                       unsafe_get_user(temp.v[2], p++, Efault_read);
+                       unsafe_get_user(temp.v[3], p++, Efault_read);
                        fallthrough;
                case 4:
-                       ret |= __get_user_inatomic(temp.v[4], p++);
-                       ret |= __get_user_inatomic(temp.v[5], p++);
+                       unsafe_get_user(temp.v[4], p++, Efault_read);
+                       unsafe_get_user(temp.v[5], p++, Efault_read);
                        fallthrough;
                case 2:
-                       ret |= __get_user_inatomic(temp.v[6], p++);
-                       ret |= __get_user_inatomic(temp.v[7], p++);
-                       if (unlikely(ret))
-                               return -EFAULT;
+                       unsafe_get_user(temp.v[6], p++, Efault_read);
+                       unsafe_get_user(temp.v[7], p++, Efault_read);
                }
+               user_read_access_end();
 
                switch (instr) {
                case EVLDD:
@@ -255,31 +250,41 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
 
        /* Store result to memory or update registers */
        if (flags & ST) {
-               ret = 0;
                p = addr;
+
+               if (!user_write_access_begin(addr, nb))
+                       return -EFAULT;
+
                switch (nb) {
                case 8:
-                       ret |= __put_user_inatomic(data.v[0], p++);
-                       ret |= __put_user_inatomic(data.v[1], p++);
-                       ret |= __put_user_inatomic(data.v[2], p++);
-                       ret |= __put_user_inatomic(data.v[3], p++);
+                       unsafe_put_user(data.v[0], p++, Efault_write);
+                       unsafe_put_user(data.v[1], p++, Efault_write);
+                       unsafe_put_user(data.v[2], p++, Efault_write);
+                       unsafe_put_user(data.v[3], p++, Efault_write);
                        fallthrough;
                case 4:
-                       ret |= __put_user_inatomic(data.v[4], p++);
-                       ret |= __put_user_inatomic(data.v[5], p++);
+                       unsafe_put_user(data.v[4], p++, Efault_write);
+                       unsafe_put_user(data.v[5], p++, Efault_write);
                        fallthrough;
                case 2:
-                       ret |= __put_user_inatomic(data.v[6], p++);
-                       ret |= __put_user_inatomic(data.v[7], p++);
+                       unsafe_put_user(data.v[6], p++, Efault_write);
+                       unsafe_put_user(data.v[7], p++, Efault_write);
                }
-               if (unlikely(ret))
-                       return -EFAULT;
+               user_write_access_end();
        } else {
                *evr = data.w[0];
                regs->gpr[reg] = data.w[1];
        }
 
        return 1;
+
+Efault_read:
+       user_read_access_end();
+       return -EFAULT;
+
+Efault_write:
+       user_write_access_end();
+       return -EFAULT;
 }
 #endif /* CONFIG_SPE */
 
@@ -299,13 +304,12 @@ int fix_alignment(struct pt_regs *regs)
        struct instruction_op op;
        int r, type;
 
-       /*
-        * We require a complete register set, if not, then our assembly
-        * is broken
-        */
-       CHECK_FULL_REGS(regs);
+       if (is_kernel_addr(regs->nip))
+               r = copy_inst_from_kernel_nofault(&instr, (void *)regs->nip);
+       else
+               r = __get_user_instr(instr, (void __user *)regs->nip);
 
-       if (unlikely(__get_user_instr(instr, (void __user *)regs->nip)))
+       if (unlikely(r))
                return -EFAULT;
        if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
                /* We don't handle PPC little-endian any more... */
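
The emulate_spe() hunks above replace the per-word __get_user_inatomic()/__put_user_inatomic() calls, which accumulated an error code, with a single user_read_access_begin()/user_write_access_begin() window around unsafe_get_user()/unsafe_put_user(), which branch straight to an Efault label on a fault. A minimal sketch of that pattern, using illustrative names that are not part of the patch:

    #include <linux/uaccess.h>

    /* Read two 32-bit words under one user-access window. */
    static int read_two_words(const u32 __user *p, u32 *hi, u32 *lo)
    {
            if (!user_read_access_begin(p, 2 * sizeof(u32)))
                    return -EFAULT;
            unsafe_get_user(*hi, p, Efault);      /* jumps to Efault on fault */
            unsafe_get_user(*lo, p + 1, Efault);
            user_read_access_end();
            return 0;

    Efault:
            user_read_access_end();
            return -EFAULT;
    }
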
index f3a6622..28af4ef 100644
@@ -91,7 +91,6 @@ int main(void)
        DEFINE(SIGSEGV, SIGSEGV);
        DEFINE(NMI_MASK, NMI_MASK);
 #else
-       OFFSET(KSP_LIMIT, thread_struct, ksp_limit);
 #ifdef CONFIG_PPC_RTAS
        OFFSET(RTAS_SP, thread_struct, rtas_sp);
 #endif
@@ -132,7 +131,6 @@ int main(void)
        OFFSET(KSP_VSID, thread_struct, ksp_vsid);
 #else /* CONFIG_PPC64 */
        OFFSET(PGDIR, thread_struct, pgdir);
-#ifdef CONFIG_VMAP_STACK
        OFFSET(SRR0, thread_struct, srr0);
        OFFSET(SRR1, thread_struct, srr1);
        OFFSET(DAR, thread_struct, dar);
@@ -149,7 +147,6 @@ int main(void)
        OFFSET(THLR, thread_struct, lr);
        OFFSET(THCTR, thread_struct, ctr);
 #endif
-#endif
 #ifdef CONFIG_SPE
        OFFSET(THREAD_EVR0, thread_struct, evr[0]);
        OFFSET(THREAD_ACC, thread_struct, acc);
@@ -285,21 +282,11 @@ int main(void)
        OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
        OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
        OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default);
-       OFFSET(ACCOUNT_STARTTIME, paca_struct, accounting.starttime);
-       OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user);
-       OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
-       OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
 #ifdef CONFIG_PPC_BOOK3E
        OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
 #endif
        OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
 #else /* CONFIG_PPC64 */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-       OFFSET(ACCOUNT_STARTTIME, thread_info, accounting.starttime);
-       OFFSET(ACCOUNT_STARTTIME_USER, thread_info, accounting.starttime_user);
-       OFFSET(ACCOUNT_USER_TIME, thread_info, accounting.utime);
-       OFFSET(ACCOUNT_SYSTEM_TIME, thread_info, accounting.stime);
-#endif
 #endif /* CONFIG_PPC64 */
 
        /* RTAS */
@@ -323,9 +310,6 @@ int main(void)
        STACK_PT_REGS_OFFSET(GPR11, gpr[11]);
        STACK_PT_REGS_OFFSET(GPR12, gpr[12]);
        STACK_PT_REGS_OFFSET(GPR13, gpr[13]);
-#ifndef CONFIG_PPC64
-       STACK_PT_REGS_OFFSET(GPR14, gpr[14]);
-#endif /* CONFIG_PPC64 */
        /*
         * Note: these symbols include _ because they overlap with special
         * register names
@@ -381,7 +365,6 @@ int main(void)
        DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1));
        DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0));
        DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
-       DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
 #endif
 #endif
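
asm-offsets.c contains no runtime code: each DEFINE()/OFFSET() line makes the compiler emit a marker that kbuild turns into a #define in the generated include/generated/asm-offsets.h, so deleting a definition here (KSP_LIMIT, SAVED_KSP_LIMIT, the ACCOUNT_* accounting fields, GPR14) simply drops a constant that the entry assembly no longer references. The helpers behave roughly like this (paraphrased from include/linux/kbuild.h; not part of this patch):

    #define DEFINE(sym, val) \
            asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

    #define OFFSET(sym, str, mem) \
            DEFINE(sym, offsetof(struct str, mem))

    /* e.g. OFFSET(RTAS_SP, thread_struct, rtas_sp) becomes
     * "#define RTAS_SP <offset of rtas_sp within struct thread_struct>"
     * in the generated header. */
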
 
index cd60bc1..f24cd53 100644
@@ -362,14 +362,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
        pa = pte_pfn(*ptep);
 
        /* On radix we can do hugepage mappings for io, so handle that */
-       if (hugepage_shift) {
-               pa <<= hugepage_shift;
-               pa |= token & ((1ul << hugepage_shift) - 1);
-       } else {
-               pa <<= PAGE_SHIFT;
-               pa |= token & (PAGE_SIZE - 1);
-       }
+       if (!hugepage_shift)
+               hugepage_shift = PAGE_SHIFT;
 
+       pa <<= PAGE_SHIFT;
+       pa |= token & ((1ul << hugepage_shift) - 1);
        return pa;
 }
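
The eeh_token_to_phys() change folds the huge and normal cases together: assuming pte_pfn() returns a frame number in PAGE_SIZE units even for a huge mapping (this is not spelled out in the patch itself), shifting by PAGE_SHIFT always gives the mapping's physical base, and the low hugepage_shift bits of the token supply the offset within it; a non-huge mapping simply uses hugepage_shift == PAGE_SHIFT. Illustrative numbers only:

    /*
     * 64K base pages (PAGE_SHIFT = 16), 16M huge mapping (hugepage_shift = 24):
     *
     *   pte_pfn(*ptep)            == 0x1300
     *   pa = 0x1300 << 16         == 0x13000000   (16M-aligned base)
     *   token & ((1ul << 24) - 1) == 0x345678     (offset within the 16M page)
     *   pa |= 0x345678            -> 0x13345678
     */
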
 
@@ -779,7 +776,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
        default:
                eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
                return -EINVAL;
-       };
+       }
 
        return 0;
 }
@@ -1568,6 +1565,7 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
 }
 EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
 
+#ifdef CONFIG_PROC_FS
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
        if (!eeh_enabled()) {
@@ -1594,6 +1592,7 @@ static int proc_eeh_show(struct seq_file *m, void *v)
 
        return 0;
 }
+#endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_DEBUG_FS
 
index 78c430b..9160285 100644
  */
        .align  12
 
-#ifdef CONFIG_BOOKE
-       .globl  mcheck_transfer_to_handler
-mcheck_transfer_to_handler:
-       mfspr   r0,SPRN_DSRR0
-       stw     r0,_DSRR0(r11)
-       mfspr   r0,SPRN_DSRR1
-       stw     r0,_DSRR1(r11)
-       /* fall through */
-_ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler)
-
-       .globl  debug_transfer_to_handler
-debug_transfer_to_handler:
-       mfspr   r0,SPRN_CSRR0
-       stw     r0,_CSRR0(r11)
-       mfspr   r0,SPRN_CSRR1
-       stw     r0,_CSRR1(r11)
-       /* fall through */
-_ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler)
-
-       .globl  crit_transfer_to_handler
-crit_transfer_to_handler:
-#ifdef CONFIG_PPC_BOOK3E_MMU
-       mfspr   r0,SPRN_MAS0
-       stw     r0,MAS0(r11)
-       mfspr   r0,SPRN_MAS1
-       stw     r0,MAS1(r11)
-       mfspr   r0,SPRN_MAS2
-       stw     r0,MAS2(r11)
-       mfspr   r0,SPRN_MAS3
-       stw     r0,MAS3(r11)
-       mfspr   r0,SPRN_MAS6
-       stw     r0,MAS6(r11)
-#ifdef CONFIG_PHYS_64BIT
-       mfspr   r0,SPRN_MAS7
-       stw     r0,MAS7(r11)
-#endif /* CONFIG_PHYS_64BIT */
-#endif /* CONFIG_PPC_BOOK3E_MMU */
-#ifdef CONFIG_44x
-       mfspr   r0,SPRN_MMUCR
-       stw     r0,MMUCR(r11)
-#endif
-       mfspr   r0,SPRN_SRR0
-       stw     r0,_SRR0(r11)
-       mfspr   r0,SPRN_SRR1
-       stw     r0,_SRR1(r11)
-
-       /* set the stack limit to the current stack */
-       mfspr   r8,SPRN_SPRG_THREAD
-       lwz     r0,KSP_LIMIT(r8)
-       stw     r0,SAVED_KSP_LIMIT(r11)
-       rlwinm  r0,r1,0,0,(31 - THREAD_SHIFT)
-       stw     r0,KSP_LIMIT(r8)
-       /* fall through */
-_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
-#endif
-
-#ifdef CONFIG_40x
-       .globl  crit_transfer_to_handler
-crit_transfer_to_handler:
-       lwz     r0,crit_r10@l(0)
-       stw     r0,GPR10(r11)
-       lwz     r0,crit_r11@l(0)
-       stw     r0,GPR11(r11)
-       mfspr   r0,SPRN_SRR0
-       stw     r0,crit_srr0@l(0)
-       mfspr   r0,SPRN_SRR1
-       stw     r0,crit_srr1@l(0)
-
-       /* set the stack limit to the current stack */
-       mfspr   r8,SPRN_SPRG_THREAD
-       lwz     r0,KSP_LIMIT(r8)
-       stw     r0,saved_ksp_limit@l(0)
-       rlwinm  r0,r1,0,0,(31 - THREAD_SHIFT)
-       stw     r0,KSP_LIMIT(r8)
-       /* fall through */
-_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
-#endif
-
-/*
- * This code finishes saving the registers to the exception frame
- * and jumps to the appropriate handler for the exception, turning
- * on address translation.
- * Note that we rely on the caller having set cr0.eq iff the exception
- * occurred in kernel mode (i.e. MSR:PR = 0).
- */
-       .globl  transfer_to_handler_full
-transfer_to_handler_full:
-       SAVE_NVGPRS(r11)
-_ASM_NOKPROBE_SYMBOL(transfer_to_handler_full)
-       /* fall through */
-
-       .globl  transfer_to_handler
-transfer_to_handler:
-       stw     r2,GPR2(r11)
-       stw     r12,_NIP(r11)
-       stw     r9,_MSR(r11)
-       andi.   r2,r9,MSR_PR
-       mfctr   r12
-       mfspr   r2,SPRN_XER
-       stw     r12,_CTR(r11)
-       stw     r2,_XER(r11)
-       mfspr   r12,SPRN_SPRG_THREAD
-       tovirt_vmstack r12, r12
-       beq     2f                      /* if from user, fix up THREAD.regs */
-       addi    r2, r12, -THREAD
-       addi    r11,r1,STACK_FRAME_OVERHEAD
-       stw     r11,PT_REGS(r12)
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-       /* Check to see if the dbcr0 register is set up to debug.  Use the
-          internal debug mode bit to do this. */
-       lwz     r12,THREAD_DBCR0(r12)
-       andis.  r12,r12,DBCR0_IDM@h
-#endif
-       ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
-#ifdef CONFIG_PPC_BOOK3S_32
-       kuep_lock r11, r12
-#endif
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-       beq+    3f
-       /* From user and task is ptraced - load up global dbcr0 */
-       li      r12,-1                  /* clear all pending debug events */
-       mtspr   SPRN_DBSR,r12
-       lis     r11,global_dbcr0@ha
-       tophys(r11,r11)
-       addi    r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
-       lwz     r9,TASK_CPU(r2)
-       slwi    r9,r9,2
-       add     r11,r11,r9
-#endif
-       lwz     r12,0(r11)
-       mtspr   SPRN_DBCR0,r12
-#endif
-
-       b       3f
-
-2:     /* if from kernel, check interrupted DOZE/NAP mode and
-         * check for stack overflow
-         */
-       kuap_save_and_lock r11, r12, r9, r2, r6
-       addi    r2, r12, -THREAD
-#ifndef CONFIG_VMAP_STACK
-       lwz     r9,KSP_LIMIT(r12)
-       cmplw   r1,r9                   /* if r1 <= ksp_limit */
-       ble-    stack_ovf               /* then the kernel stack overflowed */
-#endif
-5:
 #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
+       .globl  prepare_transfer_to_handler
+prepare_transfer_to_handler:
+       /* if from kernel, check interrupted DOZE/NAP mode */
        lwz     r12,TI_LOCAL_FLAGS(r2)
        mtcrf   0x01,r12
        bt-     31-TLF_NAPPING,4f
        bt-     31-TLF_SLEEPING,7f
-#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
-       .globl transfer_to_handler_cont
-transfer_to_handler_cont:
-3:
-       mflr    r9
-       tovirt_novmstack r2, r2         /* set r2 to current */
-       tovirt_vmstack r9, r9
-       lwz     r11,0(r9)               /* virtual address of handler */
-       lwz     r9,4(r9)                /* where to go when done */
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
-       mtspr   SPRN_NRI, r0
-#endif
-#ifdef CONFIG_TRACE_IRQFLAGS
-       /*
-        * When tracing IRQ state (lockdep) we enable the MMU before we call
-        * the IRQ tracing functions as they might access vmalloc space or
-        * perform IOs for console output.
-        *
-        * To speed up the syscall path where interrupts stay on, let's check
-        * first if we are changing the MSR value at all.
-        */
-       tophys_novmstack r12, r1
-       lwz     r12,_MSR(r12)
-       andi.   r12,r12,MSR_EE
-       bne     1f
-
-       /* MSR isn't changing, just transition directly */
-#endif
-       mtspr   SPRN_SRR0,r11
-       mtspr   SPRN_SRR1,r10
-       mtlr    r9
-       rfi                             /* jump to handler, enable MMU */
-#ifdef CONFIG_40x
-       b .     /* Prevent prefetch past rfi */
-#endif
+       blr
 
-#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
 4:     rlwinm  r12,r12,0,~_TLF_NAPPING
        stw     r12,TI_LOCAL_FLAGS(r2)
        b       power_save_ppc32_restore
@@ -246,97 +67,18 @@ transfer_to_handler_cont:
        lwz     r9,_MSR(r11)            /* if sleeping, clear MSR.EE */
        rlwinm  r9,r9,0,~MSR_EE
        lwz     r12,_LINK(r11)          /* and return to address in LR */
-       kuap_restore r11, r2, r3, r4, r5
        lwz     r2, GPR2(r11)
        b       fast_exception_return
-#endif
-_ASM_NOKPROBE_SYMBOL(transfer_to_handler)
-_ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-1:     /* MSR is changing, re-enable MMU so we can notify lockdep. We need to
-        * keep interrupts disabled at this point otherwise we might risk
-        * taking an interrupt before we tell lockdep they are enabled.
-        */
-       lis     r12,reenable_mmu@h
-       ori     r12,r12,reenable_mmu@l
-       LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
-       mtspr   SPRN_SRR0,r12
-       mtspr   SPRN_SRR1,r0
-       rfi
-#ifdef CONFIG_40x
-       b .     /* Prevent prefetch past rfi */
-#endif
-
-reenable_mmu:
-       /*
-        * We save a bunch of GPRs,
-        * r3 can be different from GPR3(r1) at this point, r9 and r11
-        * contains the old MSR and handler address respectively,
-        * r0, r4-r8, r12, CCR, CTR, XER etc... are left
-        * clobbered as they aren't useful past this point.
-        */
-
-       stwu    r1,-32(r1)
-       stw     r9,8(r1)
-       stw     r11,12(r1)
-       stw     r3,16(r1)
-
-       /* If we are disabling interrupts (normal case), simply log it with
-        * lockdep
-        */
-1:     bl      trace_hardirqs_off
-       lwz     r3,16(r1)
-       lwz     r11,12(r1)
-       lwz     r9,8(r1)
-       addi    r1,r1,32
-       mtctr   r11
-       mtlr    r9
-       bctr                            /* jump to handler */
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
-#ifndef CONFIG_VMAP_STACK
-/*
- * On kernel stack overflow, load up an initial stack pointer
- * and call StackOverflow(regs), which should not return.
- */
-stack_ovf:
-       /* sometimes we use a statically-allocated stack, which is OK. */
-       lis     r12,_end@h
-       ori     r12,r12,_end@l
-       cmplw   r1,r12
-       ble     5b                      /* r1 <= &_end is OK */
-       SAVE_NVGPRS(r11)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       lis     r1,init_thread_union@ha
-       addi    r1,r1,init_thread_union@l
-       addi    r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
-       lis     r9,StackOverflow@ha
-       addi    r9,r9,StackOverflow@l
-       LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
-       mtspr   SPRN_NRI, r0
-#endif
-       mtspr   SPRN_SRR0,r9
-       mtspr   SPRN_SRR1,r10
-       rfi
-#ifdef CONFIG_40x
-       b .     /* Prevent prefetch past rfi */
-#endif
-_ASM_NOKPROBE_SYMBOL(stack_ovf)
-#endif
+_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
+#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
 
        .globl  transfer_to_syscall
 transfer_to_syscall:
        SAVE_NVGPRS(r1)
-#ifdef CONFIG_PPC_BOOK3S_32
-       kuep_lock r11, r12
-#endif
 
        /* Calling convention has r9 = orig r0, r10 = regs */
        addi    r10,r1,STACK_FRAME_OVERHEAD
        mr      r9,r0
-       stw     r10,THREAD+PT_REGS(r2)
        bl      system_call_exception
 
 ret_from_syscall:
@@ -349,10 +91,6 @@ ret_from_syscall:
        cmplwi  cr0,r5,0
        bne-    2f
 #endif /* CONFIG_PPC_47x */
-#ifdef CONFIG_PPC_BOOK3S_32
-       kuep_unlock r5, r7
-#endif
-       kuap_check r2, r4
        lwz     r4,_LINK(r1)
        lwz     r5,_CCR(r1)
        mtlr    r4
@@ -411,27 +149,6 @@ ret_from_kernel_thread:
        li      r3,0
        b       ret_from_syscall
 
-/*
- * Top-level page fault handling.
- * This is in assembler because if do_page_fault tells us that
- * it is a bad kernel page fault, we want to save the non-volatile
- * registers before calling bad_page_fault.
- */
-       .globl  handle_page_fault
-handle_page_fault:
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      do_page_fault
-       cmpwi   r3,0
-       beq+    ret_from_except
-       SAVE_NVGPRS(r1)
-       lwz     r0,_TRAP(r1)
-       clrrwi  r0,r0,1
-       stw     r0,_TRAP(r1)
-       mr      r4,r3           /* err arg for bad_page_fault */
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      __bad_page_fault
-       b       ret_from_except_full
-
 /*
  * This routine switches between two different tasks.  The process
  * state of one is saved on its kernel stack.  Then the state
@@ -485,7 +202,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
        stw     r10,_CCR(r1)
        stw     r1,KSP(r3)      /* Set old stack pointer */
 
-       kuap_check r2, r0
 #ifdef CONFIG_SMP
        /* We need a sync somewhere here to make sure that if the
         * previous task gets rescheduled on another CPU, it sees all
@@ -529,12 +245,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
 fast_exception_return:
 #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
        andi.   r10,r9,MSR_RI           /* check for recoverable interrupt */
-       beq     1f                      /* if not, we've got problems */
+       beq     3f                      /* if not, we've got problems */
 #endif
 
 2:     REST_4GPRS(3, r11)
        lwz     r10,_CCR(r11)
-       REST_GPR(1, r11)
+       REST_2GPRS(1, r11)
        mtcr    r10
        lwz     r10,_LINK(r11)
        mtlr    r10
@@ -556,257 +272,147 @@ fast_exception_return:
 #endif
 _ASM_NOKPROBE_SYMBOL(fast_exception_return)
 
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-/* check if the exception happened in a restartable section */
-1:     lis     r3,exc_exit_restart_end@ha
-       addi    r3,r3,exc_exit_restart_end@l
-       cmplw   r12,r3
-       bge     3f
-       lis     r4,exc_exit_restart@ha
-       addi    r4,r4,exc_exit_restart@l
-       cmplw   r12,r4
-       blt     3f
-       lis     r3,fee_restarts@ha
-       tophys(r3,r3)
-       lwz     r5,fee_restarts@l(r3)
-       addi    r5,r5,1
-       stw     r5,fee_restarts@l(r3)
-       mr      r12,r4          /* restart at exc_exit_restart */
-       b       2b
-
-       .section .bss
-       .align  2
-fee_restarts:
-       .space  4
-       .previous
-
 /* aargh, a nonrecoverable interrupt, panic */
 /* aargh, we don't know which trap this is */
 3:
        li      r10,-1
        stw     r10,_TRAP(r11)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       lis     r10,MSR_KERNEL@h
-       ori     r10,r10,MSR_KERNEL@l
-       bl      transfer_to_handler_full
-       .long   unrecoverable_exception
-       .long   ret_from_except
-#endif
-
-       .globl  ret_from_except_full
-ret_from_except_full:
-       REST_NVGPRS(r1)
-       /* fall through */
-
-       .globl  ret_from_except
-ret_from_except:
-       /* Hard-disable interrupts so that current_thread_info()->flags
-        * can't change between when we test it and when we return
-        * from the interrupt. */
-       /* Note: We don't bother telling lockdep about it */
-       LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
-       mtmsr   r10             /* disable interrupts */
-
-       lwz     r3,_MSR(r1)     /* Returning to user mode? */
-       andi.   r0,r3,MSR_PR
-       beq     resume_kernel
-
-user_exc_return:               /* r10 contains MSR_KERNEL here */
-       /* Check current_thread_info()->flags */
-       lwz     r9,TI_FLAGS(r2)
-       andi.   r0,r9,_TIF_USER_WORK_MASK
-       bne     do_work
+       prepare_transfer_to_handler
+       bl      unrecoverable_exception
+       trap    /* should not get here */
 
-restore_user:
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-       /* Check whether this process has its own DBCR0 value.  The internal
-          debug mode bit tells us that dbcr0 should be loaded. */
-       lwz     r0,THREAD+THREAD_DBCR0(r2)
-       andis.  r10,r0,DBCR0_IDM@h
-       bnel-   load_dbcr0
-#endif
-       ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
-#ifdef CONFIG_PPC_BOOK3S_32
-       kuep_unlock     r10, r11
-#endif
+       .globl interrupt_return
+interrupt_return:
+       lwz     r4,_MSR(r1)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       andi.   r0,r4,MSR_PR
+       beq     .Lkernel_interrupt_return
+       bl      interrupt_exit_user_prepare
+       cmpwi   r3,0
+       bne-    .Lrestore_nvgprs
 
-       b       restore
+.Lfast_user_interrupt_return:
+       lwz     r11,_NIP(r1)
+       lwz     r12,_MSR(r1)
+       mtspr   SPRN_SRR0,r11
+       mtspr   SPRN_SRR1,r12
 
-/* N.B. the only way to get here is from the beq following ret_from_except. */
-resume_kernel:
-       /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
-       lwz     r8,TI_FLAGS(r2)
-       andis.  r0,r8,_TIF_EMULATE_STACK_STORE@h
-       beq+    1f
+BEGIN_FTR_SECTION
+       stwcx.  r0,0,r1         /* to clear the reservation */
+FTR_SECTION_ELSE
+       lwarx   r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
-       addi    r8,r1,INT_FRAME_SIZE    /* Get the kprobed function entry */
+       lwz     r3,_CCR(r1)
+       lwz     r4,_LINK(r1)
+       lwz     r5,_CTR(r1)
+       lwz     r6,_XER(r1)
+       li      r0,0
 
-       lwz     r3,GPR1(r1)
-       subi    r3,r3,INT_FRAME_SIZE    /* dst: Allocate a trampoline exception frame */
-       mr      r4,r1                   /* src:  current exception frame */
-       mr      r1,r3                   /* Reroute the trampoline frame to r1 */
+       /*
+        * Leaving a stale exception_marker on the stack can confuse
+        * the reliable stack unwinder later on. Clear it.
+        */
+       stw     r0,8(r1)
+       REST_4GPRS(7, r1)
+       REST_2GPRS(11, r1)
 
-       /* Copy from the original to the trampoline. */
-       li      r5,INT_FRAME_SIZE/4     /* size: INT_FRAME_SIZE */
-       li      r6,0                    /* start offset: 0 */
+       mtcr    r3
+       mtlr    r4
        mtctr   r5
-2:     lwzx    r0,r6,r4
-       stwx    r0,r6,r3
-       addi    r6,r6,4
-       bdnz    2b
-
-       /* Do real store operation to complete stwu */
-       lwz     r5,GPR1(r1)
-       stw     r8,0(r5)
+       mtspr   SPRN_XER,r6
 
-       /* Clear _TIF_EMULATE_STACK_STORE flag */
-       lis     r11,_TIF_EMULATE_STACK_STORE@h
-       addi    r5,r2,TI_FLAGS
-0:     lwarx   r8,0,r5
-       andc    r8,r8,r11
-       stwcx.  r8,0,r5
-       bne-    0b
-1:
-
-#ifdef CONFIG_PREEMPTION
-       /* check current_thread_info->preempt_count */
-       lwz     r0,TI_PREEMPT(r2)
-       cmpwi   0,r0,0          /* if non-zero, just restore regs and return */
-       bne     restore_kuap
-       andi.   r8,r8,_TIF_NEED_RESCHED
-       beq+    restore_kuap
-       lwz     r3,_MSR(r1)
-       andi.   r0,r3,MSR_EE    /* interrupts off? */
-       beq     restore_kuap    /* don't schedule if so */
-#ifdef CONFIG_TRACE_IRQFLAGS
-       /* Lockdep thinks irqs are enabled, we need to call
-        * preempt_schedule_irq with IRQs off, so we inform lockdep
-        * now that we -did- turn them off already
-        */
-       bl      trace_hardirqs_off
-#endif
-       bl      preempt_schedule_irq
-#ifdef CONFIG_TRACE_IRQFLAGS
-       /* And now, to properly rebalance the above, we tell lockdep they
-        * are being turned back on, which will happen when we return
-        */
-       bl      trace_hardirqs_on
+       REST_4GPRS(2, r1)
+       REST_GPR(6, r1)
+       REST_GPR(0, r1)
+       REST_GPR(1, r1)
+       rfi
+#ifdef CONFIG_40x
+       b .     /* Prevent prefetch past rfi */
 #endif
-#endif /* CONFIG_PREEMPTION */
-restore_kuap:
-       kuap_restore r1, r2, r9, r10, r0
-
-       /* interrupts are hard-disabled at this point */
-restore:
-#if defined(CONFIG_44x) && !defined(CONFIG_PPC_47x)
-       lis     r4,icache_44x_need_flush@ha
-       lwz     r5,icache_44x_need_flush@l(r4)
-       cmplwi  cr0,r5,0
-       beq+    1f
-       li      r6,0
-       iccci   r0,r0
-       stw     r6,icache_44x_need_flush@l(r4)
-1:
-#endif  /* CONFIG_44x */
 
-       lwz     r9,_MSR(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
-       /* Lockdep doesn't know about the fact that IRQs are temporarily turned
-        * off in this assembly code while peeking at TI_FLAGS() and such. However
-        * we need to inform it if the exception turned interrupts off, and we
-        * are about to trun them back on.
-        */
-       andi.   r10,r9,MSR_EE
-       beq     1f
-       stwu    r1,-32(r1)
-       mflr    r0
-       stw     r0,4(r1)
-       bl      trace_hardirqs_on
-       addi    r1, r1, 32
-       lwz     r9,_MSR(r1)
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
+.Lrestore_nvgprs:
+       REST_NVGPRS(r1)
+       b       .Lfast_user_interrupt_return
 
-       lwz     r0,GPR0(r1)
-       lwz     r2,GPR2(r1)
-       REST_4GPRS(3, r1)
-       REST_2GPRS(7, r1)
+.Lkernel_interrupt_return:
+       bl      interrupt_exit_kernel_prepare
 
-       lwz     r10,_XER(r1)
-       lwz     r11,_CTR(r1)
-       mtspr   SPRN_XER,r10
-       mtctr   r11
+.Lfast_kernel_interrupt_return:
+       cmpwi   cr1,r3,0
+       lwz     r11,_NIP(r1)
+       lwz     r12,_MSR(r1)
+       mtspr   SPRN_SRR0,r11
+       mtspr   SPRN_SRR1,r12
 
 BEGIN_FTR_SECTION
-       lwarx   r11,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
-       stwcx.  r0,0,r1                 /* to clear the reservation */
+       stwcx.  r0,0,r1         /* to clear the reservation */
+FTR_SECTION_ELSE
+       lwarx   r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-       andi.   r10,r9,MSR_RI           /* check if this exception occurred */
-       beql    nonrecoverable          /* at a bad place (MSR:RI = 0) */
+       lwz     r3,_LINK(r1)
+       lwz     r4,_CTR(r1)
+       lwz     r5,_XER(r1)
+       lwz     r6,_CCR(r1)
+       li      r0,0
+
+       REST_4GPRS(7, r1)
+       REST_2GPRS(11, r1)
 
-       lwz     r10,_CCR(r1)
-       lwz     r11,_LINK(r1)
-       mtcrf   0xFF,r10
-       mtlr    r11
+       mtlr    r3
+       mtctr   r4
+       mtspr   SPRN_XER,r5
 
-       /* Clear the exception_marker on the stack to avoid confusing stacktrace */
-       li      r10, 0
-       stw     r10, 8(r1)
        /*
-        * Once we put values in SRR0 and SRR1, we are in a state
-        * where exceptions are not recoverable, since taking an
-        * exception will trash SRR0 and SRR1.  Therefore we clear the
-        * MSR:RI bit to indicate this.  If we do take an exception,
-        * we can't return to the point of the exception but we
-        * can restart the exception exit path at the label
-        * exc_exit_restart below.  -- paulus
+        * Leaving a stale exception_marker on the stack can confuse
+        * the reliable stack unwinder later on. Clear it.
         */
-       LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
-       mtmsr   r10             /* clear the RI bit */
-       .globl exc_exit_restart
-exc_exit_restart:
-       lwz     r12,_NIP(r1)
-       mtspr   SPRN_SRR0,r12
-       mtspr   SPRN_SRR1,r9
-       REST_4GPRS(9, r1)
-       lwz     r1,GPR1(r1)
-       .globl exc_exit_restart_end
-exc_exit_restart_end:
+       stw     r0,8(r1)
+
+       REST_4GPRS(2, r1)
+
+       bne-    cr1,1f /* emulate stack store */
+       mtcr    r6
+       REST_GPR(6, r1)
+       REST_GPR(0, r1)
+       REST_GPR(1, r1)
        rfi
-_ASM_NOKPROBE_SYMBOL(exc_exit_restart)
-_ASM_NOKPROBE_SYMBOL(exc_exit_restart_end)
+#ifdef CONFIG_40x
+       b .     /* Prevent prefetch past rfi */
+#endif
 
-#else /* !(CONFIG_4xx || CONFIG_BOOKE) */
-       /*
-        * This is a bit different on 4xx/Book-E because it doesn't have
-        * the RI bit in the MSR.
-        * The TLB miss handler checks if we have interrupted
-        * the exception exit path and restarts it if so
-        * (well maybe one day it will... :).
+1:     /*
+        * Emulate stack store with update. New r1 value was already calculated
+        * and updated in our interrupt regs by emulate_loadstore, but we can't
+        * store the previous value of r1 to the stack before re-loading our
+        * registers from it, otherwise they could be clobbered.  Use
+        * SPRG Scratch0 as temporary storage to hold the store
+        * data, as interrupts are disabled here so it won't be clobbered.
         */
-       lwz     r11,_LINK(r1)
-       mtlr    r11
-       lwz     r10,_CCR(r1)
-       mtcrf   0xff,r10
-       /* Clear the exception_marker on the stack to avoid confusing stacktrace */
-       li      r10, 0
-       stw     r10, 8(r1)
-       REST_2GPRS(9, r1)
-       .globl exc_exit_restart
-exc_exit_restart:
-       lwz     r11,_NIP(r1)
-       lwz     r12,_MSR(r1)
-       mtspr   SPRN_SRR0,r11
-       mtspr   SPRN_SRR1,r12
-       REST_2GPRS(11, r1)
-       lwz     r1,GPR1(r1)
-       .globl exc_exit_restart_end
-exc_exit_restart_end:
+       mtcr    r6
+#ifdef CONFIG_BOOKE
+       mtspr   SPRN_SPRG_WSCRATCH0, r9
+#else
+       mtspr   SPRN_SPRG_SCRATCH0, r9
+#endif
+       addi    r9,r1,INT_FRAME_SIZE /* get original r1 */
+       REST_GPR(6, r1)
+       REST_GPR(0, r1)
+       REST_GPR(1, r1)
+       stw     r9,0(r1) /* perform store component of stwu */
+#ifdef CONFIG_BOOKE
+       mfspr   r9, SPRN_SPRG_RSCRATCH0
+#else
+       mfspr   r9, SPRN_SPRG_SCRATCH0
+#endif
        rfi
-       b       .                       /* prevent prefetch past rfi */
-_ASM_NOKPROBE_SYMBOL(exc_exit_restart)
+#ifdef CONFIG_40x
+       b .     /* Prevent prefetch past rfi */
+#endif
+_ASM_NOKPROBE_SYMBOL(interrupt_return)
+
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
 
 /*
  * Returning from a critical interrupt in user mode doesn't need
@@ -837,8 +443,7 @@ _ASM_NOKPROBE_SYMBOL(exc_exit_restart)
        REST_NVGPRS(r1);                                                \
        lwz     r3,_MSR(r1);                                            \
        andi.   r3,r3,MSR_PR;                                           \
-       LOAD_REG_IMMEDIATE(r10,MSR_KERNEL);                             \
-       bne     user_exc_return;                                        \
+       bne     interrupt_return;                                       \
        lwz     r0,GPR0(r1);                                            \
        lwz     r2,GPR2(r1);                                            \
        REST_4GPRS(3, r1);                                              \
@@ -906,11 +511,6 @@ _ASM_NOKPROBE_SYMBOL(exc_exit_restart)
 #ifdef CONFIG_40x
        .globl  ret_from_crit_exc
 ret_from_crit_exc:
-       mfspr   r9,SPRN_SPRG_THREAD
-       lis     r10,saved_ksp_limit@ha;
-       lwz     r10,saved_ksp_limit@l(r10);
-       tovirt(r9,r9);
-       stw     r10,KSP_LIMIT(r9)
        lis     r9,crit_srr0@ha;
        lwz     r9,crit_srr0@l(r9);
        lis     r10,crit_srr1@ha;
@@ -924,9 +524,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
 #ifdef CONFIG_BOOKE
        .globl  ret_from_crit_exc
 ret_from_crit_exc:
-       mfspr   r9,SPRN_SPRG_THREAD
-       lwz     r10,SAVED_KSP_LIMIT(r1)
-       stw     r10,KSP_LIMIT(r9)
        RESTORE_xSRR(SRR0,SRR1);
        RESTORE_MMU_REGS;
        RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
@@ -934,9 +531,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
 
        .globl  ret_from_debug_exc
 ret_from_debug_exc:
-       mfspr   r9,SPRN_SPRG_THREAD
-       lwz     r10,SAVED_KSP_LIMIT(r1)
-       stw     r10,KSP_LIMIT(r9)
        RESTORE_xSRR(SRR0,SRR1);
        RESTORE_xSRR(CSRR0,CSRR1);
        RESTORE_MMU_REGS;
@@ -945,9 +539,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_debug_exc)
 
        .globl  ret_from_mcheck_exc
 ret_from_mcheck_exc:
-       mfspr   r9,SPRN_SPRG_THREAD
-       lwz     r10,SAVED_KSP_LIMIT(r1)
-       stw     r10,KSP_LIMIT(r9)
        RESTORE_xSRR(SRR0,SRR1);
        RESTORE_xSRR(CSRR0,CSRR1);
        RESTORE_xSRR(DSRR0,DSRR1);
@@ -955,121 +546,8 @@ ret_from_mcheck_exc:
        RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI)
 _ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc)
 #endif /* CONFIG_BOOKE */
-
-/*
- * Load the DBCR0 value for a task that is being ptraced,
- * having first saved away the global DBCR0.  Note that r0
- * has the dbcr0 value to set upon entry to this.
- */
-load_dbcr0:
-       mfmsr   r10             /* first disable debug exceptions */
-       rlwinm  r10,r10,0,~MSR_DE
-       mtmsr   r10
-       isync
-       mfspr   r10,SPRN_DBCR0
-       lis     r11,global_dbcr0@ha
-       addi    r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
-       lwz     r9,TASK_CPU(r2)
-       slwi    r9,r9,2
-       add     r11,r11,r9
-#endif
-       stw     r10,0(r11)
-       mtspr   SPRN_DBCR0,r0
-       li      r11,-1
-       mtspr   SPRN_DBSR,r11   /* clear all pending debug events */
-       blr
-
-       .section .bss
-       .align  4
-       .global global_dbcr0
-global_dbcr0:
-       .space  4*NR_CPUS
-       .previous
 #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
 
-do_work:                       /* r10 contains MSR_KERNEL here */
-       andi.   r0,r9,_TIF_NEED_RESCHED
-       beq     do_user_signal
-
-do_resched:                    /* r10 contains MSR_KERNEL here */
-#ifdef CONFIG_TRACE_IRQFLAGS
-       bl      trace_hardirqs_on
-       mfmsr   r10
-#endif
-       ori     r10,r10,MSR_EE
-       mtmsr   r10             /* hard-enable interrupts */
-       bl      schedule
-recheck:
-       /* Note: And we don't tell it we are disabling them again
-        * neither. Those disable/enable cycles used to peek at
-        * TI_FLAGS aren't advertised.
-        */
-       LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
-       mtmsr   r10             /* disable interrupts */
-       lwz     r9,TI_FLAGS(r2)
-       andi.   r0,r9,_TIF_NEED_RESCHED
-       bne-    do_resched
-       andi.   r0,r9,_TIF_USER_WORK_MASK
-       beq     restore_user
-do_user_signal:                        /* r10 contains MSR_KERNEL here */
-       ori     r10,r10,MSR_EE
-       mtmsr   r10             /* hard-enable interrupts */
-       /* save r13-r31 in the exception frame, if not already done */
-       lwz     r3,_TRAP(r1)
-       andi.   r0,r3,1
-       beq     2f
-       SAVE_NVGPRS(r1)
-       rlwinm  r3,r3,0,0,30
-       stw     r3,_TRAP(r1)
-2:     addi    r3,r1,STACK_FRAME_OVERHEAD
-       mr      r4,r9
-       bl      do_notify_resume
-       REST_NVGPRS(r1)
-       b       recheck
-
-/*
- * We come here when we are at the end of handling an exception
- * that occurred at a place where taking an exception will lose
- * state information, such as the contents of SRR0 and SRR1.
- */
-nonrecoverable:
-       lis     r10,exc_exit_restart_end@ha
-       addi    r10,r10,exc_exit_restart_end@l
-       cmplw   r12,r10
-       bge     3f
-       lis     r11,exc_exit_restart@ha
-       addi    r11,r11,exc_exit_restart@l
-       cmplw   r12,r11
-       blt     3f
-       lis     r10,ee_restarts@ha
-       lwz     r12,ee_restarts@l(r10)
-       addi    r12,r12,1
-       stw     r12,ee_restarts@l(r10)
-       mr      r12,r11         /* restart at exc_exit_restart */
-       blr
-3:     /* OK, we can't recover, kill this process */
-       lwz     r3,_TRAP(r1)
-       andi.   r0,r3,1
-       beq     5f
-       SAVE_NVGPRS(r1)
-       rlwinm  r3,r3,0,0,30
-       stw     r3,_TRAP(r1)
-5:     mfspr   r2,SPRN_SPRG_THREAD
-       addi    r2,r2,-THREAD
-       tovirt(r2,r2)                   /* set back r2 to current */
-4:     addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      unrecoverable_exception
-       /* shouldn't return */
-       b       4b
-_ASM_NOKPROBE_SYMBOL(nonrecoverable)
-
-       .section .bss
-       .align  2
-ee_restarts:
-       .space  4
-       .previous
-
 /*
  * PROM code for specific machines follows.  Put it
  * here so it's easy to add arch-specific sections later.
@@ -1088,7 +566,6 @@ _GLOBAL(enter_rtas)
        lis     r6,1f@ha        /* physical return address for rtas */
        addi    r6,r6,1f@l
        tophys(r6,r6)
-       tophys_novmstack r7, r1
        lwz     r8,RTASENTRY(r4)
        lwz     r4,RTASBASE(r4)
        mfmsr   r9
@@ -1097,24 +574,25 @@ _GLOBAL(enter_rtas)
        mtmsr   r0      /* disable interrupts so SRR0/1 don't get trashed */
        li      r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
        mtlr    r6
-       stw     r7, THREAD + RTAS_SP(r2)
+       stw     r1, THREAD + RTAS_SP(r2)
        mtspr   SPRN_SRR0,r8
        mtspr   SPRN_SRR1,r9
        rfi
-1:     tophys_novmstack r9, r1
-#ifdef CONFIG_VMAP_STACK
-       li      r0, MSR_KERNEL & ~MSR_IR        /* can take DTLB miss */
-       mtmsr   r0
-       isync
-#endif
-       lwz     r8,INT_FRAME_SIZE+4(r9) /* get return address */
-       lwz     r9,8(r9)        /* original msr value */
-       addi    r1,r1,INT_FRAME_SIZE
-       li      r0,0
-       tophys_novmstack r7, r2
-       stw     r0, THREAD + RTAS_SP(r7)
+1:
+       lis     r8, 1f@h
+       ori     r8, r8, 1f@l
+       LOAD_REG_IMMEDIATE(r9,MSR_KERNEL)
        mtspr   SPRN_SRR0,r8
        mtspr   SPRN_SRR1,r9
-       rfi                     /* return to caller */
+       rfi                     /* Reactivate MMU translation */
+1:
+       lwz     r8,INT_FRAME_SIZE+4(r1) /* get return address */
+       lwz     r9,8(r1)        /* original msr value */
+       addi    r1,r1,INT_FRAME_SIZE
+       li      r0,0
+       stw     r0, THREAD + RTAS_SP(r2)
+       mtlr    r8
+       mtmsr   r9
+       blr                     /* return to caller */
 _ASM_NOKPROBE_SYMBOL(enter_rtas)
 #endif /* CONFIG_PPC_RTAS */
index 6c4d9e2..0372730 100644
@@ -117,13 +117,12 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
        /*
-        * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
-        * would clobber syscall parameters. Also we always enter with IRQs
-        * enabled and nothing pending. system_call_exception() will call
-        * trace_hardirqs_off().
-        *
-        * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The
-        * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED.
+        * scv enters with MSR[EE]=1 and is immediately considered soft-masked.
+        * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED,
+        * and interrupts may be masked and pending already.
+        * system_call_exception() will call trace_hardirqs_off() which means
+        * interrupts could already have been blocked before trace_hardirqs_off,
+        * but this is the best we can do.
         */
 
        /* Calling convention has r9 = orig r0, r10 = regs */
@@ -288,9 +287,8 @@ END_BTB_FLUSH_SECTION
        std     r11,-16(r10)            /* "regshere" marker */
 
        /*
-        * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
-        * would clobber syscall parameters. Also we always enter with IRQs
-        * enabled and nothing pending. system_call_exception() will call
+        * We always enter kernel from userspace with irq soft-mask enabled and
+        * nothing pending. system_call_exception() will call
         * trace_hardirqs_off().
         */
        li      r11,IRQS_ALL_DISABLED
@@ -417,19 +415,6 @@ _GLOBAL(ret_from_kernel_thread)
        li      r3,0
        b       .Lsyscall_exit
 
-#ifdef CONFIG_PPC_BOOK3E
-/* Save non-volatile GPRs, if not already saved. */
-_GLOBAL(save_nvgprs)
-       ld      r11,_TRAP(r1)
-       andi.   r0,r11,1
-       beqlr-
-       SAVE_NVGPRS(r1)
-       clrrdi  r0,r11,1
-       std     r0,_TRAP(r1)
-       blr
-_ASM_NOKPROBE_SYMBOL(save_nvgprs);
-#endif
-
 #ifdef CONFIG_PPC_BOOK3S_64
 
 #define FLUSH_COUNT_CACHE      \
@@ -645,7 +630,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        addi    r1,r1,SWITCH_FRAME_SIZE
        blr
 
-#ifdef CONFIG_PPC_BOOK3S
        /*
         * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
         * touched, no exit work created, then this can be used.
@@ -657,6 +641,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return)
        kuap_check_amr r3, r4
        ld      r5,_MSR(r1)
        andi.   r0,r5,MSR_PR
+#ifdef CONFIG_PPC_BOOK3S
        bne     .Lfast_user_interrupt_return_amr
        kuap_kernel_restore r3, r4
        andi.   r0,r5,MSR_RI
@@ -665,6 +650,10 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      unrecoverable_exception
        b       . /* should not get here */
+#else
+       bne     .Lfast_user_interrupt_return
+       b       .Lfast_kernel_interrupt_return
+#endif
 
        .balign IFETCH_ALIGN_BYTES
        .globl interrupt_return
@@ -678,8 +667,10 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return)
        cmpdi   r3,0
        bne-    .Lrestore_nvgprs
 
+#ifdef CONFIG_PPC_BOOK3S
 .Lfast_user_interrupt_return_amr:
        kuap_user_restore r3, r4
+#endif
 .Lfast_user_interrupt_return:
        ld      r11,_NIP(r1)
        ld      r12,_MSR(r1)
@@ -788,7 +779,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
        RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
-#endif /* CONFIG_PPC_BOOK3S */
 
 #ifdef CONFIG_PPC_RTAS
 /*
index e8eb999..7c3654b 100644
@@ -63,9 +63,6 @@
        ld      reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
 
 special_reg_save:
-       lbz     r9,PACAIRQHAPPENED(r13)
-       RECONCILE_IRQ_STATE(r3,r4)
-
        /*
         * We only need (or have stack space) to save this stuff if
         * we interrupted the kernel.
@@ -119,15 +116,11 @@ BEGIN_FTR_SECTION
        mtspr   SPRN_MAS5,r10
        mtspr   SPRN_MAS8,r10
 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
-       SPECIAL_EXC_STORE(r9,IRQHAPPENED)
-
        mfspr   r10,SPRN_DEAR
        SPECIAL_EXC_STORE(r10,DEAR)
        mfspr   r10,SPRN_ESR
        SPECIAL_EXC_STORE(r10,ESR)
 
-       lbz     r10,PACAIRQSOFTMASK(r13)
-       SPECIAL_EXC_STORE(r10,SOFTE)
        ld      r10,_NIP(r1)
        SPECIAL_EXC_STORE(r10,CSRR0)
        ld      r10,_MSR(r1)
@@ -139,7 +132,8 @@ ret_from_level_except:
        ld      r3,_MSR(r1)
        andi.   r3,r3,MSR_PR
        beq     1f
-       b       ret_from_except
+       REST_NVGPRS(r1)
+       b       interrupt_return
 1:
 
        LOAD_REG_ADDR(r11,extlb_level_exc)
@@ -193,27 +187,6 @@ BEGIN_FTR_SECTION
        mtspr   SPRN_MAS8,r10
 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
 
-       lbz     r6,PACAIRQSOFTMASK(r13)
-       ld      r5,SOFTE(r1)
-
-       /* Interrupts had better not already be enabled... */
-       tweqi   r6,IRQS_ENABLED
-
-       andi.   r6,r5,IRQS_DISABLED
-       bne     1f
-
-       TRACE_ENABLE_INTS
-       stb     r5,PACAIRQSOFTMASK(r13)
-1:
-       /*
-        * Restore PACAIRQHAPPENED rather than setting it based on
-        * the return MSR[EE], since we could have interrupted
-        * __check_irq_replay() or other inconsistent transitory
-        * states that must remain that way.
-        */
-       SPECIAL_EXC_LOAD(r10,IRQHAPPENED)
-       stb     r10,PACAIRQHAPPENED(r13)
-
        SPECIAL_EXC_LOAD(r10,DEAR)
        mtspr   SPRN_DEAR,r10
        SPECIAL_EXC_LOAD(r10,ESR)
@@ -417,14 +390,15 @@ exc_##n##_common:                                                     \
        std     r6,_LINK(r1);                                               \
        std     r7,_CTR(r1);                                                \
        std     r8,_XER(r1);                                                \
-       li      r3,(n)+1;               /* indicate partial regs in trap */ \
+       li      r3,(n);                 /* regs.trap vector */              \
        std     r9,0(r1);               /* store stack frame back link */   \
        std     r10,_CCR(r1);           /* store orig CR in stackframe */   \
        std     r9,GPR1(r1);            /* store stack frame back link */   \
        std     r11,SOFTE(r1);          /* and save it to stackframe */     \
        std     r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */       \
        std     r3,_TRAP(r1);           /* set trap number              */  \
-       std     r0,RESULT(r1);          /* clear regs->result */
+       std     r0,RESULT(r1);          /* clear regs->result */            \
+       SAVE_NVGPRS(r1);
 
 #define EXCEPTION_COMMON(n) \
        EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
@@ -435,28 +409,6 @@ exc_##n##_common:                                                      \
 #define EXCEPTION_COMMON_DBG(n) \
        EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG)
 
-/*
- * This is meant for exceptions that don't immediately hard-enable.  We
- * set a bit in paca->irq_happened to ensure that a subsequent call to
- * arch_local_irq_restore() will properly hard-enable and avoid the
- * fast-path, and then reconcile irq state.
- */
-#define INTS_DISABLE   RECONCILE_IRQ_STATE(r3,r4)
-
-/*
- * This is called by exceptions that don't use INTS_DISABLE (that did not
- * touch irq indicators in the PACA).  This will restore MSR:EE to it's
- * previous value
- *
- * XXX In the long run, we may want to open-code it in order to separate the
- *     load from the wrtee, thus limiting the latency caused by the dependency
- *     but at this point, I'll favor code clarity until we have a near to final
- *     implementation
- */
-#define INTS_RESTORE_HARD                                                  \
-       ld      r11,_MSR(r1);                                               \
-       wrtee   r11;
-
 /* XXX FIXME: Restore r14/r15 when necessary */
 #define BAD_STACK_TRAMPOLINE(n)                                                    \
 exc_##n##_bad_stack:                                                       \
@@ -505,12 +457,11 @@ exc_##n##_bad_stack:                                                          \
        START_EXCEPTION(label);                                         \
        NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
        EXCEPTION_COMMON(trapnum)                                       \
-       INTS_DISABLE;                                                   \
        ack(r8);                                                        \
        CHECK_NAPPING();                                                \
        addi    r3,r1,STACK_FRAME_OVERHEAD;                             \
        bl      hdlr;                                                   \
-       b       ret_from_except_lite;
+       b       interrupt_return
 
 /* This value is used to mark exception frames on the stack. */
        .section        ".toc","aw"
@@ -561,11 +512,10 @@ __end_interrupts:
        CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
                              PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON_CRIT(0x100)
-       bl      save_nvgprs
        bl      special_reg_save
        CHECK_NAPPING();
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      unknown_exception
+       bl      unknown_nmi_exception
        b       ret_from_crit_except
 
 /* Machine Check Interrupt */
@@ -573,7 +523,6 @@ __end_interrupts:
        MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK,
                            PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON_MC(0x000)
-       bl      save_nvgprs
        bl      special_reg_save
        CHECK_NAPPING();
        addi    r3,r1,STACK_FRAME_OVERHEAD
@@ -587,7 +536,6 @@ __end_interrupts:
        mfspr   r14,SPRN_DEAR
        mfspr   r15,SPRN_ESR
        EXCEPTION_COMMON(0x300)
-       INTS_DISABLE
        b       storage_fault_common
 
 /* Instruction Storage Interrupt */
@@ -597,7 +545,6 @@ __end_interrupts:
        li      r15,0
        mr      r14,r10
        EXCEPTION_COMMON(0x400)
-       INTS_DISABLE
        b       storage_fault_common
 
 /* External Input Interrupt */
@@ -619,13 +566,12 @@ __end_interrupts:
                                PROLOG_ADDITION_1REG)
        mfspr   r14,SPRN_ESR
        EXCEPTION_COMMON(0x700)
-       INTS_DISABLE
        std     r14,_DSISR(r1)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        ld      r14,PACA_EXGEN+EX_R14(r13)
-       bl      save_nvgprs
        bl      program_check_exception
-       b       ret_from_except
+       REST_NVGPRS(r1)
+       b       interrupt_return
 
 /* Floating Point Unavailable Interrupt */
        START_EXCEPTION(fp_unavailable);
@@ -637,12 +583,10 @@ __end_interrupts:
        andi.   r0,r12,MSR_PR;
        beq-    1f
        bl      load_up_fpu
-       b       fast_exception_return
-1:     INTS_DISABLE
-       bl      save_nvgprs
-       addi    r3,r1,STACK_FRAME_OVERHEAD
+       b       fast_interrupt_return
+1:     addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      kernel_fp_unavailable_exception
-       b       ret_from_except
+       b       interrupt_return
 
 /* Altivec Unavailable Interrupt */
        START_EXCEPTION(altivec_unavailable);
@@ -656,15 +600,13 @@ BEGIN_FTR_SECTION
        andi.   r0,r12,MSR_PR;
        beq-    1f
        bl      load_up_altivec
-       b       fast_exception_return
+       b       fast_interrupt_return
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
-       INTS_DISABLE
-       bl      save_nvgprs
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      altivec_unavailable_exception
-       b       ret_from_except
+       b       interrupt_return
 
 /* AltiVec Assist */
        START_EXCEPTION(altivec_assist);
@@ -672,17 +614,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
                                BOOKE_INTERRUPT_ALTIVEC_ASSIST,
                                PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON(0x220)
-       INTS_DISABLE
-       bl      save_nvgprs
        addi    r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_ALTIVEC
 BEGIN_FTR_SECTION
        bl      altivec_assist_exception
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+       REST_NVGPRS(r1)
 #else
        bl      unknown_exception
 #endif
-       b       ret_from_except
+       b       interrupt_return
 
 
 /* Decrementer Interrupt */
@@ -698,14 +639,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
        CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
                              PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON_CRIT(0x9f0)
-       bl      save_nvgprs
        bl      special_reg_save
        CHECK_NAPPING();
        addi    r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_BOOKE_WDT
        bl      WatchdogException
 #else
-       bl      unknown_exception
+       bl      unknown_nmi_exception
 #endif
        b       ret_from_crit_except
 
@@ -722,11 +662,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
        NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
                                PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON(0xf20)
-       INTS_DISABLE
-       bl      save_nvgprs
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      unknown_exception
-       b       ret_from_except
+       b       interrupt_return
 
 /* Debug exception as a critical interrupt*/
        START_EXCEPTION(debug_crit);
@@ -792,9 +730,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        ld      r14,PACA_EXCRIT+EX_R14(r13)
        ld      r15,PACA_EXCRIT+EX_R15(r13)
-       bl      save_nvgprs
        bl      DebugException
-       b       ret_from_except
+       REST_NVGPRS(r1)
+       b       interrupt_return
 
 kernel_dbg_exc:
        b       .       /* NYI */
@@ -859,24 +797,22 @@ kernel_dbg_exc:
         */
        mfspr   r14,SPRN_DBSR
        EXCEPTION_COMMON_DBG(0xd08)
-       INTS_DISABLE
        std     r14,_DSISR(r1)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        ld      r14,PACA_EXDBG+EX_R14(r13)
        ld      r15,PACA_EXDBG+EX_R15(r13)
-       bl      save_nvgprs
        bl      DebugException
-       b       ret_from_except
+       REST_NVGPRS(r1)
+       b       interrupt_return
 
        START_EXCEPTION(perfmon);
        NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
                                PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON(0x260)
-       INTS_DISABLE
        CHECK_NAPPING()
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      performance_monitor_exception
-       b       ret_from_except_lite
+       b       interrupt_return
 
 /* Doorbell interrupt */
        MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
@@ -887,11 +823,10 @@ kernel_dbg_exc:
        CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
                              PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON_CRIT(0x2a0)
-       bl      save_nvgprs
        bl      special_reg_save
        CHECK_NAPPING();
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      unknown_exception
+       bl      unknown_nmi_exception
        b       ret_from_crit_except
 
 /*
@@ -903,21 +838,18 @@ kernel_dbg_exc:
                                PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON(0x2c0)
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      save_nvgprs
-       INTS_RESTORE_HARD
        bl      unknown_exception
-       b       ret_from_except
+       b       interrupt_return
 
 /* Guest Doorbell critical Interrupt */
        START_EXCEPTION(guest_doorbell_crit);
        CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
                              PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON_CRIT(0x2e0)
-       bl      save_nvgprs
        bl      special_reg_save
        CHECK_NAPPING();
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      unknown_exception
+       bl      unknown_nmi_exception
        b       ret_from_crit_except
 
 /* Hypervisor call */
@@ -926,10 +858,8 @@ kernel_dbg_exc:
                                PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON(0x310)
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      save_nvgprs
-       INTS_RESTORE_HARD
        bl      unknown_exception
-       b       ret_from_except
+       b       interrupt_return
 
 /* Embedded Hypervisor priviledged  */
        START_EXCEPTION(ehpriv);
@@ -937,10 +867,8 @@ kernel_dbg_exc:
                                PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON(0x320)
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      save_nvgprs
-       INTS_RESTORE_HARD
        bl      unknown_exception
-       b       ret_from_except
+       b       interrupt_return
 
 /* LRAT Error interrupt */
        START_EXCEPTION(lrat_error);
@@ -948,10 +876,8 @@ kernel_dbg_exc:
                                PROLOG_ADDITION_NONE)
        EXCEPTION_COMMON(0x340)
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      save_nvgprs
-       INTS_RESTORE_HARD
        bl      unknown_exception
-       b       ret_from_except
+       b       interrupt_return
 
 /*
  * An interrupt came in while soft-disabled; We mark paca->irq_happened
@@ -1011,14 +937,7 @@ storage_fault_common:
        ld      r14,PACA_EXGEN+EX_R14(r13)
        ld      r15,PACA_EXGEN+EX_R15(r13)
        bl      do_page_fault
-       cmpdi   r3,0
-       bne-    1f
-       b       ret_from_except_lite
-1:     bl      save_nvgprs
-       mr      r4,r3
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      __bad_page_fault
-       b       ret_from_except
+       b       interrupt_return
 
 /*
  * Alignment exception doesn't fit entirely in the 0x100 bytes so it
@@ -1030,291 +949,9 @@ alignment_more:
        addi    r3,r1,STACK_FRAME_OVERHEAD
        ld      r14,PACA_EXGEN+EX_R14(r13)
        ld      r15,PACA_EXGEN+EX_R15(r13)
-       bl      save_nvgprs
-       INTS_RESTORE_HARD
        bl      alignment_exception
-       b       ret_from_except
-
-       .align  7
-_GLOBAL(ret_from_except)
-       ld      r11,_TRAP(r1)
-       andi.   r0,r11,1
-       bne     ret_from_except_lite
        REST_NVGPRS(r1)
-
-_GLOBAL(ret_from_except_lite)
-       /*
-        * Disable interrupts so that current_thread_info()->flags
-        * can't change between when we test it and when we return
-        * from the interrupt.
-        */
-       wrteei  0
-
-       ld      r9, PACA_THREAD_INFO(r13)
-       ld      r3,_MSR(r1)
-       ld      r10,PACACURRENT(r13)
-       ld      r4,TI_FLAGS(r9)
-       andi.   r3,r3,MSR_PR
-       beq     resume_kernel
-       lwz     r3,(THREAD+THREAD_DBCR0)(r10)
-
-       /* Check current_thread_info()->flags */
-       andi.   r0,r4,_TIF_USER_WORK_MASK
-       bne     1f
-       /*
-        * Check to see if the dbcr0 register is set up to debug.
-        * Use the internal debug mode bit to do this.
-        */
-       andis.  r0,r3,DBCR0_IDM@h
-       beq     restore
-       mfmsr   r0
-       rlwinm  r0,r0,0,~MSR_DE /* Clear MSR.DE */
-       mtmsr   r0
-       mtspr   SPRN_DBCR0,r3
-       li      r10, -1
-       mtspr   SPRN_DBSR,r10
-       b       restore
-1:     andi.   r0,r4,_TIF_NEED_RESCHED
-       beq     2f
-       bl      restore_interrupts
-       SCHEDULE_USER
-       b       ret_from_except_lite
-2:
-       bl      save_nvgprs
-       /*
-        * Use a non volatile GPR to save and restore our thread_info flags
-        * across the call to restore_interrupts.
-        */
-       mr      r30,r4
-       bl      restore_interrupts
-       mr      r4,r30
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      do_notify_resume
-       b       ret_from_except
-
-resume_kernel:
-       /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
-       andis.  r8,r4,_TIF_EMULATE_STACK_STORE@h
-       beq+    1f
-
-       addi    r8,r1,INT_FRAME_SIZE    /* Get the kprobed function entry */
-
-       ld      r3,GPR1(r1)
-       subi    r3,r3,INT_FRAME_SIZE    /* dst: Allocate a trampoline exception frame */
-       mr      r4,r1                   /* src:  current exception frame */
-       mr      r1,r3                   /* Reroute the trampoline frame to r1 */
-
-       /* Copy from the original to the trampoline. */
-       li      r5,INT_FRAME_SIZE/8     /* size: INT_FRAME_SIZE */
-       li      r6,0                    /* start offset: 0 */
-       mtctr   r5
-2:     ldx     r0,r6,r4
-       stdx    r0,r6,r3
-       addi    r6,r6,8
-       bdnz    2b
-
-       /* Do real store operation to complete stdu */
-       ld      r5,GPR1(r1)
-       std     r8,0(r5)
-
-       /* Clear _TIF_EMULATE_STACK_STORE flag */
-       lis     r11,_TIF_EMULATE_STACK_STORE@h
-       addi    r5,r9,TI_FLAGS
-0:     ldarx   r4,0,r5
-       andc    r4,r4,r11
-       stdcx.  r4,0,r5
-       bne-    0b
-1:
-
-#ifdef CONFIG_PREEMPT
-       /* Check if we need to preempt */
-       andi.   r0,r4,_TIF_NEED_RESCHED
-       beq+    restore
-       /* Check that preempt_count() == 0 and interrupts are enabled */
-       lwz     r8,TI_PREEMPT(r9)
-       cmpwi   cr0,r8,0
-       bne     restore
-       ld      r0,SOFTE(r1)
-       andi.   r0,r0,IRQS_DISABLED
-       bne     restore
-
-       /*
-        * Here we are preempting the current task. We want to make
-        * sure we are soft-disabled first and reconcile irq state.
-        */
-       RECONCILE_IRQ_STATE(r3,r4)
-       bl      preempt_schedule_irq
-
-       /*
-        * arch_local_irq_restore() from preempt_schedule_irq above may
-        * enable hard interrupt but we really should disable interrupts
-        * when we return from the interrupt, and so that we don't get
-        * interrupted after loading SRR0/1.
-        */
-       wrteei  0
-#endif /* CONFIG_PREEMPT */
-
-restore:
-       /*
-        * This is the main kernel exit path. First we check if we
-        * are about to re-enable interrupts
-        */
-       ld      r5,SOFTE(r1)
-       lbz     r6,PACAIRQSOFTMASK(r13)
-       andi.   r5,r5,IRQS_DISABLED
-       bne     .Lrestore_irq_off
-
-       /* We are enabling, were we already enabled ? Yes, just return */
-       andi.   r6,r6,IRQS_DISABLED
-       beq     cr0,fast_exception_return
-
-       /*
-        * We are about to soft-enable interrupts (we are hard disabled
-        * at this point). We check if there's anything that needs to
-        * be replayed first.
-        */
-       lbz     r0,PACAIRQHAPPENED(r13)
-       cmpwi   cr0,r0,0
-       bne-    .Lrestore_check_irq_replay
-
-       /*
-        * Get here when nothing happened while soft-disabled, just
-        * soft-enable and move-on. We will hard-enable as a side
-        * effect of rfi
-        */
-.Lrestore_no_replay:
-       TRACE_ENABLE_INTS
-       li      r0,IRQS_ENABLED
-       stb     r0,PACAIRQSOFTMASK(r13);
-
-/* This is the return from load_up_fpu fast path which could do with
- * less GPR restores in fact, but for now we have a single return path
- */
-fast_exception_return:
-       wrteei  0
-1:     mr      r0,r13
-       ld      r10,_MSR(r1)
-       REST_4GPRS(2, r1)
-       andi.   r6,r10,MSR_PR
-       REST_2GPRS(6, r1)
-       beq     1f
-       ACCOUNT_CPU_USER_EXIT(r13, r10, r11)
-       ld      r0,GPR13(r1)
-
-1:     stdcx.  r0,0,r1         /* to clear the reservation */
-
-       ld      r8,_CCR(r1)
-       ld      r9,_LINK(r1)
-       ld      r10,_CTR(r1)
-       ld      r11,_XER(r1)
-       mtcr    r8
-       mtlr    r9
-       mtctr   r10
-       mtxer   r11
-       REST_2GPRS(8, r1)
-       ld      r10,GPR10(r1)
-       ld      r11,GPR11(r1)
-       ld      r12,GPR12(r1)
-       mtspr   SPRN_SPRG_GEN_SCRATCH,r0
-
-       std     r10,PACA_EXGEN+EX_R10(r13);
-       std     r11,PACA_EXGEN+EX_R11(r13);
-       ld      r10,_NIP(r1)
-       ld      r11,_MSR(r1)
-       ld      r0,GPR0(r1)
-       ld      r1,GPR1(r1)
-       mtspr   SPRN_SRR0,r10
-       mtspr   SPRN_SRR1,r11
-       ld      r10,PACA_EXGEN+EX_R10(r13)
-       ld      r11,PACA_EXGEN+EX_R11(r13)
-       mfspr   r13,SPRN_SPRG_GEN_SCRATCH
-       rfi
-
-       /*
-        * We are returning to a context with interrupts soft disabled.
-        *
-        * However, we may also about to hard enable, so we need to
-        * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
-        * or that bit can get out of sync and bad things will happen
-        */
-.Lrestore_irq_off:
-       ld      r3,_MSR(r1)
-       lbz     r7,PACAIRQHAPPENED(r13)
-       andi.   r0,r3,MSR_EE
-       beq     1f
-       rlwinm  r7,r7,0,~PACA_IRQ_HARD_DIS
-       stb     r7,PACAIRQHAPPENED(r13)
-1:
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
-       /* The interrupt should not have soft enabled. */
-       lbz     r7,PACAIRQSOFTMASK(r13)
-1:     tdeqi   r7,IRQS_ENABLED
-       EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-       b       fast_exception_return
-
-       /*
-        * Something did happen, check if a re-emit is needed
-        * (this also clears paca->irq_happened)
-        */
-.Lrestore_check_irq_replay:
-       /* XXX: We could implement a fast path here where we check
-        * for irq_happened being just 0x01, in which case we can
-        * clear it and return. That means that we would potentially
-        * miss a decrementer having wrapped all the way around.
-        *
-        * Still, this might be useful for things like hash_page
-        */
-       bl      __check_irq_replay
-       cmpwi   cr0,r3,0
-       beq     .Lrestore_no_replay
-
-       /*
-        * We need to re-emit an interrupt. We do so by re-using our
-        * existing exception frame. We first change the trap value,
-        * but we need to ensure we preserve the low nibble of it
-        */
-       ld      r4,_TRAP(r1)
-       clrldi  r4,r4,60
-       or      r4,r4,r3
-       std     r4,_TRAP(r1)
-
-       /*
-        * PACA_IRQ_HARD_DIS won't always be set here, so set it now
-        * to reconcile the IRQ state. Tracing is already accounted for.
-        */
-       lbz     r4,PACAIRQHAPPENED(r13)
-       ori     r4,r4,PACA_IRQ_HARD_DIS
-       stb     r4,PACAIRQHAPPENED(r13)
-
-       /*
-        * Then find the right handler and call it. Interrupts are
-        * still soft-disabled and we keep them that way.
-       */
-       cmpwi   cr0,r3,0x500
-       bne     1f
-       addi    r3,r1,STACK_FRAME_OVERHEAD;
-       bl      do_IRQ
-       b       ret_from_except
-1:     cmpwi   cr0,r3,0x900
-       bne     1f
-       addi    r3,r1,STACK_FRAME_OVERHEAD;
-       bl      timer_interrupt
-       b       ret_from_except
-#ifdef CONFIG_PPC_DOORBELL
-1:
-       cmpwi   cr0,r3,0x280
-       bne     1f
-       addi    r3,r1,STACK_FRAME_OVERHEAD;
-       bl      doorbell_exception
-#endif /* CONFIG_PPC_DOORBELL */
-1:     b       ret_from_except /* What else to do here ? */
-
-_ASM_NOKPROBE_SYMBOL(ret_from_except);
-_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
-_ASM_NOKPROBE_SYMBOL(resume_kernel);
-_ASM_NOKPROBE_SYMBOL(restore);
-_ASM_NOKPROBE_SYMBOL(fast_exception_return);
+       b       interrupt_return
 
 /*
  * Trampolines used when spotting a bad kernel stack pointer in
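For orientation: the hand-rolled exit path deleted above (ret_from_except, ret_from_except_lite, fast_exception_return and the soft-mask replay code) is replaced throughout this file by a plain branch to interrupt_return, moving the return-path decisions out of assembly and into C. A deliberately simplified C rendering of what the deleted assembly decided is sketched below; the function name and structure are illustrative only and do not reproduce the kernel's actual interrupt_return code.

        #include <linux/sched.h>
        #include <linux/thread_info.h>
        #include <asm/ptrace.h>

        /*
         * Sketch only: the decisions the removed assembly made on the way out
         * of an interrupt. The real C exit path also handles soft-mask replay,
         * irq accounting, and the final register restore / rfi.
         */
        static void interrupt_exit_sketch(struct pt_regs *regs)
        {
                local_irq_disable();    /* TIF flags must not change under us */

                if (user_mode(regs)) {
                        unsigned long flags = current_thread_info()->flags;

                        if (flags & _TIF_NEED_RESCHED)
                                schedule();     /* reschedule before returning to user */
                        else if (flags & _TIF_USER_WORK_MASK)
                                do_notify_resume(regs, flags);  /* signals, notifications */
                }
                /*
                 * Kernel return: optionally preempt (CONFIG_PREEMPT), then restore
                 * soft-mask state, non-volatile GPRs when needed, SRR0/SRR1, and rfi.
                 */
        }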
index 8082b69..fa8e52a 100644
@@ -692,25 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
        ld      r1,GPR1(r1)
 .endm
 
-/*
- * When the idle code in power4_idle puts the CPU into NAP mode,
- * it has to do so in a loop, and relies on the external interrupt
- * and decrementer interrupt entry code to get it out of the loop.
- * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
- * to signal that it is in the loop and needs help to get out.
- */
-#ifdef CONFIG_PPC_970_NAP
-#define FINISH_NAP                             \
-BEGIN_FTR_SECTION                              \
-       ld      r11, PACA_THREAD_INFO(r13);     \
-       ld      r9,TI_LOCAL_FLAGS(r11);         \
-       andi.   r10,r9,_TLF_NAPPING;            \
-       bnel    power4_fixup_nap;               \
-END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-#else
-#define FINISH_NAP
-#endif
-
 /*
  * There are a few constraints to be concerned with.
  * - Real mode exceptions code/data must be located at their physical location.
@@ -1248,7 +1229,6 @@ EXC_COMMON_BEGIN(machine_check_common)
         */
        GEN_COMMON machine_check
 
-       FINISH_NAP
        /* Enable MSR_RI when finished with PACA_EXMC */
        li      r10,MSR_RI
        mtmsrd  r10,1
@@ -1571,7 +1551,6 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
 EXC_COMMON_BEGIN(hardware_interrupt_common)
        GEN_COMMON hardware_interrupt
-       FINISH_NAP
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_IRQ
        b       interrupt_return
@@ -1801,7 +1780,6 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
 EXC_VIRT_END(decrementer, 0x4900, 0x80)
 EXC_COMMON_BEGIN(decrementer_common)
        GEN_COMMON decrementer
-       FINISH_NAP
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      timer_interrupt
        b       interrupt_return
@@ -1886,7 +1864,6 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
 EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
 EXC_COMMON_BEGIN(doorbell_super_common)
        GEN_COMMON doorbell_super
-       FINISH_NAP
        addi    r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_DOORBELL
        bl      doorbell_exception
@@ -2237,7 +2214,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
 
 EXC_COMMON_BEGIN(hmi_exception_common)
        GEN_COMMON hmi_exception
-       FINISH_NAP
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      handle_hmi_exception
        b       interrupt_return
@@ -2266,7 +2242,6 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
 EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
 EXC_COMMON_BEGIN(h_doorbell_common)
        GEN_COMMON h_doorbell
-       FINISH_NAP
        addi    r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_DOORBELL
        bl      doorbell_exception
@@ -2299,7 +2274,6 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
 EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
 EXC_COMMON_BEGIN(h_virt_irq_common)
        GEN_COMMON h_virt_irq
-       FINISH_NAP
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_IRQ
        b       interrupt_return
@@ -2345,7 +2319,6 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
 EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
 EXC_COMMON_BEGIN(performance_monitor_common)
        GEN_COMMON performance_monitor
-       FINISH_NAP
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      performance_monitor_exception
        b       interrupt_return
@@ -2530,8 +2503,6 @@ EXC_VIRT_NONE(0x5100, 0x100)
 INT_DEFINE_BEGIN(cbe_system_error)
        IVEC=0x1200
        IHSRR=1
-       IKVM_SKIP=1
-       IKVM_REAL=1
 INT_DEFINE_END(cbe_system_error)
 
 EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
@@ -2551,11 +2522,16 @@ EXC_REAL_NONE(0x1200, 0x100)
 EXC_VIRT_NONE(0x5200, 0x100)
 #endif
 
-
+/**
+ * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt.
+ * This has been removed from the ISA before 2.01, which is the earliest
+ * 64-bit BookS ISA supported, however the G5 / 970 implements this
+ * interrupt with a non-architected feature available through the support
+ * processor interface.
+ */
 INT_DEFINE_BEGIN(instruction_breakpoint)
        IVEC=0x1300
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-       IKVM_SKIP=1
        IKVM_REAL=1
 #endif
 INT_DEFINE_END(instruction_breakpoint)
@@ -2701,8 +2677,6 @@ EXC_COMMON_BEGIN(denorm_exception_common)
 INT_DEFINE_BEGIN(cbe_maintenance)
        IVEC=0x1600
        IHSRR=1
-       IKVM_SKIP=1
-       IKVM_REAL=1
 INT_DEFINE_END(cbe_maintenance)
 
 EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
@@ -2754,8 +2728,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
 INT_DEFINE_BEGIN(cbe_thermal)
        IVEC=0x1800
        IHSRR=1
-       IKVM_SKIP=1
-       IKVM_REAL=1
 INT_DEFINE_END(cbe_thermal)
 
 EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
@@ -3096,24 +3068,6 @@ USE_FIXED_SECTION(virt_trampolines)
 __end_interrupts:
 DEFINE_FIXED_SYMBOL(__end_interrupts)
 
-#ifdef CONFIG_PPC_970_NAP
-       /*
-        * Called by exception entry code if _TLF_NAPPING was set, this clears
-        * the NAPPING flag, and redirects the exception exit to
-        * power4_fixup_nap_return.
-        */
-       .globl power4_fixup_nap
-EXC_COMMON_BEGIN(power4_fixup_nap)
-       andc    r9,r9,r10
-       std     r9,TI_LOCAL_FLAGS(r11)
-       LOAD_REG_ADDR(r10, power4_idle_nap_return)
-       std     r10,_NIP(r1)
-       blr
-
-power4_idle_nap_return:
-       blr
-#endif
-
 CLOSE_FIXED_SECTION(real_vectors);
 CLOSE_FIXED_SECTION(real_trampolines);
 CLOSE_FIXED_SECTION(virt_vectors);
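For orientation on the FINISH_NAP removals above: the deleted macro, whose definition is dropped near the top of this file's hunks, checked whether the interrupted CPU had been napping in power4_idle and, if so, cleared _TLF_NAPPING and redirected the return address into the idle wakeup path. Rendered as simplified C it amounts to roughly the following; the helper name is made up here, and per this series the equivalent fixup is expected to be done from the common C interrupt entry code rather than repeated in each handler.

        #include <linux/thread_info.h>
        #include <asm/ptrace.h>

        extern void power4_idle_nap_return(void);       /* label from the deleted assembly */

        /* Simplified C rendering of the deleted FINISH_NAP check (sketch only). */
        static void finish_nap_sketch(struct pt_regs *regs)
        {
                struct thread_info *ti = current_thread_info();

                if (ti->local_flags & _TLF_NAPPING) {
                        ti->local_flags &= ~_TLF_NAPPING;
                        /* steer the return address back to the idle loop's wakeup point */
                        regs->nip = (unsigned long)power4_idle_nap_return;
                }
        }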
index 8482739..b990075 100644
@@ -31,6 +31,7 @@
 #include <asm/fadump.h>
 #include <asm/fadump-internal.h>
 #include <asm/setup.h>
+#include <asm/interrupt.h>
 
 /*
  * The CPU who acquired the lock to trigger the fadump crash should
@@ -44,22 +45,21 @@ static struct fw_dump fw_dump;
 
 static void __init fadump_reserve_crash_area(u64 base);
 
-struct kobject *fadump_kobj;
-
 #ifndef CONFIG_PRESERVE_FA_DUMP
 
+static struct kobject *fadump_kobj;
+
 static atomic_t cpus_in_fadump;
 static DEFINE_MUTEX(fadump_mutex);
 
-struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };
+static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };
 
 #define RESERVED_RNGS_SZ       16384 /* 16K - 128 entries */
 #define RESERVED_RNGS_CNT      (RESERVED_RNGS_SZ / \
                                 sizeof(struct fadump_memory_range))
 static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
-struct fadump_mrange_info reserved_mrange_info = { "reserved", rngs,
-                                                  RESERVED_RNGS_SZ, 0,
-                                                  RESERVED_RNGS_CNT, true };
+static struct fadump_mrange_info
+reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true };
 
 static void __init early_init_dt_scan_reserved_ranges(unsigned long node);
 
@@ -79,7 +79,7 @@ static struct cma *fadump_cma;
  * But for some reason even if it fails we still have the memory reservation
  * with us and we can still continue doing fadump.
  */
-int __init fadump_cma_init(void)
+static int __init fadump_cma_init(void)
 {
        unsigned long long base, size;
        int rc;
@@ -292,7 +292,7 @@ static void fadump_show_config(void)
  * that is required for a kernel to boot successfully.
  *
  */
-static inline u64 fadump_calculate_reserve_size(void)
+static __init u64 fadump_calculate_reserve_size(void)
 {
        u64 base, size, bootmem_min;
        int ret;
@@ -728,7 +728,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)
         * If we came in via system reset, wait a while for the secondary
         * CPUs to enter.
         */
-       if (TRAP(&(fdh->regs)) == 0x100) {
+       if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) {
                msecs = CRASH_TIMEOUT;
                while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
                        mdelay(1);
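The fadump change above replaces the magic trap value 0x100 with the INTERRUPT_SYSTEM_RESET constant made available by the newly included <asm/interrupt.h>. A minimal illustration of the pattern, using a hypothetical helper name, is:

        #include <linux/types.h>
        #include <asm/interrupt.h>      /* named interrupt vectors, e.g. INTERRUPT_SYSTEM_RESET */
        #include <asm/ptrace.h>         /* TRAP() and struct pt_regs */

        /* Hypothetical helper: same check as the hunk above, by name instead of 0x100. */
        static bool entered_via_system_reset(struct pt_regs *regs)
        {
                return TRAP(regs) == INTERRUPT_SYSTEM_RESET;
        }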
index 3ff9a8f..2c57ece 100644
@@ -92,9 +92,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
        /* enable use of FP after return */
 #ifdef CONFIG_PPC32
        mfspr   r5,SPRN_SPRG_THREAD     /* current task's THREAD (phys) */
-#ifdef CONFIG_VMAP_STACK
        tovirt(r5, r5)
-#endif
        lwz     r4,THREAD_FPEXC_MODE(r5)
        ori     r9,r9,MSR_FP            /* enable FP for current */
        or      r9,r9,r4
index 5d4706c..a8221dd 100644
  * We assume sprg3 has the physical address of the current
  * task's thread_struct.
  */
-.macro EXCEPTION_PROLOG handle_dar_dsisr=0
+.macro EXCEPTION_PROLOG                trapno name handle_dar_dsisr=0
        EXCEPTION_PROLOG_0      handle_dar_dsisr=\handle_dar_dsisr
        EXCEPTION_PROLOG_1
-       EXCEPTION_PROLOG_2      handle_dar_dsisr=\handle_dar_dsisr
+       EXCEPTION_PROLOG_2      \trapno \name handle_dar_dsisr=\handle_dar_dsisr
 .endm
 
 .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0
        mtspr   SPRN_SPRG_SCRATCH0,r10
        mtspr   SPRN_SPRG_SCRATCH1,r11
-#ifdef CONFIG_VMAP_STACK
        mfspr   r10, SPRN_SPRG_THREAD
        .if     \handle_dar_dsisr
+#ifdef CONFIG_40x
+       mfspr   r11, SPRN_DEAR
+#else
        mfspr   r11, SPRN_DAR
+#endif
        stw     r11, DAR(r10)
+#ifdef CONFIG_40x
+       mfspr   r11, SPRN_ESR
+#else
        mfspr   r11, SPRN_DSISR
+#endif
        stw     r11, DSISR(r10)
        .endif
        mfspr   r11, SPRN_SRR0
        stw     r11, SRR0(r10)
-#endif
        mfspr   r11, SPRN_SRR1          /* check whether user or kernel */
-#ifdef CONFIG_VMAP_STACK
        stw     r11, SRR1(r10)
-#endif
        mfcr    r10
        andi.   r11, r11, MSR_PR
 .endm
 
-.macro EXCEPTION_PROLOG_1 for_rtas=0
-#ifdef CONFIG_VMAP_STACK
+.macro EXCEPTION_PROLOG_1
        mtspr   SPRN_SPRG_SCRATCH2,r1
        subi    r1, r1, INT_FRAME_SIZE          /* use r1 if kernel */
        beq     1f
        lwz     r1,TASK_STACK-THREAD(r1)
        addi    r1, r1, THREAD_SIZE - INT_FRAME_SIZE
 1:
+#ifdef CONFIG_VMAP_STACK
        mtcrf   0x3f, r1
-       bt      32 - THREAD_ALIGN_SHIFT, stack_overflow
-#else
-       subi    r11, r1, INT_FRAME_SIZE         /* use r1 if kernel */
-       beq     1f
-       mfspr   r11,SPRN_SPRG_THREAD
-       lwz     r11,TASK_STACK-THREAD(r11)
-       addi    r11, r11, THREAD_SIZE - INT_FRAME_SIZE
-1:     tophys(r11, r11)
+       bt      32 - THREAD_ALIGN_SHIFT, vmap_stack_overflow
 #endif
 .endm
 
-.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0
-#ifdef CONFIG_VMAP_STACK
-       li      r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
-       mtmsr   r11
-       isync
+.macro EXCEPTION_PROLOG_2 trapno name handle_dar_dsisr=0
+#ifdef CONFIG_PPC_8xx
+       .if     \handle_dar_dsisr
+       li      r11, RPN_PATTERN
+       mtspr   SPRN_DAR, r11   /* Tag DAR, to be used in DTLB Error */
+       .endif
+#endif
+       LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~MSR_RI) /* re-enable MMU */
+       mtspr   SPRN_SRR1, r11
+       lis     r11, 1f@h
+       ori     r11, r11, 1f@l
+       mtspr   SPRN_SRR0, r11
        mfspr   r11, SPRN_SPRG_SCRATCH2
+       rfi
+
+       .text
+\name\()_virt:
+1:
        stw     r11,GPR1(r1)
        stw     r11,0(r1)
        mr      r11, r1
-#else
-       stw     r1,GPR1(r11)
-       stw     r1,0(r11)
-       tovirt(r1, r11)         /* set new kernel sp */
-#endif
        stw     r10,_CCR(r11)           /* save registers */
        stw     r12,GPR12(r11)
        stw     r9,GPR9(r11)
@@ -82,7 +86,6 @@
        stw     r12,GPR11(r11)
        mflr    r10
        stw     r10,_LINK(r11)
-#ifdef CONFIG_VMAP_STACK
        mfspr   r12, SPRN_SPRG_THREAD
        tovirt(r12, r12)
        .if     \handle_dar_dsisr
        .endif
        lwz     r9, SRR1(r12)
        lwz     r12, SRR0(r12)
-#else
-       mfspr   r12,SPRN_SRR0
-       mfspr   r9,SPRN_SRR1
-#endif
 #ifdef CONFIG_40x
        rlwinm  r9,r9,0,14,12           /* clear MSR_WE (necessary?) */
+#elif defined(CONFIG_PPC_8xx)
+       mtspr   SPRN_EID, r2            /* Set MSR_RI */
 #else
-#ifdef CONFIG_VMAP_STACK
-       li      r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */
-#else
-       li      r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */
-#endif
+       li      r10, MSR_KERNEL         /* can take exceptions */
        mtmsr   r10                     /* (except for mach check in rtas) */
 #endif
-       stw     r0,GPR0(r11)
+       COMMON_EXCEPTION_PROLOG_END \trapno
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
+.endm
+
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+       stw     r0,GPR0(r1)
        lis     r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
        addi    r10,r10,STACK_FRAME_REGS_MARKER@l
-       stw     r10,8(r11)
-       SAVE_4GPRS(3, r11)
-       SAVE_2GPRS(7, r11)
+       stw     r10,8(r1)
+       li      r10, \trapno
+       stw     r10,_TRAP(r1)
+       SAVE_4GPRS(3, r1)
+       SAVE_2GPRS(7, r1)
+       SAVE_NVGPRS(r1)
+       stw     r2,GPR2(r1)
+       stw     r12,_NIP(r1)
+       stw     r9,_MSR(r1)
+       mfctr   r10
+       mfspr   r2,SPRN_SPRG_THREAD
+       stw     r10,_CTR(r1)
+       tovirt(r2, r2)
+       mfspr   r10,SPRN_XER
+       addi    r2, r2, -THREAD
+       stw     r10,_XER(r1)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+.endm
+
+.macro prepare_transfer_to_handler
+#ifdef CONFIG_PPC_BOOK3S_32
+       andi.   r12,r9,MSR_PR
+       bne     777f
+       bl      prepare_transfer_to_handler
+777:
+#endif
 .endm
 
 .macro SYSCALL_ENTRY trapno
        b       transfer_to_syscall             /* jump to handler */
 .endm
 
-.macro save_dar_dsisr_on_stack reg1, reg2, sp
-#ifndef CONFIG_VMAP_STACK
-       mfspr   \reg1, SPRN_DAR
-       mfspr   \reg2, SPRN_DSISR
-       stw     \reg1, _DAR(\sp)
-       stw     \reg2, _DSISR(\sp)
-#endif
-.endm
-
-.macro get_and_save_dar_dsisr_on_stack reg1, reg2, sp
-#ifdef CONFIG_VMAP_STACK
-       lwz     \reg1, _DAR(\sp)
-       lwz     \reg2, _DSISR(\sp)
-#else
-       save_dar_dsisr_on_stack \reg1, \reg2, \sp
-#endif
-.endm
-
-.macro tovirt_vmstack dst, src
-#ifdef CONFIG_VMAP_STACK
-       tovirt(\dst, \src)
-#else
-       .ifnc   \dst, \src
-       mr      \dst, \src
-       .endif
-#endif
-.endm
-
-.macro tovirt_novmstack dst, src
-#ifndef CONFIG_VMAP_STACK
-       tovirt(\dst, \src)
-#else
-       .ifnc   \dst, \src
-       mr      \dst, \src
-       .endif
-#endif
-.endm
-
-.macro tophys_novmstack dst, src
-#ifndef CONFIG_VMAP_STACK
-       tophys(\dst, \src)
-#else
-       .ifnc   \dst, \src
-       mr      \dst, \src
-       .endif
-#endif
-.endm
-
 /*
  * Note: code which follows this uses cr0.eq (set if from kernel),
  * r11, r12 (SRR0), and r9 (SRR1).
  */
 #ifdef CONFIG_PPC_BOOK3S
 #define        START_EXCEPTION(n, label)               \
+       __HEAD;                                 \
        . = n;                                  \
        DO_KVM n;                               \
 label:
 
 #else
 #define        START_EXCEPTION(n, label)               \
+       __HEAD;                                 \
        . = n;                                  \
 label:
 
 #endif
 
-#define EXCEPTION(n, label, hdlr, xfer)                \
+#define EXCEPTION(n, label, hdlr)              \
        START_EXCEPTION(n, label)               \
-       EXCEPTION_PROLOG;                       \
-       addi    r3,r1,STACK_FRAME_OVERHEAD;     \
-       xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret)          \
-       li      r10,trap;                                       \
-       stw     r10,_TRAP(r11);                                 \
-       LOAD_REG_IMMEDIATE(r10, msr);                           \
-       bl      tfer;                                           \
-       .long   hdlr;                                           \
-       .long   ret
-
-#define EXC_XFER_STD(n, hdlr)          \
-       EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full,        \
-                         ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr)         \
-       EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
-                         ret_from_except)
+       EXCEPTION_PROLOG n label;               \
+       prepare_transfer_to_handler;            \
+       bl      hdlr;                           \
+       b       interrupt_return
 
 .macro vmap_stack_overflow_exception
-#ifdef CONFIG_VMAP_STACK
+       __HEAD
+vmap_stack_overflow:
 #ifdef CONFIG_SMP
        mfspr   r1, SPRN_SPRG_THREAD
        lwz     r1, TASK_CPU - THREAD(r1)
@@ -261,16 +226,11 @@ label:
        lis     r1, emergency_ctx@ha
 #endif
        lwz     r1, emergency_ctx@l(r1)
-       cmpwi   cr1, r1, 0
-       bne     cr1, 1f
-       lis     r1, init_thread_union@ha
-       addi    r1, r1, init_thread_union@l
-1:     addi    r1, r1, THREAD_SIZE - INT_FRAME_SIZE
-       EXCEPTION_PROLOG_2
-       SAVE_NVGPRS(r11)
-       addi    r3, r1, STACK_FRAME_OVERHEAD
-       EXC_XFER_STD(0, stack_overflow_exception)
-#endif
+       addi    r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+       EXCEPTION_PROLOG_2 0 vmap_stack_overflow
+       prepare_transfer_to_handler
+       bl      stack_overflow_exception
+       b       interrupt_return
 .endm
 
 #endif /* __HEAD_32_H__ */
index 24724a7..e1360b8 100644
@@ -89,7 +89,11 @@ _ENTRY(crit_srr0)
        .space  4
 _ENTRY(crit_srr1)
        .space  4
-_ENTRY(saved_ksp_limit)
+_ENTRY(crit_r1)
+       .space  4
+_ENTRY(crit_dear)
+       .space  4
+_ENTRY(crit_esr)
        .space  4
 
 /*
@@ -100,42 +104,62 @@ _ENTRY(saved_ksp_limit)
  * Instead we use a couple of words of memory at low physical addresses.
  * This is OK since we don't support SMP on these processors.
  */
-#define CRITICAL_EXCEPTION_PROLOG                                           \
-       stw     r10,crit_r10@l(0);      /* save two registers to work with */\
-       stw     r11,crit_r11@l(0);                                           \
-       mfcr    r10;                    /* save CR in r10 for now          */\
-       mfspr   r11,SPRN_SRR3;          /* check whether user or kernel    */\
-       andi.   r11,r11,MSR_PR;                                              \
-       lis     r11,critirq_ctx@ha;                                          \
-       tophys(r11,r11);                                                     \
-       lwz     r11,critirq_ctx@l(r11);                                      \
-       beq     1f;                                                          \
-       /* COMING FROM USER MODE */                                          \
-       mfspr   r11,SPRN_SPRG_THREAD;   /* if from user, start at top of   */\
-       lwz     r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
-1:     addi    r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm  */\
-       tophys(r11,r11);                                                     \
-       stw     r10,_CCR(r11);          /* save various registers          */\
-       stw     r12,GPR12(r11);                                              \
-       stw     r9,GPR9(r11);                                                \
-       mflr    r10;                                                         \
-       stw     r10,_LINK(r11);                                              \
-       mfspr   r12,SPRN_DEAR;          /* save DEAR and ESR in the frame  */\
-       stw     r12,_DEAR(r11);         /* since they may have had stuff   */\
-       mfspr   r9,SPRN_ESR;            /* in them at the point where the  */\
-       stw     r9,_ESR(r11);           /* exception was taken             */\
-       mfspr   r12,SPRN_SRR2;                                               \
-       stw     r1,GPR1(r11);                                                \
-       mfspr   r9,SPRN_SRR3;                                                \
-       stw     r1,0(r11);                                                   \
-       tovirt(r1,r11);                                                      \
-       rlwinm  r9,r9,0,14,12;          /* clear MSR_WE (necessary?)       */\
-       stw     r0,GPR0(r11);                                                \
-       lis     r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
-       addi    r10, r10, STACK_FRAME_REGS_MARKER@l;                         \
-       stw     r10, 8(r11);                                                 \
-       SAVE_4GPRS(3, r11);                                                  \
-       SAVE_2GPRS(7, r11)
+.macro CRITICAL_EXCEPTION_PROLOG trapno name
+       stw     r10,crit_r10@l(0)       /* save two registers to work with */
+       stw     r11,crit_r11@l(0)
+       mfspr   r10,SPRN_SRR0
+       mfspr   r11,SPRN_SRR1
+       stw     r10,crit_srr0@l(0)
+       stw     r11,crit_srr1@l(0)
+       mfspr   r10,SPRN_DEAR
+       mfspr   r11,SPRN_ESR
+       stw     r10,crit_dear@l(0)
+       stw     r11,crit_esr@l(0)
+       mfcr    r10                     /* save CR in r10 for now          */
+       mfspr   r11,SPRN_SRR3           /* check whether user or kernel    */
+       andi.   r11,r11,MSR_PR
+       lis     r11,(critirq_ctx-PAGE_OFFSET)@ha
+       lwz     r11,(critirq_ctx-PAGE_OFFSET)@l(r11)
+       beq     1f
+       /* COMING FROM USER MODE */
+       mfspr   r11,SPRN_SPRG_THREAD    /* if from user, start at top of   */
+       lwz     r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */
+1:     stw     r1,crit_r1@l(0)
+       addi    r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm  */
+       LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)) /* re-enable MMU */
+       mtspr   SPRN_SRR1, r11
+       lis     r11, 1f@h
+       ori     r11, r11, 1f@l
+       mtspr   SPRN_SRR0, r11
+       rfi
+
+       .text
+1:
+\name\()_virt:
+       lwz     r11,crit_r1@l(0)
+       stw     r11,GPR1(r1)
+       stw     r11,0(r1)
+       mr      r11,r1
+       stw     r10,_CCR(r11)           /* save various registers          */
+       stw     r12,GPR12(r11)
+       stw     r9,GPR9(r11)
+       mflr    r10
+       stw     r10,_LINK(r11)
+       lis     r9,PAGE_OFFSET@ha
+       lwz     r10,crit_r10@l(r9)
+       lwz     r12,crit_r11@l(r9)
+       stw     r10,GPR10(r11)
+       stw     r12,GPR11(r11)
+       lwz     r12,crit_dear@l(r9)
+       lwz     r9,crit_esr@l(r9)
+       stw     r12,_DEAR(r11)          /* since they may have had stuff   */
+       stw     r9,_ESR(r11)            /* exception was taken             */
+       mfspr   r12,SPRN_SRR2
+       mfspr   r9,SPRN_SRR3
+       rlwinm  r9,r9,0,14,12           /* clear MSR_WE (necessary?)       */
+       COMMON_EXCEPTION_PROLOG_END \trapno + 2
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
+.endm
 
        /*
         * State at this point:
@@ -155,10 +179,10 @@ _ENTRY(saved_ksp_limit)
  */
 #define CRITICAL_EXCEPTION(n, label, hdlr)                     \
        START_EXCEPTION(n, label);                              \
-       CRITICAL_EXCEPTION_PROLOG;                              \
-       addi    r3,r1,STACK_FRAME_OVERHEAD;                     \
-       EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
-                         crit_transfer_to_handler, ret_from_crit_exc)
+       CRITICAL_EXCEPTION_PROLOG n label;                              \
+       prepare_transfer_to_handler;                            \
+       bl      hdlr;                                           \
+       b       ret_from_crit_exc
 
 /*
  * 0x0100 - Critical Interrupt Exception
@@ -178,69 +202,67 @@ _ENTRY(saved_ksp_limit)
  * if they can't resolve the lightweight TLB fault.
  */
        START_EXCEPTION(0x0300, DataStorage)
-       EXCEPTION_PROLOG
-       mfspr   r5, SPRN_ESR            /* Grab the ESR, save it */
-       stw     r5, _ESR(r11)
-       mfspr   r4, SPRN_DEAR           /* Grab the DEAR, save it */
-       stw     r4, _DEAR(r11)
-       EXC_XFER_LITE(0x300, handle_page_fault)
+       EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1
+       prepare_transfer_to_handler
+       bl      do_page_fault
+       b       interrupt_return
 
 /*
  * 0x0400 - Instruction Storage Exception
  * This is caused by a fetch from non-execute or guarded pages.
  */
        START_EXCEPTION(0x0400, InstructionAccess)
-       EXCEPTION_PROLOG
+       EXCEPTION_PROLOG 0x400 InstructionAccess
        li      r5,0
        stw     r5, _ESR(r11)           /* Zero ESR */
        stw     r12, _DEAR(r11)         /* SRR0 as DEAR */
-       EXC_XFER_LITE(0x400, handle_page_fault)
+       prepare_transfer_to_handler
+       bl      do_page_fault
+       b       interrupt_return
 
 /* 0x0500 - External Interrupt Exception */
-       EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+       EXCEPTION(0x0500, HardwareInterrupt, do_IRQ)
 
 /* 0x0600 - Alignment Exception */
        START_EXCEPTION(0x0600, Alignment)
-       EXCEPTION_PROLOG
-       mfspr   r4,SPRN_DEAR            /* Grab the DEAR and save it */
-       stw     r4,_DEAR(r11)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       EXC_XFER_STD(0x600, alignment_exception)
+       EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1
+       prepare_transfer_to_handler
+       bl      alignment_exception
+       REST_NVGPRS(r1)
+       b       interrupt_return
 
 /* 0x0700 - Program Exception */
        START_EXCEPTION(0x0700, ProgramCheck)
-       EXCEPTION_PROLOG
-       mfspr   r4,SPRN_ESR             /* Grab the ESR and save it */
-       stw     r4,_ESR(r11)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       EXC_XFER_STD(0x700, program_check_exception)
+       EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1
+       prepare_transfer_to_handler
+       bl      program_check_exception
+       REST_NVGPRS(r1)
+       b       interrupt_return
 
-       EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0x0800, Trap_08, unknown_exception)
+       EXCEPTION(0x0900, Trap_09, unknown_exception)
+       EXCEPTION(0x0A00, Trap_0A, unknown_exception)
+       EXCEPTION(0x0B00, Trap_0B, unknown_exception)
 
 /* 0x0C00 - System Call Exception */
        START_EXCEPTION(0x0C00, SystemCall)
        SYSCALL_ENTRY   0xc00
 /*     Trap_0D is commented out to get more space for system call exception */
 
-/*     EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) */
-       EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD)
+/*     EXCEPTION(0x0D00, Trap_0D, unknown_exception) */
+       EXCEPTION(0x0E00, Trap_0E, unknown_exception)
+       EXCEPTION(0x0F00, Trap_0F, unknown_exception)
 
 /* 0x1000 - Programmable Interval Timer (PIT) Exception */
-       . = 0x1000
+       START_EXCEPTION(0x1000, DecrementerTrap)
        b Decrementer
 
-/* 0x1010 - Fixed Interval Timer (FIT) Exception
-*/
-       . = 0x1010
+/* 0x1010 - Fixed Interval Timer (FIT) Exception */
+       START_EXCEPTION(0x1010, FITExceptionTrap)
        b FITException
 
-/* 0x1020 - Watchdog Timer (WDT) Exception
-*/
-       . = 0x1020
+/* 0x1020 - Watchdog Timer (WDT) Exception */
+       START_EXCEPTION(0x1020, WDTExceptionTrap)
        b WDTException
 
 /* 0x1100 - Data TLB Miss Exception
@@ -249,13 +271,13 @@ _ENTRY(saved_ksp_limit)
  * load TLB entries from the page table if they exist.
  */
        START_EXCEPTION(0x1100, DTLBMiss)
-       mtspr   SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
-       mtspr   SPRN_SPRG_SCRATCH1, r11
+       mtspr   SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */
+       mtspr   SPRN_SPRG_SCRATCH6, r11
        mtspr   SPRN_SPRG_SCRATCH3, r12
        mtspr   SPRN_SPRG_SCRATCH4, r9
        mfcr    r12
        mfspr   r9, SPRN_PID
-       mtspr   SPRN_SPRG_SCRATCH5, r9
+       rlwimi  r12, r9, 0, 0xff
        mfspr   r10, SPRN_DEAR          /* Get faulting address */
 
        /* If we are faulting a kernel address, we have to use the
@@ -316,13 +338,12 @@ _ENTRY(saved_ksp_limit)
        /* The bailout.  Restore registers to pre-exception conditions
         * and call the heavyweights to help us out.
         */
-       mfspr   r9, SPRN_SPRG_SCRATCH5
-       mtspr   SPRN_PID, r9
-       mtcr    r12
+       mtspr   SPRN_PID, r12
+       mtcrf   0x80, r12
        mfspr   r9, SPRN_SPRG_SCRATCH4
        mfspr   r12, SPRN_SPRG_SCRATCH3
-       mfspr   r11, SPRN_SPRG_SCRATCH1
-       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH6
+       mfspr   r10, SPRN_SPRG_SCRATCH5
        b       DataStorage
 
 /* 0x1200 - Instruction TLB Miss Exception
@@ -330,13 +351,13 @@ _ENTRY(saved_ksp_limit)
  * registers and bailout to a different point.
  */
        START_EXCEPTION(0x1200, ITLBMiss)
-       mtspr   SPRN_SPRG_SCRATCH0, r10  /* Save some working registers */
-       mtspr   SPRN_SPRG_SCRATCH1, r11
+       mtspr   SPRN_SPRG_SCRATCH5, r10  /* Save some working registers */
+       mtspr   SPRN_SPRG_SCRATCH6, r11
        mtspr   SPRN_SPRG_SCRATCH3, r12
        mtspr   SPRN_SPRG_SCRATCH4, r9
        mfcr    r12
        mfspr   r9, SPRN_PID
-       mtspr   SPRN_SPRG_SCRATCH5, r9
+       rlwimi  r12, r9, 0, 0xff
        mfspr   r10, SPRN_SRR0          /* Get faulting address */
 
        /* If we are faulting a kernel address, we have to use the
@@ -397,28 +418,27 @@ _ENTRY(saved_ksp_limit)
        /* The bailout.  Restore registers to pre-exception conditions
         * and call the heavyweights to help us out.
         */
-       mfspr   r9, SPRN_SPRG_SCRATCH5
-       mtspr   SPRN_PID, r9
-       mtcr    r12
+       mtspr   SPRN_PID, r12
+       mtcrf   0x80, r12
        mfspr   r9, SPRN_SPRG_SCRATCH4
        mfspr   r12, SPRN_SPRG_SCRATCH3
-       mfspr   r11, SPRN_SPRG_SCRATCH1
-       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH6
+       mfspr   r10, SPRN_SPRG_SCRATCH5
        b       InstructionAccess
 
-       EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0x1300, Trap_13, unknown_exception)
+       EXCEPTION(0x1400, Trap_14, unknown_exception)
+       EXCEPTION(0x1500, Trap_15, unknown_exception)
+       EXCEPTION(0x1600, Trap_16, unknown_exception)
+       EXCEPTION(0x1700, Trap_17, unknown_exception)
+       EXCEPTION(0x1800, Trap_18, unknown_exception)
+       EXCEPTION(0x1900, Trap_19, unknown_exception)
+       EXCEPTION(0x1A00, Trap_1A, unknown_exception)
+       EXCEPTION(0x1B00, Trap_1B, unknown_exception)
+       EXCEPTION(0x1C00, Trap_1C, unknown_exception)
+       EXCEPTION(0x1D00, Trap_1D, unknown_exception)
+       EXCEPTION(0x1E00, Trap_1E, unknown_exception)
+       EXCEPTION(0x1F00, Trap_1F, unknown_exception)
 
 /* Check for a single step debug exception while in an exception
  * handler before state has been saved.  This is to catch the case
@@ -435,7 +455,7 @@ _ENTRY(saved_ksp_limit)
  */
        /* 0x2000 - Debug Exception */
        START_EXCEPTION(0x2000, DebugTrap)
-       CRITICAL_EXCEPTION_PROLOG
+       CRITICAL_EXCEPTION_PROLOG 0x2000 DebugTrap
 
        /*
         * If this is a single step or branch-taken exception in an
@@ -477,32 +497,35 @@ _ENTRY(saved_ksp_limit)
        /* continue normal handling for a critical exception... */
 2:     mfspr   r4,SPRN_DBSR
        stw     r4,_ESR(r11)            /* DebugException takes DBSR in _ESR */
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       EXC_XFER_TEMPLATE(DebugException, 0x2002, \
-               (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
-               crit_transfer_to_handler, ret_from_crit_exc)
+       prepare_transfer_to_handler
+       bl      DebugException
+       b       ret_from_crit_exc
 
        /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
+       __HEAD
 Decrementer:
-       EXCEPTION_PROLOG
+       EXCEPTION_PROLOG 0x1000 Decrementer
        lis     r0,TSR_PIS@h
        mtspr   SPRN_TSR,r0             /* Clear the PIT exception */
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       EXC_XFER_LITE(0x1000, timer_interrupt)
+       prepare_transfer_to_handler
+       bl      timer_interrupt
+       b       interrupt_return
 
        /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
+       __HEAD
 FITException:
-       EXCEPTION_PROLOG
-       addi    r3,r1,STACK_FRAME_OVERHEAD;
-       EXC_XFER_STD(0x1010, unknown_exception)
+       EXCEPTION_PROLOG 0x1010 FITException
+       prepare_transfer_to_handler
+       bl      unknown_exception
+       b       interrupt_return
 
        /* Watchdog Timer (WDT) Exception. (from 0x1020) */
+       __HEAD
 WDTException:
-       CRITICAL_EXCEPTION_PROLOG;
-       addi    r3,r1,STACK_FRAME_OVERHEAD;
-       EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
-                         (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
-                         crit_transfer_to_handler, ret_from_crit_exc)
+       CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException
+       prepare_transfer_to_handler
+       bl      WatchdogException
+       b       ret_from_crit_exc
 
 /* Other PowerPC processors, namely those derived from the 6xx-series
  * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
@@ -510,6 +533,7 @@ WDTException:
  * reserved.
  */
 
+       __HEAD
        /* Damn, I came up one instruction too many to fit into the
         * exception space :-).  Both the instruction and data TLB
         * miss get to this point to load the TLB.
@@ -543,13 +567,12 @@ finish_tlb_load:
 
        /* Done...restore registers and get out of here.
        */
-       mfspr   r9, SPRN_SPRG_SCRATCH5
-       mtspr   SPRN_PID, r9
-       mtcr    r12
+       mtspr   SPRN_PID, r12
+       mtcrf   0x80, r12
        mfspr   r9, SPRN_SPRG_SCRATCH4
        mfspr   r12, SPRN_SPRG_SCRATCH3
-       mfspr   r11, SPRN_SPRG_SCRATCH1
-       mfspr   r10, SPRN_SPRG_SCRATCH0
+       mfspr   r11, SPRN_SPRG_SCRATCH6
+       mfspr   r10, SPRN_SPRG_SCRATCH5
        rfi                     /* Should sync shadow TLBs */
        b       .               /* prevent prefetch past rfi */
 
index 813fa30..5c106ac 100644
@@ -263,8 +263,7 @@ interrupt_base:
        INSTRUCTION_STORAGE_EXCEPTION
 
        /* External Input Interrupt */
-       EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \
-                 do_IRQ, EXC_XFER_LITE)
+       EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, do_IRQ)
 
        /* Alignment Interrupt */
        ALIGNMENT_EXCEPTION
@@ -277,7 +276,7 @@ interrupt_base:
        FP_UNAVAILABLE_EXCEPTION
 #else
        EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
-                 FloatingPointUnavailable, unknown_exception, EXC_XFER_STD)
+                 FloatingPointUnavailable, unknown_exception)
 #endif
        /* System Call Interrupt */
        START_EXCEPTION(SystemCall)
@@ -285,15 +284,14 @@ interrupt_base:
 
        /* Auxiliary Processor Unavailable Interrupt */
        EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
-                 AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD)
+                 AuxillaryProcessorUnavailable, unknown_exception)
 
        /* Decrementer Interrupt */
        DECREMENTER_EXCEPTION
 
        /* Fixed Internal Timer Interrupt */
        /* TODO: Add FIT support */
-       EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
-                 unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, unknown_exception)
 
        /* Watchdog Timer Interrupt */
        /* TODO: Add watchdog support */
index 46dff3f..7d445e4 100644
 #include <asm/ptrace.h>
 #include <asm/export.h>
 #include <asm/code-patching-asm.h>
+#include <asm/interrupt.h>
+
+/*
+ * Value for the bits that have fixed value in RPN entries.
+ * Also used for tagging DAR for DTLBerror.
+ */
+#define RPN_PATTERN    0x00f0
 
 #include "head_32.h"
 
 #endif
 .endm
 
-/*
- * Value for the bits that have fixed value in RPN entries.
- * Also used for tagging DAR for DTLBerror.
- */
-#define RPN_PATTERN    0x00f0
-
 #define PAGE_SHIFT_512K                19
 #define PAGE_SHIFT_8M          23
 
@@ -118,56 +119,54 @@ instruction_counter:
 #endif
 
 /* System reset */
-       EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD)
+       EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, system_reset_exception)
 
 /* Machine check */
-       . = 0x200
-MachineCheck:
-       EXCEPTION_PROLOG handle_dar_dsisr=1
-       save_dar_dsisr_on_stack r4, r5, r11
-       li      r6, RPN_PATTERN
-       mtspr   SPRN_DAR, r6    /* Tag DAR, to be used in DTLB Error */
-       addi r3,r1,STACK_FRAME_OVERHEAD
-       EXC_XFER_STD(0x200, machine_check_exception)
+       START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
+       EXCEPTION_PROLOG INTERRUPT_MACHINE_CHECK MachineCheck handle_dar_dsisr=1
+       prepare_transfer_to_handler
+       bl      machine_check_exception
+       b       interrupt_return
 
 /* External interrupt */
-       EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+       EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
 
 /* Alignment exception */
-       . = 0x600
-Alignment:
-       EXCEPTION_PROLOG handle_dar_dsisr=1
-       save_dar_dsisr_on_stack r4, r5, r11
-       li      r6, RPN_PATTERN
-       mtspr   SPRN_DAR, r6    /* Tag DAR, to be used in DTLB Error */
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       b       .Lalignment_exception_ool
+       START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+       EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+       prepare_transfer_to_handler
+       bl      alignment_exception
+       REST_NVGPRS(r1)
+       b       interrupt_return
 
 /* Program check exception */
-       EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
+       START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+       EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+       prepare_transfer_to_handler
+       bl      program_check_exception
+       REST_NVGPRS(r1)
+       b       interrupt_return
 
 /* Decrementer */
-       EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
-
-       /* With VMAP_STACK there's not enough room for this at 0x600 */
-       . = 0xa00
-.Lalignment_exception_ool:
-       EXC_XFER_STD(0x600, alignment_exception)
+       EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
 
 /* System call */
-       . = 0xc00
-SystemCall:
-       SYSCALL_ENTRY   0xc00
+       START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+       SYSCALL_ENTRY   INTERRUPT_SYSCALL
 
 /* Single step - not used on 601 */
-       EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
+       EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
 
 /* On the MPC8xx, this is a software emulation interrupt.  It occurs
  * for all unimplemented and illegal instructions.
  */
-       EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD)
+       START_EXCEPTION(INTERRUPT_SOFT_EMU_8xx, SoftEmu)
+       EXCEPTION_PROLOG INTERRUPT_SOFT_EMU_8xx SoftEmu
+       prepare_transfer_to_handler
+       bl      emulation_assist_interrupt
+       REST_NVGPRS(r1)
+       b       interrupt_return
 
-       . = 0x1100
 /*
  * For the MPC8xx, this is a software tablewalk to load the instruction
  * TLB.  The task switch loads the M_TWB register with the pointer to the first
@@ -189,7 +188,7 @@ SystemCall:
 #define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp)
 #endif
 
-InstructionTLBMiss:
+       START_EXCEPTION(INTERRUPT_INST_TLB_MISS_8xx, InstructionTLBMiss)
        mtspr   SPRN_SPRG_SCRATCH2, r10
        mtspr   SPRN_M_TW, r11
 
@@ -245,8 +244,7 @@ InstructionTLBMiss:
        rfi
 #endif
 
-       . = 0x1200
-DataStoreTLBMiss:
+       START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss)
        mtspr   SPRN_SPRG_SCRATCH2, r10
        mtspr   SPRN_M_TW, r11
        mfcr    r11
@@ -309,83 +307,74 @@ DataStoreTLBMiss:
  * to many reasons, such as executing guarded memory or illegal instruction
  * addresses.  There is nothing to do but handle a big time error fault.
  */
-       . = 0x1300
-InstructionTLBError:
-       EXCEPTION_PROLOG
+       START_EXCEPTION(INTERRUPT_INST_TLB_ERROR_8xx, InstructionTLBError)
+       /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
+       EXCEPTION_PROLOG INTERRUPT_INST_STORAGE InstructionTLBError
        andis.  r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
        andis.  r10,r9,SRR1_ISI_NOPT@h
        beq+    .Litlbie
        tlbie   r12
-       /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
 .Litlbie:
        stw     r12, _DAR(r11)
        stw     r5, _DSISR(r11)
-       EXC_XFER_LITE(0x400, handle_page_fault)
+       prepare_transfer_to_handler
+       bl      do_page_fault
+       b       interrupt_return
 
 /* This is the data TLB error on the MPC8xx.  This could be due to
  * many reasons, including a dirty update to a pte.  We bail out to
  * a higher level function that can handle it.
  */
-       . = 0x1400
-DataTLBError:
+       START_EXCEPTION(INTERRUPT_DATA_TLB_ERROR_8xx, DataTLBError)
        EXCEPTION_PROLOG_0 handle_dar_dsisr=1
        mfspr   r11, SPRN_DAR
        cmpwi   cr1, r11, RPN_PATTERN
        beq-    cr1, FixupDAR   /* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
-#ifdef CONFIG_VMAP_STACK
-       li      r11, RPN_PATTERN
-       mtspr   SPRN_DAR, r11   /* Tag DAR, to be used in DTLB Error */
-#endif
        EXCEPTION_PROLOG_1
-       EXCEPTION_PROLOG_2 handle_dar_dsisr=1
-       get_and_save_dar_dsisr_on_stack r4, r5, r11
+       /* 0x300 is DataAccess exception, needed by bad_page_fault() */
+       EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1
+       lwz     r4, _DAR(r11)
+       lwz     r5, _DSISR(r11)
        andis.  r10,r5,DSISR_NOHPTE@h
        beq+    .Ldtlbie
        tlbie   r4
 .Ldtlbie:
-#ifndef CONFIG_VMAP_STACK
-       li      r10,RPN_PATTERN
-       mtspr   SPRN_DAR,r10    /* Tag DAR, to be used in DTLB Error */
-#endif
-       /* 0x300 is DataAccess exception, needed by bad_page_fault() */
-       EXC_XFER_LITE(0x300, handle_page_fault)
+       prepare_transfer_to_handler
+       bl      do_page_fault
+       b       interrupt_return
 
-stack_overflow:
+#ifdef CONFIG_VMAP_STACK
        vmap_stack_overflow_exception
+#endif
 
 /* On the MPC8xx, these next four traps are used for development
  * support of breakpoints and such.  Someday I will get around to
  * using them.
  */
-do_databreakpoint:
-       EXCEPTION_PROLOG_1
-       EXCEPTION_PROLOG_2 handle_dar_dsisr=1
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       mfspr   r4,SPRN_BAR
-       stw     r4,_DAR(r11)
-#ifndef CONFIG_VMAP_STACK
-       mfspr   r5,SPRN_DSISR
-       stw     r5,_DSISR(r11)
-#endif
-       EXC_XFER_STD(0x1c00, do_break)
-
-       . = 0x1c00
-DataBreakpoint:
+       START_EXCEPTION(INTERRUPT_DATA_BREAKPOINT_8xx, DataBreakpoint)
        EXCEPTION_PROLOG_0 handle_dar_dsisr=1
        mfspr   r11, SPRN_SRR0
        cmplwi  cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
        cmplwi  cr7, r11, (.Litlbie - PAGE_OFFSET)@l
        cror    4*cr1+eq, 4*cr1+eq, 4*cr7+eq
-       bne     cr1, do_databreakpoint
+       bne     cr1, 1f
        mtcr    r10
        mfspr   r10, SPRN_SPRG_SCRATCH0
        mfspr   r11, SPRN_SPRG_SCRATCH1
        rfi
 
+1:     EXCEPTION_PROLOG_1
+       EXCEPTION_PROLOG_2 INTERRUPT_DATA_BREAKPOINT_8xx DataBreakpoint handle_dar_dsisr=1
+       mfspr   r4,SPRN_BAR
+       stw     r4,_DAR(r11)
+       prepare_transfer_to_handler
+       bl      do_break
+       REST_NVGPRS(r1)
+       b       interrupt_return
+
 #ifdef CONFIG_PERF_EVENTS
-       . = 0x1d00
-InstructionBreakpoint:
+       START_EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, InstructionBreakpoint)
        mtspr   SPRN_SPRG_SCRATCH0, r10
        lwz     r10, (instruction_counter - PAGE_OFFSET)@l(0)
        addi    r10, r10, -1
@@ -396,11 +385,12 @@ InstructionBreakpoint:
        mfspr   r10, SPRN_SPRG_SCRATCH0
        rfi
 #else
-       EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
+       EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, Trap_1d, unknown_exception)
 #endif
-       EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+       EXCEPTION(0x1f00, Trap_1f, unknown_exception)
 
+       __HEAD
        . = 0x2000
 
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
@@ -510,14 +500,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
 152:
        mfdar   r11
        mtctr   r11                     /* restore ctr reg from DAR */
-#ifdef CONFIG_VMAP_STACK
        mfspr   r11, SPRN_SPRG_THREAD
        stw     r10, DAR(r11)
        mfspr   r10, SPRN_DSISR
        stw     r10, DSISR(r11)
-#else
-       mtdar   r10                     /* save fault EA to DAR */
-#endif
        mfspr   r10,SPRN_M_TW
        b       DARFixed                /* Go back to normal TLB handling */
 
@@ -819,7 +805,7 @@ EXPORT_SYMBOL(empty_zero_page)
 swapper_pg_dir:
        .space  PGD_TABLE_SIZE
 
-/* Room for two PTE table poiners, usually the kernel and current user
+/* Room for two PTE table pointers, usually the kernel and current user
  * pointer to their respective root page table (pgdir).
  */
        .globl  abatron_pteptrs
index 565e84e..065178f 100644
@@ -31,6 +31,7 @@
 #include <asm/kvm_book3s_asm.h>
 #include <asm/export.h>
 #include <asm/feature-fixups.h>
+#include <asm/interrupt.h>
 
 #include "head_32.h"
 
@@ -239,7 +240,7 @@ __secondary_hold_acknowledge:
 /* System reset */
 /* core99 pmac starts the secondary here by changing the vector, and
    putting it back to what it was (unknown_async_exception) when done.  */
-       EXCEPTION(0x100, Reset, unknown_async_exception, EXC_XFER_STD)
+       EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, unknown_async_exception)
 
 /* Machine check */
 /*
@@ -255,40 +256,28 @@ __secondary_hold_acknowledge:
  * pointer when we take an exception from supervisor mode.)
  *     -- paulus.
  */
-       . = 0x200
-       DO_KVM  0x200
-MachineCheck:
+       START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
        EXCEPTION_PROLOG_0
 #ifdef CONFIG_PPC_CHRP
-#ifdef CONFIG_VMAP_STACK
        mtspr   SPRN_SPRG_SCRATCH2,r1
        mfspr   r1, SPRN_SPRG_THREAD
        lwz     r1, RTAS_SP(r1)
        cmpwi   cr1, r1, 0
        bne     cr1, 7f
        mfspr   r1, SPRN_SPRG_SCRATCH2
-#else
-       mfspr   r11, SPRN_SPRG_THREAD
-       lwz     r11, RTAS_SP(r11)
-       cmpwi   cr1, r11, 0
-       bne     cr1, 7f
-#endif
 #endif /* CONFIG_PPC_CHRP */
-       EXCEPTION_PROLOG_1 for_rtas=1
-7:     EXCEPTION_PROLOG_2
-       addi    r3,r1,STACK_FRAME_OVERHEAD
+       EXCEPTION_PROLOG_1
+7:     EXCEPTION_PROLOG_2 0x200 MachineCheck
 #ifdef CONFIG_PPC_CHRP
-       beq     cr1, machine_check_tramp
+       beq     cr1, 1f
        twi     31, 0, 0
-#else
-       b       machine_check_tramp
 #endif
+1:     prepare_transfer_to_handler
+       bl      machine_check_exception
+       b       interrupt_return
 
 /* Data access exception. */
-       . = 0x300
-       DO_KVM  0x300
-DataAccess:
-#ifdef CONFIG_VMAP_STACK
+       START_EXCEPTION(INTERRUPT_DATA_STORAGE, DataAccess)
 #ifdef CONFIG_PPC_BOOK3S_604
 BEGIN_MMU_FTR_SECTION
        mtspr   SPRN_SPRG_SCRATCH2,r10
@@ -309,30 +298,20 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
 #endif
 1:     EXCEPTION_PROLOG_0 handle_dar_dsisr=1
        EXCEPTION_PROLOG_1
-       b       handle_page_fault_tramp_1
-#else  /* CONFIG_VMAP_STACK */
-       EXCEPTION_PROLOG handle_dar_dsisr=1
-       get_and_save_dar_dsisr_on_stack r4, r5, r11
-#ifdef CONFIG_PPC_BOOK3S_604
-BEGIN_MMU_FTR_SECTION
-       andis.  r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
-       bne     handle_page_fault_tramp_2       /* if not, try to put a PTE */
-       rlwinm  r3, r5, 32 - 15, 21, 21         /* DSISR_STORE -> _PAGE_RW */
-       bl      hash_page
-       b       handle_page_fault_tramp_1
-MMU_FTR_SECTION_ELSE
-#endif
-       b       handle_page_fault_tramp_2
-#ifdef CONFIG_PPC_BOOK3S_604
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
-#endif
-#endif /* CONFIG_VMAP_STACK */
+       EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1
+       prepare_transfer_to_handler
+       lwz     r5, _DSISR(r11)
+       andis.  r0, r5, DSISR_DABRMATCH@h
+       bne-    1f
+       bl      do_page_fault
+       b       interrupt_return
+1:     bl      do_break
+       REST_NVGPRS(r1)
+       b       interrupt_return
+
 
 /* Instruction access exception. */
-       . = 0x400
-       DO_KVM  0x400
-InstructionAccess:
-#ifdef CONFIG_VMAP_STACK
+       START_EXCEPTION(INTERRUPT_INST_STORAGE, InstructionAccess)
        mtspr   SPRN_SPRG_SCRATCH0,r10
        mtspr   SPRN_SPRG_SCRATCH1,r11
        mfspr   r10, SPRN_SPRG_THREAD
@@ -352,43 +331,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
        andi.   r11, r11, MSR_PR
 
        EXCEPTION_PROLOG_1
-       EXCEPTION_PROLOG_2
-#else  /* CONFIG_VMAP_STACK */
-       EXCEPTION_PROLOG
-       andis.  r0,r9,SRR1_ISI_NOPT@h   /* no pte found? */
-       beq     1f                      /* if so, try to put a PTE */
-       li      r3,0                    /* into the hash table */
-       mr      r4,r12                  /* SRR0 is fault address */
-#ifdef CONFIG_PPC_BOOK3S_604
-BEGIN_MMU_FTR_SECTION
-       bl      hash_page
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-#endif
-#endif /* CONFIG_VMAP_STACK */
+       EXCEPTION_PROLOG_2 INTERRUPT_INST_STORAGE InstructionAccess
        andis.  r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
        stw     r5, _DSISR(r11)
        stw     r12, _DAR(r11)
-       EXC_XFER_LITE(0x400, handle_page_fault)
+       prepare_transfer_to_handler
+       bl      do_page_fault
+       b       interrupt_return
 
 /* External interrupt */
-       EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+       EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
 
 /* Alignment exception */
-       . = 0x600
-       DO_KVM  0x600
-Alignment:
-       EXCEPTION_PROLOG handle_dar_dsisr=1
-       save_dar_dsisr_on_stack r4, r5, r11
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       b       alignment_exception_tramp
+       START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+       EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+       prepare_transfer_to_handler
+       bl      alignment_exception
+       REST_NVGPRS(r1)
+       b       interrupt_return
 
 /* Program check exception */
-       EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
+       START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+       EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+       prepare_transfer_to_handler
+       bl      program_check_exception
+       REST_NVGPRS(r1)
+       b       interrupt_return
 
 /* Floating-point unavailable */
-       . = 0x800
-       DO_KVM  0x800
-FPUnavailable:
+       START_EXCEPTION(0x800, FPUnavailable)
 #ifdef CONFIG_PPC_FPU
 BEGIN_FTR_SECTION
 /*
@@ -397,30 +368,29 @@ BEGIN_FTR_SECTION
  */
        b       ProgramCheck
 END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
-       EXCEPTION_PROLOG
+       EXCEPTION_PROLOG INTERRUPT_FP_UNAVAIL FPUnavailable
        beq     1f
        bl      load_up_fpu             /* if from user, just load it up */
        b       fast_exception_return
-1:     addi    r3,r1,STACK_FRAME_OVERHEAD
-       EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception)
+1:     prepare_transfer_to_handler
+       bl      kernel_fp_unavailable_exception
+       b       interrupt_return
 #else
        b       ProgramCheck
 #endif
 
 /* Decrementer */
-       EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
+       EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
 
-       EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0xa00, Trap_0a, unknown_exception)
+       EXCEPTION(0xb00, Trap_0b, unknown_exception)
 
 /* System call */
-       . = 0xc00
-       DO_KVM  0xc00
-SystemCall:
-       SYSCALL_ENTRY   0xc00
+       START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+       SYSCALL_ENTRY   INTERRUPT_SYSCALL
 
-       EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
-       EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
+       EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
+       EXCEPTION(0xe00, Trap_0e, unknown_exception)
 
 /*
  * The Altivec unavailable trap is at 0x0f20.  Foo.
@@ -430,19 +400,18 @@ SystemCall:
  * non-altivec kernel running on a machine with altivec just
  * by executing an altivec instruction.
  */
-       . = 0xf00
-       DO_KVM  0xf00
+       START_EXCEPTION(INTERRUPT_PERFMON, PerformanceMonitorTrap)
        b       PerformanceMonitor
 
-       . = 0xf20
-       DO_KVM  0xf20
+       START_EXCEPTION(INTERRUPT_ALTIVEC_UNAVAIL, AltiVecUnavailableTrap)
        b       AltiVecUnavailable
 
+       __HEAD
 /*
  * Handle TLB miss for instruction on 603/603e.
  * Note: we get an alternate set of r0 - r3 to use automatically.
  */
-       . = 0x1000
+       . = INTERRUPT_INST_TLB_MISS_603
 InstructionTLBMiss:
 /*
  * r0: scratch
@@ -508,7 +477,7 @@ InstructionAddressInvalid:
 /*
  * Handle TLB miss for DATA Load operation on 603/603e
  */
-       . = 0x1100
+       . = INTERRUPT_DATA_LOAD_TLB_MISS_603
 DataLoadTLBMiss:
 /*
  * r0: scratch
@@ -586,7 +555,7 @@ DataAddressInvalid:
 /*
  * Handle TLB miss for DATA Store on 603/603e
  */
-       . = 0x1200
+       . = INTERRUPT_DATA_STORE_TLB_MISS_603
 DataStoreTLBMiss:
 /*
  * r0: scratch
@@ -650,57 +619,39 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 #define TAUException   unknown_async_exception
 #endif
 
-       EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
-       EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD)
-       EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD)
-       EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
-       EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD)
-       EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception)
+       EXCEPTION(0x1400, SMI, SMIException)
+       EXCEPTION(0x1500, Trap_15, unknown_exception)
+       EXCEPTION(0x1600, Trap_16, altivec_assist_exception)
+       EXCEPTION(0x1700, Trap_17, TAUException)
+       EXCEPTION(0x1800, Trap_18, unknown_exception)
+       EXCEPTION(0x1900, Trap_19, unknown_exception)
+       EXCEPTION(0x1a00, Trap_1a, unknown_exception)
+       EXCEPTION(0x1b00, Trap_1b, unknown_exception)
+       EXCEPTION(0x1c00, Trap_1c, unknown_exception)
+       EXCEPTION(0x1d00, Trap_1d, unknown_exception)
+       EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+       EXCEPTION(0x1f00, Trap_1f, unknown_exception)
+       EXCEPTION(0x2000, RunMode, RunModeException)
+       EXCEPTION(0x2100, Trap_21, unknown_exception)
+       EXCEPTION(0x2200, Trap_22, unknown_exception)
+       EXCEPTION(0x2300, Trap_23, unknown_exception)
+       EXCEPTION(0x2400, Trap_24, unknown_exception)
+       EXCEPTION(0x2500, Trap_25, unknown_exception)
+       EXCEPTION(0x2600, Trap_26, unknown_exception)
+       EXCEPTION(0x2700, Trap_27, unknown_exception)
+       EXCEPTION(0x2800, Trap_28, unknown_exception)
+       EXCEPTION(0x2900, Trap_29, unknown_exception)
+       EXCEPTION(0x2a00, Trap_2a, unknown_exception)
+       EXCEPTION(0x2b00, Trap_2b, unknown_exception)
+       EXCEPTION(0x2c00, Trap_2c, unknown_exception)
+       EXCEPTION(0x2d00, Trap_2d, unknown_exception)
+       EXCEPTION(0x2e00, Trap_2e, unknown_exception)
+       EXCEPTION(0x2f00, Trap_2f, unknown_exception)
 
+       __HEAD
        . = 0x3000
 
-machine_check_tramp:
-       EXC_XFER_STD(0x200, machine_check_exception)
-
-alignment_exception_tramp:
-       EXC_XFER_STD(0x600, alignment_exception)
-
-handle_page_fault_tramp_1:
-#ifdef CONFIG_VMAP_STACK
-       EXCEPTION_PROLOG_2 handle_dar_dsisr=1
-#endif
-       lwz     r5, _DSISR(r11)
-       /* fall through */
-handle_page_fault_tramp_2:
-       andis.  r0, r5, DSISR_DABRMATCH@h
-       bne-    1f
-       EXC_XFER_LITE(0x300, handle_page_fault)
-1:     EXC_XFER_STD(0x300, do_break)
-
-#ifdef CONFIG_VMAP_STACK
 #ifdef CONFIG_PPC_BOOK3S_604
 .macro save_regs_thread                thread
        stw     r0, THR0(\thread)
@@ -775,26 +726,31 @@ fast_hash_page_return:
        rfi
 #endif /* CONFIG_PPC_BOOK3S_604 */
 
-stack_overflow:
+#ifdef CONFIG_VMAP_STACK
        vmap_stack_overflow_exception
 #endif
 
+       __HEAD
 AltiVecUnavailable:
-       EXCEPTION_PROLOG
+       EXCEPTION_PROLOG 0xf20 AltiVecUnavailable
 #ifdef CONFIG_ALTIVEC
        beq     1f
        bl      load_up_altivec         /* if from user, just load it up */
        b       fast_exception_return
 #endif /* CONFIG_ALTIVEC */
-1:     addi    r3,r1,STACK_FRAME_OVERHEAD
-       EXC_XFER_LITE(0xf20, altivec_unavailable_exception)
+1:     prepare_transfer_to_handler
+       bl      altivec_unavailable_exception
+       b       interrupt_return
 
+       __HEAD
 PerformanceMonitor:
-       EXCEPTION_PROLOG
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       EXC_XFER_STD(0xf00, performance_monitor_exception)
+       EXCEPTION_PROLOG 0xf00 PerformanceMonitor
+       prepare_transfer_to_handler
+       bl      performance_monitor_exception
+       b       interrupt_return
 
 
+       __HEAD
 /*
  * This code is jumped to from the startup code to copy
  * the kernel image to physical address PHYSICAL_START.
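
Editor's note: the hunks above rework the 32-bit Book3S vectors into one uniform shape. START_EXCEPTION places the vector, the prolog records the trap number in the frame, prepare_transfer_to_handler finishes setting up the C environment, and the vector then does "bl <C handler>" followed by "b interrupt_return", so the per-vector EXC_XFER_* trampolines disappear. Below is a rough standalone C model of that shape; struct fake_regs, dispatch_trap and the handler names are invented for illustration and are not kernel symbols.

	#include <stdio.h>

	struct fake_regs { unsigned long trap; };

	static void program_check_handler(struct fake_regs *regs)
	{
		printf("program check, trap=0x%lx\n", regs->trap);
	}

	static void decrementer_handler(struct fake_regs *regs)
	{
		printf("timer, trap=0x%lx\n", regs->trap);
	}

	static void interrupt_return_stub(struct fake_regs *regs)
	{
		printf("common exit for trap 0x%lx\n", regs->trap);
	}

	static void dispatch_trap(unsigned long trap)
	{
		/* the prolog stores the trap number before any handler runs */
		struct fake_regs regs = { .trap = trap };

		switch (trap) {
		case 0x700: program_check_handler(&regs); break;  /* bl program_check_exception */
		case 0x900: decrementer_handler(&regs); break;    /* bl timer_interrupt */
		}
		interrupt_return_stub(&regs);                      /* b interrupt_return */
	}

	int main(void)
	{
		dispatch_trap(0x700);
		dispatch_trap(0x900);
		return 0;
	}
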
index 4785779..f824700 100644 (file)
@@ -44,7 +44,7 @@ END_BTB_FLUSH_SECTION
 #endif
 
 
-#define NORMAL_EXCEPTION_PROLOG(intno)                                              \
+#define NORMAL_EXCEPTION_PROLOG(trapno, intno)                                              \
        mtspr   SPRN_SPRG_WSCRATCH0, r10;       /* save one register */      \
        mfspr   r10, SPRN_SPRG_THREAD;                                       \
        stw     r11, THREAD_NORMSAVE(0)(r10);                                \
@@ -53,6 +53,8 @@ END_BTB_FLUSH_SECTION
        mfspr   r11, SPRN_SRR1;                                              \
        DO_KVM  BOOKE_INTERRUPT_##intno SPRN_SRR1;                           \
        andi.   r11, r11, MSR_PR;       /* check whether user or kernel    */\
+       LOAD_REG_IMMEDIATE(r11, MSR_KERNEL);                            \
+       mtmsr   r11;                                                    \
        mr      r11, r1;                                                     \
        beq     1f;                                                          \
        BOOKE_CLEAR_BTB(r11)                                            \
@@ -76,12 +78,39 @@ END_BTB_FLUSH_SECTION
        stw     r1, 0(r11);                                                  \
        mr      r1, r11;                                                     \
        rlwinm  r9,r9,0,14,12;          /* clear MSR_WE (necessary?)       */\
-       stw     r0,GPR0(r11);                                                \
-       lis     r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \
-       addi    r10, r10, STACK_FRAME_REGS_MARKER@l;                         \
-       stw     r10, 8(r11);                                                 \
-       SAVE_4GPRS(3, r11);                                                  \
-       SAVE_2GPRS(7, r11)
+       COMMON_EXCEPTION_PROLOG_END trapno
+
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+       stw     r0,GPR0(r1)
+       lis     r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+       addi    r10, r10, STACK_FRAME_REGS_MARKER@l
+       stw     r10, 8(r1)
+       li      r10, \trapno
+       stw     r10,_TRAP(r1)
+       SAVE_4GPRS(3, r1)
+       SAVE_2GPRS(7, r1)
+       SAVE_NVGPRS(r1)
+       stw     r2,GPR2(r1)
+       stw     r12,_NIP(r1)
+       stw     r9,_MSR(r1)
+       mfctr   r10
+       mfspr   r2,SPRN_SPRG_THREAD
+       stw     r10,_CTR(r1)
+       tovirt(r2, r2)
+       mfspr   r10,SPRN_XER
+       addi    r2, r2, -THREAD
+       stw     r10,_XER(r1)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+.endm
+
+.macro prepare_transfer_to_handler
+#ifdef CONFIG_E500
+       andi.   r12,r9,MSR_PR
+       bne     777f
+       bl      prepare_transfer_to_handler
+777:
+#endif
+.endm
 
 .macro SYSCALL_ENTRY trapno intno srr1
        mfspr   r10, SPRN_SPRG_THREAD
@@ -180,7 +209,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
  * registers as the normal prolog above. Instead we use a portion of the
  * critical/machine check exception stack at low physical addresses.
  */
-#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, trapno, intno, exc_level_srr0, exc_level_srr1) \
        mtspr   SPRN_SPRG_WSCRATCH_##exc_level,r8;                           \
        BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
        stw     r9,GPR9(r8);            /* save various registers          */\
@@ -192,6 +221,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
        DO_KVM  BOOKE_INTERRUPT_##intno exc_level_srr1;                      \
        BOOKE_CLEAR_BTB(r10)                                            \
        andi.   r11,r11,MSR_PR;                                              \
+       LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE));  \
+       mtmsr   r11;                                                    \
        mfspr   r11,SPRN_SPRG_THREAD;   /* if from user, start at top of   */\
        lwz     r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\
        addi    r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame    */\
@@ -221,16 +252,44 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
        stw     r1,0(r11);                                                   \
        mr      r1,r11;                                                      \
        rlwinm  r9,r9,0,14,12;          /* clear MSR_WE (necessary?)       */\
-       stw     r0,GPR0(r11);                                                \
-       SAVE_4GPRS(3, r11);                                                  \
-       SAVE_2GPRS(7, r11)
+       COMMON_EXCEPTION_PROLOG_END trapno
+
+#define SAVE_xSRR(xSRR)                        \
+       mfspr   r0,SPRN_##xSRR##0;      \
+       stw     r0,_##xSRR##0(r1);      \
+       mfspr   r0,SPRN_##xSRR##1;      \
+       stw     r0,_##xSRR##1(r1)
+
+
+.macro SAVE_MMU_REGS
+#ifdef CONFIG_PPC_BOOK3E_MMU
+       mfspr   r0,SPRN_MAS0
+       stw     r0,MAS0(r1)
+       mfspr   r0,SPRN_MAS1
+       stw     r0,MAS1(r1)
+       mfspr   r0,SPRN_MAS2
+       stw     r0,MAS2(r1)
+       mfspr   r0,SPRN_MAS3
+       stw     r0,MAS3(r1)
+       mfspr   r0,SPRN_MAS6
+       stw     r0,MAS6(r1)
+#ifdef CONFIG_PHYS_64BIT
+       mfspr   r0,SPRN_MAS7
+       stw     r0,MAS7(r1)
+#endif /* CONFIG_PHYS_64BIT */
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+#ifdef CONFIG_44x
+       mfspr   r0,SPRN_MMUCR
+       stw     r0,MMUCR(r1)
+#endif
+.endm
 
-#define CRITICAL_EXCEPTION_PROLOG(intno) \
-               EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1)
-#define DEBUG_EXCEPTION_PROLOG \
-               EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
-#define MCHECK_EXCEPTION_PROLOG \
-               EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \
+#define CRITICAL_EXCEPTION_PROLOG(trapno, intno) \
+               EXC_LEVEL_EXCEPTION_PROLOG(CRIT, trapno+2, intno, SPRN_CSRR0, SPRN_CSRR1)
+#define DEBUG_EXCEPTION_PROLOG(trapno) \
+               EXC_LEVEL_EXCEPTION_PROLOG(DBG, trapno+8, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
+#define MCHECK_EXCEPTION_PROLOG(trapno) \
+               EXC_LEVEL_EXCEPTION_PROLOG(MC, trapno+4, MACHINE_CHECK, \
                        SPRN_MCSRR0, SPRN_MCSRR1)
 
 /*
@@ -257,44 +316,34 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
         .align 5;                                                                   \
 label:
 
-#define EXCEPTION(n, intno, label, hdlr, xfer)                 \
+#define EXCEPTION(n, intno, label, hdlr)                       \
        START_EXCEPTION(label);                                 \
-       NORMAL_EXCEPTION_PROLOG(intno);                         \
-       addi    r3,r1,STACK_FRAME_OVERHEAD;                     \
-       xfer(n, hdlr)
+       NORMAL_EXCEPTION_PROLOG(n, intno);                      \
+       prepare_transfer_to_handler;                            \
+       bl      hdlr;                                           \
+       b       interrupt_return
 
 #define CRITICAL_EXCEPTION(n, intno, label, hdlr)                      \
        START_EXCEPTION(label);                                         \
-       CRITICAL_EXCEPTION_PROLOG(intno);                               \
-       addi    r3,r1,STACK_FRAME_OVERHEAD;                             \
-       EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
-                         crit_transfer_to_handler, ret_from_crit_exc)
+       CRITICAL_EXCEPTION_PROLOG(n, intno);                            \
+       SAVE_MMU_REGS;                                                  \
+       SAVE_xSRR(SRR);                                                 \
+       prepare_transfer_to_handler;                                    \
+       bl      hdlr;                                                   \
+       b       ret_from_crit_exc
 
 #define MCHECK_EXCEPTION(n, label, hdlr)                       \
        START_EXCEPTION(label);                                 \
-       MCHECK_EXCEPTION_PROLOG;                                \
+       MCHECK_EXCEPTION_PROLOG(n);                             \
        mfspr   r5,SPRN_ESR;                                    \
        stw     r5,_ESR(r11);                                   \
-       addi    r3,r1,STACK_FRAME_OVERHEAD;                     \
-       EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
-                         mcheck_transfer_to_handler, ret_from_mcheck_exc)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret)  \
-       li      r10,trap;                                       \
-       stw     r10,_TRAP(r11);                                 \
-       lis     r10,msr@h;                                      \
-       ori     r10,r10,msr@l;                                  \
-       bl      tfer;                                           \
-       .long   hdlr;                                           \
-       .long   ret
-
-#define EXC_XFER_STD(n, hdlr)          \
-       EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
-                         ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr)         \
-       EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
-                         ret_from_except)
+       SAVE_xSRR(DSRR);                                        \
+       SAVE_xSRR(CSRR);                                        \
+       SAVE_MMU_REGS;                                          \
+       SAVE_xSRR(SRR);                                         \
+       prepare_transfer_to_handler;                            \
+       bl      hdlr;                                           \
+       b       ret_from_mcheck_exc
 
 /* Check for a single step debug exception while in an exception
  * handler before state has been saved.  This is to catch the case
@@ -311,7 +360,7 @@ label:
  */
 #define DEBUG_DEBUG_EXCEPTION                                                \
        START_EXCEPTION(DebugDebug);                                          \
-       DEBUG_EXCEPTION_PROLOG;                                               \
+       DEBUG_EXCEPTION_PROLOG(2000);                                                 \
                                                                              \
        /*                                                                    \
         * If there is a single step or branch-taken exception in an          \
@@ -360,12 +409,16 @@ label:
        /* continue normal handling for a debug exception... */               \
 2:     mfspr   r4,SPRN_DBSR;                                                 \
        stw     r4,_ESR(r11);           /* DebugException takes DBSR in _ESR */\
-       addi    r3,r1,STACK_FRAME_OVERHEAD;                                   \
-       EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc)
+       SAVE_xSRR(CSRR);                                                      \
+       SAVE_MMU_REGS;                                                        \
+       SAVE_xSRR(SRR);                                                       \
+       prepare_transfer_to_handler;                                  \
+       bl      DebugException;                                               \
+       b       ret_from_debug_exc
 
 #define DEBUG_CRIT_EXCEPTION                                                 \
        START_EXCEPTION(DebugCrit);                                           \
-       CRITICAL_EXCEPTION_PROLOG(DEBUG);                                     \
+       CRITICAL_EXCEPTION_PROLOG(2000,DEBUG);                                \
                                                                              \
        /*                                                                    \
         * If there is a single step or branch-taken exception in an          \
@@ -414,58 +467,71 @@ label:
        /* continue normal handling for a critical exception... */            \
 2:     mfspr   r4,SPRN_DBSR;                                                 \
        stw     r4,_ESR(r11);           /* DebugException takes DBSR in _ESR */\
-       addi    r3,r1,STACK_FRAME_OVERHEAD;                                   \
-       EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc)
+       SAVE_MMU_REGS;                                                        \
+       SAVE_xSRR(SRR);                                                       \
+       prepare_transfer_to_handler;                                          \
+       bl      DebugException;                                               \
+       b       ret_from_crit_exc
 
 #define DATA_STORAGE_EXCEPTION                                               \
        START_EXCEPTION(DataStorage)                                          \
-       NORMAL_EXCEPTION_PROLOG(DATA_STORAGE);                \
+       NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE);                 \
        mfspr   r5,SPRN_ESR;            /* Grab the ESR and save it */        \
        stw     r5,_ESR(r11);                                                 \
        mfspr   r4,SPRN_DEAR;           /* Grab the DEAR */                   \
        stw     r4, _DEAR(r11);                                               \
-       EXC_XFER_LITE(0x0300, handle_page_fault)
+       prepare_transfer_to_handler;                                          \
+       bl      do_page_fault;                                                \
+       b       interrupt_return
 
 #define INSTRUCTION_STORAGE_EXCEPTION                                        \
        START_EXCEPTION(InstructionStorage)                                   \
-       NORMAL_EXCEPTION_PROLOG(INST_STORAGE);                \
+       NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE);                 \
        mfspr   r5,SPRN_ESR;            /* Grab the ESR and save it */        \
        stw     r5,_ESR(r11);                                                 \
        stw     r12, _DEAR(r11);        /* Pass SRR0 as arg2 */               \
-       EXC_XFER_LITE(0x0400, handle_page_fault)
+       prepare_transfer_to_handler;                                          \
+       bl      do_page_fault;                                                \
+       b       interrupt_return
 
 #define ALIGNMENT_EXCEPTION                                                  \
        START_EXCEPTION(Alignment)                                            \
-       NORMAL_EXCEPTION_PROLOG(ALIGNMENT);                   \
+       NORMAL_EXCEPTION_PROLOG(0x600, ALIGNMENT);                    \
        mfspr   r4,SPRN_DEAR;           /* Grab the DEAR and save it */       \
        stw     r4,_DEAR(r11);                                                \
-       addi    r3,r1,STACK_FRAME_OVERHEAD;                                   \
-       EXC_XFER_STD(0x0600, alignment_exception)
+       prepare_transfer_to_handler;                                          \
+       bl      alignment_exception;                                          \
+       REST_NVGPRS(r1);                                                      \
+       b       interrupt_return
 
 #define PROGRAM_EXCEPTION                                                    \
        START_EXCEPTION(Program)                                              \
-       NORMAL_EXCEPTION_PROLOG(PROGRAM);                     \
+       NORMAL_EXCEPTION_PROLOG(0x700, PROGRAM);                      \
        mfspr   r4,SPRN_ESR;            /* Grab the ESR and save it */        \
        stw     r4,_ESR(r11);                                                 \
-       addi    r3,r1,STACK_FRAME_OVERHEAD;                                   \
-       EXC_XFER_STD(0x0700, program_check_exception)
+       prepare_transfer_to_handler;                                          \
+       bl      program_check_exception;                                      \
+       REST_NVGPRS(r1);                                                      \
+       b       interrupt_return
 
 #define DECREMENTER_EXCEPTION                                                \
        START_EXCEPTION(Decrementer)                                          \
-       NORMAL_EXCEPTION_PROLOG(DECREMENTER);                 \
+       NORMAL_EXCEPTION_PROLOG(0x900, DECREMENTER);                  \
        lis     r0,TSR_DIS@h;           /* Setup the DEC interrupt mask */    \
        mtspr   SPRN_TSR,r0;            /* Clear the DEC interrupt */         \
-       addi    r3,r1,STACK_FRAME_OVERHEAD;                                   \
-       EXC_XFER_LITE(0x0900, timer_interrupt)
+       prepare_transfer_to_handler;                                          \
+       bl      timer_interrupt;                                              \
+       b       interrupt_return
 
 #define FP_UNAVAILABLE_EXCEPTION                                             \
        START_EXCEPTION(FloatingPointUnavailable)                             \
-       NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL);                  \
+       NORMAL_EXCEPTION_PROLOG(0x800, FP_UNAVAIL);                   \
        beq     1f;                                                           \
        bl      load_up_fpu;            /* if from user, just load it up */   \
        b       fast_exception_return;                                        \
-1:     addi    r3,r1,STACK_FRAME_OVERHEAD;                                   \
-       EXC_XFER_STD(0x800, kernel_fp_unavailable_exception)
+1:     prepare_transfer_to_handler;                                          \
+       bl      kernel_fp_unavailable_exception;                              \
+       b       interrupt_return
 
 #else /* __ASSEMBLY__ */
 struct exception_regs {
@@ -481,7 +547,6 @@ struct exception_regs {
        unsigned long csrr1;
        unsigned long dsrr0;
        unsigned long dsrr1;
-       unsigned long saved_ksp_limit;
 };
 
 /* ensure this structure is always sized to a multiple of the stack alignment */
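
Editor's note: SAVE_xSRR() above relies on preprocessor token pasting, so that SPRN_##xSRR##0 turns SAVE_xSRR(SRR) into accesses of SPRN_SRR0/SPRN_SRR1 and SAVE_xSRR(CSRR) into SPRN_CSRR0/SPRN_CSRR1. A tiny standalone C demo of the same trick follows; the SPR numbers are only demo data.

	#include <stdio.h>

	#define SPRN_SRR0  26
	#define SPRN_SRR1  27
	#define SPRN_CSRR0 58
	#define SPRN_CSRR1 59

	/* paste the argument into both the string and the macro names */
	#define PRINT_xSRR(xSRR)                                        \
		printf(#xSRR "0=%d " #xSRR "1=%d\n",                    \
		       SPRN_##xSRR##0, SPRN_##xSRR##1)

	int main(void)
	{
		PRINT_xSRR(SRR);	/* expands to SPRN_SRR0 / SPRN_SRR1 */
		PRINT_xSRR(CSRR);	/* expands to SPRN_CSRR0 / SPRN_CSRR1 */
		return 0;
	}
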
index 3f4a40c..a1a5c3f 100644 (file)
@@ -113,7 +113,7 @@ _ENTRY(_start);
 
 1:
        /*
-        * We have the runtime (virutal) address of our base.
+        * We have the runtime (virtual) address of our base.
         * We calculate our shift of offset from a 64M page.
         * We could map the 64M page we belong to at PAGE_OFFSET and
         * get going from there.
@@ -363,23 +363,26 @@ interrupt_base:
 
        /* Data Storage Interrupt */
        START_EXCEPTION(DataStorage)
-       NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
+       NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE)
        mfspr   r5,SPRN_ESR             /* Grab the ESR, save it */
        stw     r5,_ESR(r11)
        mfspr   r4,SPRN_DEAR            /* Grab the DEAR, save it */
        stw     r4, _DEAR(r11)
        andis.  r10,r5,(ESR_ILK|ESR_DLK)@h
        bne     1f
-       EXC_XFER_LITE(0x0300, handle_page_fault)
+       prepare_transfer_to_handler
+       bl      do_page_fault
+       b       interrupt_return
 1:
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       EXC_XFER_LITE(0x0300, CacheLockingException)
+       prepare_transfer_to_handler
+       bl      CacheLockingException
+       b       interrupt_return
 
        /* Instruction Storage Interrupt */
        INSTRUCTION_STORAGE_EXCEPTION
 
        /* External Input Interrupt */
-       EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE)
+       EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ)
 
        /* Alignment Interrupt */
        ALIGNMENT_EXCEPTION
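
Editor's note: in the e500 DataStorage path above, the ESR is tested for the cache-locking bits before a handler is chosen: lock faults go to CacheLockingException, everything else to do_page_fault. A small standalone sketch of that routing; the bit values and function names are placeholders, not the kernel's definitions.

	#include <stdio.h>

	#define ESR_DLK 0x00200000u	/* illustrative bit positions only */
	#define ESR_ILK 0x00100000u

	static void handle_page_fault(void)    { puts("do_page_fault"); }
	static void handle_cache_locking(void) { puts("CacheLockingException"); }

	static void data_storage(unsigned int esr)
	{
		if (esr & (ESR_ILK | ESR_DLK))	/* andis. r10,r5,(ESR_ILK|ESR_DLK)@h */
			handle_cache_locking();
		else
			handle_page_fault();
	}

	int main(void)
	{
		data_storage(0);
		data_storage(ESR_DLK);
		return 0;
	}
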
@@ -391,8 +394,7 @@ interrupt_base:
 #ifdef CONFIG_PPC_FPU
        FP_UNAVAILABLE_EXCEPTION
 #else
-       EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
-                 unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception)
 #endif
 
        /* System Call Interrupt */
@@ -400,16 +402,14 @@ interrupt_base:
        SYSCALL_ENTRY   0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1
 
        /* Auxiliary Processor Unavailable Interrupt */
-       EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
-                 unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, unknown_exception)
 
        /* Decrementer Interrupt */
        DECREMENTER_EXCEPTION
 
        /* Fixed Internal Timer Interrupt */
        /* TODO: Add FIT support */
-       EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
-                 unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0x3100, FIT, FixedIntervalTimer, unknown_exception)
 
        /* Watchdog Timer Interrupt */
 #ifdef CONFIG_BOOKE_WDT
@@ -497,7 +497,7 @@ END_BTB_FLUSH_SECTION
 #endif
 #endif
 
-       bne     2f                      /* Bail if permission/valid mismach */
+       bne     2f                      /* Bail if permission/valid mismatch */
 
        /* Jump to common tlb load */
        b       finish_tlb_load
@@ -592,7 +592,7 @@ END_BTB_FLUSH_SECTION
 #endif
 #endif
 
-       bne     2f                      /* Bail if permission mismach */
+       bne     2f                      /* Bail if permission mismatch */
 
        /* Jump to common TLB load point */
        b       finish_tlb_load
@@ -614,38 +614,44 @@ END_BTB_FLUSH_SECTION
 #ifdef CONFIG_SPE
        /* SPE Unavailable */
        START_EXCEPTION(SPEUnavailable)
-       NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
+       NORMAL_EXCEPTION_PROLOG(0x2010, SPE_UNAVAIL)
        beq     1f
        bl      load_up_spe
        b       fast_exception_return
-1:     addi    r3,r1,STACK_FRAME_OVERHEAD
-       EXC_XFER_LITE(0x2010, KernelSPE)
+1:     prepare_transfer_to_handler
+       bl      KernelSPE
+       b       interrupt_return
 #elif defined(CONFIG_SPE_POSSIBLE)
-       EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
-                 unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, unknown_exception)
 #endif /* CONFIG_SPE_POSSIBLE */
 
        /* SPE Floating Point Data */
 #ifdef CONFIG_SPE
-       EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData,
-                 SPEFloatingPointException, EXC_XFER_STD)
+       START_EXCEPTION(SPEFloatingPointData)
+       NORMAL_EXCEPTION_PROLOG(0x2030, SPE_FP_DATA)
+       prepare_transfer_to_handler
+       bl      SPEFloatingPointException
+       REST_NVGPRS(r1)
+       b       interrupt_return
 
        /* SPE Floating Point Round */
-       EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
-                 SPEFloatingPointRoundException, EXC_XFER_STD)
+       START_EXCEPTION(SPEFloatingPointRound)
+       NORMAL_EXCEPTION_PROLOG(0x2050, SPE_FP_ROUND)
+       prepare_transfer_to_handler
+       bl      SPEFloatingPointRoundException
+       REST_NVGPRS(r1)
+       b       interrupt_return
 #elif defined(CONFIG_SPE_POSSIBLE)
-       EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData,
-                 unknown_exception, EXC_XFER_STD)
-       EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
-                 unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception)
+       EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, unknown_exception)
 #endif /* CONFIG_SPE_POSSIBLE */
 
 
        /* Performance Monitor */
        EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
-                 performance_monitor_exception, EXC_XFER_STD)
+                 performance_monitor_exception)
 
-       EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD)
+       EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception)
 
        CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
                           CriticalDoorbell, unknown_exception)
@@ -660,10 +666,10 @@ END_BTB_FLUSH_SECTION
                           unknown_exception)
 
        /* Hypercall */
-       EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception)
 
        /* Embedded Hypervisor Privilege */
-       EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception)
 
 interrupt_end:
 
@@ -854,7 +860,7 @@ KernelSPE:
        lwz     r5,_NIP(r1)
        bl      printk
 #endif
-       b       ret_from_except
+       b       interrupt_return
 #ifdef CONFIG_PRINTK
 87:    .string "SPE used in kernel  (task=%p, pc=%x)  \n"
 #endif
index 867ee4a..675d1f6 100644 (file)
@@ -141,7 +141,7 @@ void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
 {
        struct instruction_op op;
 
-       if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip))
+       if (__get_user_instr(*instr, (void __user *)regs->nip))
                return;
 
        analyse_instr(&op, regs, *instr);
index 69df840..13cad92 100644 (file)
@@ -145,9 +145,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 
 /*
  * Return from NAP/DOZE mode, restore some CPU specific registers,
- * we are called with DR/IR still off and r2 containing physical
- * address of current.  R11 points to the exception frame (physical
- * address).  We have to preserve r10.
+ * R11 points to the exception frame. We have to preserve r10.
  */
 _GLOBAL(power_save_ppc32_restore)
        lwz     r9,_LINK(r11)           /* interrupted in ppc6xx_idle: */
@@ -166,11 +164,7 @@ BEGIN_FTR_SECTION
        mfspr   r9,SPRN_HID0
        andis.  r9,r9,HID0_NAP@h
        beq     1f
-#ifdef CONFIG_VMAP_STACK
        addis   r9, r11, nap_save_msscr0@ha
-#else
-       addis   r9,r11,(nap_save_msscr0-KERNELBASE)@ha
-#endif
        lwz     r9,nap_save_msscr0@l(r9)
        mtspr   SPRN_MSSCR0, r9
        sync
@@ -178,15 +172,11 @@ BEGIN_FTR_SECTION
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
 BEGIN_FTR_SECTION
-#ifdef CONFIG_VMAP_STACK
        addis   r9, r11, nap_save_hid1@ha
-#else
-       addis   r9,r11,(nap_save_hid1-KERNELBASE)@ha
-#endif
        lwz     r9,nap_save_hid1@l(r9)
        mtspr   SPRN_HID1, r9
 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
-       b       transfer_to_handler_cont
+       blr
 _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
 
        .data
index f9e6d83..abb719b 100644 (file)
@@ -209,4 +209,8 @@ _GLOBAL(power4_idle_nap)
        mtmsrd  r7
        isync
        b       1b
+
+       .globl power4_idle_nap_return
+power4_idle_nap_return:
+       blr
 #endif
index 72c85b6..9e1bc45 100644 (file)
@@ -74,20 +74,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 
 /*
  * Return from NAP/DOZE mode, restore some CPU specific registers,
- * r2 containing physical address of current.
- * r11 points to the exception frame (physical address).
+ * r2 containing address of current.
+ * r11 points to the exception frame.
  * We have to preserve r10.
  */
 _GLOBAL(power_save_ppc32_restore)
        lwz     r9,_LINK(r11)           /* interrupted in e500_idle */
        stw     r9,_NIP(r11)            /* make it do a blr */
-
-#ifdef CONFIG_SMP
-       lwz     r11,TASK_CPU(r2)                /* get cpu number * 4 */
-       slwi    r11,r11,2
-#else
-       li      r11,0
-#endif
-
-       b       transfer_to_handler_cont
+       blr
 _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
index c475a22..e4559f8 100644 (file)
 #include <asm/time.h>
 #include <asm/unistd.h>
 
+#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
+unsigned long global_dbcr0[NR_CPUS];
+#endif
+
 typedef long (*syscall_fn)(long, long, long, long, long, long);
 
 /* Has to run notrace because it is entered not completely "reconciled" */
@@ -29,20 +33,24 @@ notrace long system_call_exception(long r3, long r4, long r5,
 {
        syscall_fn f;
 
+       kuep_lock();
+#ifdef CONFIG_PPC32
+       kuap_save_and_lock(regs);
+#endif
+
        regs->orig_gpr3 = r3;
 
        if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
                BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
 
+       trace_hardirqs_off(); /* finish reconciling */
+
        CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
        user_exit_irqoff();
 
-       trace_hardirqs_off(); /* finish reconciling */
-
        if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
                BUG_ON(!(regs->msr & MSR_RI));
        BUG_ON(!(regs->msr & MSR_PR));
-       BUG_ON(!FULL_REGS(regs));
        BUG_ON(arch_irq_disabled_regs(regs));
 
 #ifdef CONFIG_PPC_PKEY
@@ -69,9 +77,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
                        isync();
        } else
 #endif
-#ifdef CONFIG_PPC64
-               kuap_check_amr();
-#endif
+               kuap_assert_locked();
 
        booke_restore_dbcr0();
 
@@ -247,9 +253,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 
        CT_WARN_ON(ct_state() == CONTEXT_USER);
 
-#ifdef CONFIG_PPC64
-       kuap_check_amr();
-#endif
+       kuap_assert_locked();
 
        regs->result = r3;
 
@@ -344,16 +348,13 @@ again:
 
        account_cpu_user_exit();
 
-#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */
-       /*
-        * We do this at the end so that we do context switch with KERNEL AMR
-        */
+       /* Restore user access locks last */
        kuap_user_restore(regs);
-#endif
+       kuep_unlock();
+
        return ret;
 }
 
-#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */
 notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
 {
        unsigned long ti_flags;
@@ -363,7 +364,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
        if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
                BUG_ON(!(regs->msr & MSR_RI));
        BUG_ON(!(regs->msr & MSR_PR));
-       BUG_ON(!FULL_REGS(regs));
        BUG_ON(arch_irq_disabled_regs(regs));
        CT_WARN_ON(ct_state() == CONTEXT_USER);
 
@@ -371,9 +371,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
         * We don't need to restore AMR on the way back to userspace for KUAP.
         * AMR can only have been unlocked if we interrupted the kernel.
         */
-#ifdef CONFIG_PPC64
-       kuap_check_amr();
-#endif
+       kuap_assert_locked();
 
        local_irq_save(flags);
 
@@ -392,7 +390,7 @@ again:
                ti_flags = READ_ONCE(current_thread_info()->flags);
        }
 
-       if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
                if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
                                unlikely((ti_flags & _TIF_RESTORE_TM))) {
                        restore_tm_state(regs);
@@ -427,12 +425,9 @@ again:
 
        account_cpu_user_exit();
 
-       /*
-        * We do this at the end so that we do context switch with KERNEL AMR
-        */
-#ifdef CONFIG_PPC64
+       /* Restore user access locks last */
        kuap_user_restore(regs);
-#endif
+
        return ret;
 }
 
@@ -442,25 +437,20 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 {
        unsigned long flags;
        unsigned long ret = 0;
-#ifdef CONFIG_PPC64
-       unsigned long amr;
-#endif
+       unsigned long kuap;
 
        if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
            unlikely(!(regs->msr & MSR_RI)))
                unrecoverable_exception(regs);
        BUG_ON(regs->msr & MSR_PR);
-       BUG_ON(!FULL_REGS(regs));
        /*
         * CT_WARN_ON comes here via program_check_exception,
         * so avoid recursion.
         */
-       if (TRAP(regs) != 0x700)
+       if (TRAP(regs) != INTERRUPT_PROGRAM)
                CT_WARN_ON(ct_state() == CONTEXT_USER);
 
-#ifdef CONFIG_PPC64
-       amr = kuap_get_and_check_amr();
-#endif
+       kuap = kuap_get_and_assert_locked();
 
        if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
                clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
@@ -498,14 +488,11 @@ again:
 #endif
 
        /*
-        * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr,
-        * which would cause Read-After-Write stalls. Hence, we take the AMR
-        * value from the check above.
+        * 64s does not want to mfspr(SPRN_AMR) here, because this comes after
+        * mtmsr, which would cause Read-After-Write stalls. Hence, take the
+        * AMR value from the check above.
         */
-#ifdef CONFIG_PPC64
-       kuap_kernel_restore(regs, amr);
-#endif
+       kuap_kernel_restore(regs, kuap);
 
        return ret;
 }
-#endif
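
Editor's note: throughout interrupt.c the #ifdef CONFIG_PPC64 blocks around kuap_check_amr()/kuap_get_and_check_amr() are replaced by kuap_assert_locked()/kuap_get_and_assert_locked(), which every platform presumably now provides (as an empty inline where KUAP is not configured), so the common exit paths read straight through. A standalone sketch of that stub-versus-real-helper pattern; HAVE_FEATURE_KUAP and the demo functions are invented names.

	#include <assert.h>
	#include <stdio.h>

	#ifdef HAVE_FEATURE_KUAP
	static int kuap_locked = 1;		/* stand-in for the real MMU state */
	static inline void kuap_assert_locked_demo(void)
	{
		assert(kuap_locked);		/* real check when the feature exists */
	}
	#else
	static inline void kuap_assert_locked_demo(void) { }	/* no-op stub */
	#endif

	static long interrupt_exit_demo(void)
	{
		kuap_assert_locked_demo();	/* no #ifdef at the call site */
		puts("exit path reads the same on every platform");
		return 0;
	}

	int main(void)
	{
		return (int)interrupt_exit_demo();
	}
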
index c00214a..57d6b85 100644 (file)
@@ -72,8 +72,7 @@ static void iommu_debugfs_del(struct iommu_table *tbl)
 
        sprintf(name, "%08lx", tbl->it_index);
        liobn_entry = debugfs_lookup(name, iommu_debugfs_dir);
-       if (liobn_entry)
-               debugfs_remove(liobn_entry);
+       debugfs_remove(liobn_entry);
 }
 #else
 static void iommu_debugfs_add(struct iommu_table *tbl){}
@@ -297,6 +296,15 @@ again:
                        pass++;
                        goto again;
 
+               } else if (pass == tbl->nr_pools + 1) {
+                       /* Last resort: try largepool */
+                       spin_unlock(&pool->lock);
+                       pool = &tbl->large_pool;
+                       spin_lock(&pool->lock);
+                       pool->hint = pool->start;
+                       pass++;
+                       goto again;
+
                } else {
                        /* Give up */
                        spin_unlock_irqrestore(&(pool->lock), flags);
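
Editor's note: the new branch above adds one more allocation pass: once the starting pool and every other small pool have been tried, the allocator falls back to the large pool before giving up. A standalone model of that pass ordering; try_alloc() and the pool layout are invented for the sketch.

	#include <stdbool.h>
	#include <stdio.h>

	#define NR_POOLS 4	/* small pools 0..3, large pool = NR_POOLS */

	static bool try_alloc(int pool)
	{
		return pool == NR_POOLS;	/* pretend only the large pool has room */
	}

	static int alloc_with_fallback(int first_pool)
	{
		/* passes over the small pools, starting at the caller's pool */
		for (int pass = 0; pass < NR_POOLS; pass++) {
			int pool = (first_pool + pass) % NR_POOLS;
			if (try_alloc(pool))
				return pool;
		}
		/* last resort: the large pool, as in the hunk above */
		if (try_alloc(NR_POOLS))
			return NR_POOLS;
		return -1;			/* give up */
	}

	int main(void)
	{
		printf("allocated from pool %d\n", alloc_with_fallback(1));
		return 0;
	}
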
@@ -719,7 +727,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
 {
        unsigned long sz;
        static int welcomed = 0;
-       struct page *page;
        unsigned int i;
        struct iommu_pool *p;
 
@@ -728,11 +735,11 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
        /* number of bytes needed for the bitmap */
        sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
 
-       page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz));
-       if (!page)
-               panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
-       tbl->it_map = page_address(page);
-       memset(tbl->it_map, 0, sz);
+       tbl->it_map = vzalloc_node(sz, nid);
+       if (!tbl->it_map) {
+               pr_err("%s: Can't allocate %ld bytes\n", __func__, sz);
+               return NULL;
+       }
 
        iommu_table_reserve_pages(tbl, res_start, res_end);
 
@@ -774,8 +781,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
 
 static void iommu_table_free(struct kref *kref)
 {
-       unsigned long bitmap_sz;
-       unsigned int order;
        struct iommu_table *tbl;
 
        tbl = container_of(kref, struct iommu_table, it_kref);
@@ -796,12 +801,8 @@ static void iommu_table_free(struct kref *kref)
        if (!bitmap_empty(tbl->it_map, tbl->it_size))
                pr_warn("%s: Unexpected TCEs\n", __func__);
 
-       /* calculate bitmap size in bytes */
-       bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
-
        /* free bitmap */
-       order = get_order(bitmap_sz);
-       free_pages((unsigned long) tbl->it_map, order);
+       vfree(tbl->it_map);
 
        /* free table */
        kfree(tbl);
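
Editor's note: the it_map bitmap is now obtained with vzalloc_node() and released with vfree(); the size is still one bit per TCE entry, rounded up to whole longs. For a table with a couple of million entries that bitmap runs to hundreds of kilobytes, which is presumably why a physically contiguous high-order page allocation was worth avoiding. A quick standalone check of the arithmetic; BITS_TO_LONGS is re-defined locally for the demo.

	#include <stdio.h>
	#include <limits.h>

	#define BITS_PER_LONG  (sizeof(unsigned long) * CHAR_BIT)
	#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

	int main(void)
	{
		unsigned long it_size = 1UL << 21;	/* e.g. 2M TCE entries */
		unsigned long sz = BITS_TO_LONGS(it_size) * sizeof(unsigned long);

		/* one bit per entry: 2M entries -> 256 KiB of bitmap */
		printf("bitmap for %lu entries needs %lu bytes\n", it_size, sz);
		return 0;
	}
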
@@ -897,6 +898,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
        unsigned int order;
        unsigned int nio_pages, io_order;
        struct page *page;
+       size_t size_io = size;
 
        size = PAGE_ALIGN(size);
        order = get_order(size);
@@ -923,8 +925,9 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
        memset(ret, 0, size);
 
        /* Set up tces to cover the allocated range */
-       nio_pages = size >> tbl->it_page_shift;
-       io_order = get_iommu_order(size, tbl);
+       size_io = IOMMU_PAGE_ALIGN(size_io, tbl);
+       nio_pages = size_io >> tbl->it_page_shift;
+       io_order = get_iommu_order(size_io, tbl);
        mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
                              mask >> tbl->it_page_shift, io_order, 0);
        if (mapping == DMA_MAPPING_ERROR) {
@@ -939,10 +942,9 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
                         void *vaddr, dma_addr_t dma_handle)
 {
        if (tbl) {
-               unsigned int nio_pages;
+               size_t size_io = IOMMU_PAGE_ALIGN(size, tbl);
+               unsigned int nio_pages = size_io >> tbl->it_page_shift;
 
-               size = PAGE_ALIGN(size);
-               nio_pages = size >> tbl->it_page_shift;
                iommu_free(tbl, dma_handle, nio_pages);
                size = PAGE_ALIGN(size);
                free_pages((unsigned long)vaddr, get_order(size));
@@ -1096,7 +1098,7 @@ int iommu_take_ownership(struct iommu_table *tbl)
 
        spin_lock_irqsave(&tbl->large_pool.lock, flags);
        for (i = 0; i < tbl->nr_pools; i++)
-               spin_lock(&tbl->pools[i].lock);
+               spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
 
        iommu_table_release_pages(tbl);
 
@@ -1124,7 +1126,7 @@ void iommu_release_ownership(struct iommu_table *tbl)
 
        spin_lock_irqsave(&tbl->large_pool.lock, flags);
        for (i = 0; i < tbl->nr_pools; i++)
-               spin_lock(&tbl->pools[i].lock);
+               spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
 
        memset(tbl->it_map, 0, sz);
 
index d71fd10..72cb453 100644 (file)
@@ -104,82 +104,6 @@ static inline notrace unsigned long get_irq_happened(void)
        return happened;
 }
 
-#ifdef CONFIG_PPC_BOOK3E
-
-/* This is called whenever we are re-enabling interrupts
- * and returns either 0 (nothing to do) or 500/900/280 if
- * there's an EE, DEC or DBELL to generate.
- *
- * This is called in two contexts: From arch_local_irq_restore()
- * before soft-enabling interrupts, and from the exception exit
- * path when returning from an interrupt from a soft-disabled to
- * a soft enabled context. In both case we have interrupts hard
- * disabled.
- *
- * We take care of only clearing the bits we handled in the
- * PACA irq_happened field since we can only re-emit one at a
- * time and we don't want to "lose" one.
- */
-notrace unsigned int __check_irq_replay(void)
-{
-       /*
-        * We use local_paca rather than get_paca() to avoid all
-        * the debug_smp_processor_id() business in this low level
-        * function
-        */
-       unsigned char happened = local_paca->irq_happened;
-
-       /*
-        * We are responding to the next interrupt, so interrupt-off
-        * latencies should be reset here.
-        */
-       trace_hardirqs_on();
-       trace_hardirqs_off();
-
-       if (happened & PACA_IRQ_DEC) {
-               local_paca->irq_happened &= ~PACA_IRQ_DEC;
-               return 0x900;
-       }
-
-       if (happened & PACA_IRQ_EE) {
-               local_paca->irq_happened &= ~PACA_IRQ_EE;
-               return 0x500;
-       }
-
-       if (happened & PACA_IRQ_DBELL) {
-               local_paca->irq_happened &= ~PACA_IRQ_DBELL;
-               return 0x280;
-       }
-
-       if (happened & PACA_IRQ_HARD_DIS)
-               local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
-
-       /* There should be nothing left ! */
-       BUG_ON(local_paca->irq_happened != 0);
-
-       return 0;
-}
-
-/*
- * This is specifically called by assembly code to re-enable interrupts
- * if they are currently disabled. This is typically called before
- * schedule() or do_signal() when returning to userspace. We do it
- * in C to avoid the burden of dealing with lockdep etc...
- *
- * NOTE: This is called with interrupts hard disabled but not marked
- * as such in paca->irq_happened, so we need to resync this.
- */
-void notrace restore_interrupts(void)
-{
-       if (irqs_disabled()) {
-               local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-               local_irq_enable();
-       } else
-               __hard_irq_enable();
-}
-
-#endif /* CONFIG_PPC_BOOK3E */
-
 void replay_soft_interrupts(void)
 {
        struct pt_regs regs;
@@ -218,7 +142,7 @@ again:
         */
        if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) {
                local_paca->irq_happened &= ~PACA_IRQ_HMI;
-               regs.trap = 0xe60;
+               regs.trap = INTERRUPT_HMI;
                handle_hmi_exception(&regs);
                if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
                        hard_irq_disable();
@@ -226,7 +150,7 @@ again:
 
        if (local_paca->irq_happened & PACA_IRQ_DEC) {
                local_paca->irq_happened &= ~PACA_IRQ_DEC;
-               regs.trap = 0x900;
+               regs.trap = INTERRUPT_DECREMENTER;
                timer_interrupt(&regs);
                if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
                        hard_irq_disable();
@@ -234,7 +158,7 @@ again:
 
        if (local_paca->irq_happened & PACA_IRQ_EE) {
                local_paca->irq_happened &= ~PACA_IRQ_EE;
-               regs.trap = 0x500;
+               regs.trap = INTERRUPT_EXTERNAL;
                do_IRQ(&regs);
                if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
                        hard_irq_disable();
@@ -242,10 +166,7 @@ again:
 
        if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) {
                local_paca->irq_happened &= ~PACA_IRQ_DBELL;
-               if (IS_ENABLED(CONFIG_PPC_BOOK3E))
-                       regs.trap = 0x280;
-               else
-                       regs.trap = 0xa00;
+               regs.trap = INTERRUPT_DOORBELL;
                doorbell_exception(&regs);
                if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
                        hard_irq_disable();
@@ -254,7 +175,7 @@ again:
        /* Book3E does not support soft-masking PMI interrupts */
        if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) {
                local_paca->irq_happened &= ~PACA_IRQ_PMI;
-               regs.trap = 0xf00;
+               regs.trap = INTERRUPT_PERFMON;
                performance_monitor_exception(&regs);
                if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
                        hard_irq_disable();
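
Editor's note: replay_soft_interrupts() keeps its structure (test a pending bit in irq_happened, clear it, stamp a synthetic trap value into the on-stack regs, call the C handler) but now uses the named INTERRUPT_* constants instead of raw vector numbers. A rough standalone model of that loop; the bit values, trap numbers and handler names below are invented for the sketch.

	#include <stdio.h>

	#define PENDING_DEC 0x1u
	#define PENDING_EE  0x2u

	enum { DEMO_TRAP_DECREMENTER = 0x900, DEMO_TRAP_EXTERNAL = 0x500 };

	struct demo_regs { unsigned long trap; };

	static void timer_handler(struct demo_regs *r) { printf("trap 0x%lx: timer\n", r->trap); }
	static void irq_handler(struct demo_regs *r)   { printf("trap 0x%lx: external\n", r->trap); }

	static void replay(unsigned int *pending)
	{
		struct demo_regs regs = { 0 };

		if (*pending & PENDING_DEC) {		/* clear the bit, then replay it */
			*pending &= ~PENDING_DEC;
			regs.trap = DEMO_TRAP_DECREMENTER;
			timer_handler(&regs);
		}
		if (*pending & PENDING_EE) {
			*pending &= ~PENDING_EE;
			regs.trap = DEMO_TRAP_EXTERNAL;
			irq_handler(&regs);
		}
	}

	int main(void)
	{
		unsigned int pending = PENDING_DEC | PENDING_EE;
		replay(&pending);
		return 0;
	}
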
@@ -282,7 +203,7 @@ static inline void replay_soft_interrupts_irqrestore(void)
         * and re-locking AMR but we shouldn't get here in the first place,
         * hence the warning.
         */
-       kuap_check_amr();
+       kuap_assert_locked();
 
        if (kuap_state != AMR_KUAP_BLOCKED)
                set_kuap(AMR_KUAP_BLOCKED);
@@ -667,6 +588,47 @@ static inline void check_stack_overflow(void)
        }
 }
 
+static __always_inline void call_do_softirq(const void *sp)
+{
+       /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
+       asm volatile (
+                PPC_STLU "     %%r1, %[offset](%[sp])  ;"
+               "mr             %%r1, %[sp]             ;"
+               "bl             %[callee]               ;"
+                PPC_LL "       %%r1, 0(%%r1)           ;"
+                : // Outputs
+                : // Inputs
+                  [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD),
+                  [callee] "i" (__do_softirq)
+                : // Clobbers
+                  "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
+                  "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+                  "r11", "r12"
+       );
+}
+
+static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
+{
+       register unsigned long r3 asm("r3") = (unsigned long)regs;
+
+       /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */
+       asm volatile (
+                PPC_STLU "     %%r1, %[offset](%[sp])  ;"
+               "mr             %%r1, %[sp]             ;"
+               "bl             %[callee]               ;"
+                PPC_LL "       %%r1, 0(%%r1)           ;"
+                : // Outputs
+                  "+r" (r3)
+                : // Inputs
+                  [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD),
+                  [callee] "i" (__do_irq)
+                : // Clobbers
+                  "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
+                  "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+                  "r11", "r12"
+       );
+}
+
 void __do_irq(struct pt_regs *regs)
 {
        unsigned int irq;
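
Editor's note: call_do_softirq()/call_do_irq() are now extended inline asm in C, replacing the hand-coded versions removed from misc_32.S further down; the explicit input, output and clobber lists tell the compiler exactly which registers the temporary stack switch trashes, and the old KSP_LIMIT bookkeeping is gone. As a reminder of the constraint syntax only, here is a minimal, architecture-neutral asm statement; the template is deliberately empty so it builds anywhere, and the operand name is just to show the named-operand form used above.

	#include <stdio.h>

	static unsigned long pass_through(unsigned long v)
	{
		asm volatile(
			""			/* real instructions would go here */
			: [val] "+r" (v)	/* named read-write register operand */
			:			/* no extra inputs */
			: "memory");		/* compiler must assume memory changed */
		return v;
	}

	int main(void)
	{
		printf("%lu\n", pass_through(42));
		return 0;
	}
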
index 1448580..ce87dc5 100644 (file)
 void arch_jump_label_transform(struct jump_entry *entry,
                               enum jump_label_type type)
 {
-       struct ppc_inst *addr = (struct ppc_inst *)(unsigned long)entry->code;
+       struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry);
 
        if (type == JUMP_LABEL_JMP)
-               patch_branch(addr, entry->target, 0);
+               patch_branch(addr, jump_entry_target(entry), 0);
        else
                patch_instruction(addr, ppc_inst(PPC_INST_NOP));
 }
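
Editor's note: jump_entry_code()/jump_entry_target() are the generic accessors from include/linux/jump_label.h; on architectures that store jump entries as offsets relative to the entry itself they convert the stored value into an absolute address, which an open-coded cast of entry->code would get wrong. A standalone model of why the accessor matters; the struct layout and names below are invented, not the kernel's.

	#include <stdint.h>
	#include <stdio.h>

	struct demo_entry {
		int32_t code;	/* relative form: offset from &entry->code */
	};

	static uintptr_t demo_entry_code(const struct demo_entry *e)
	{
		return (uintptr_t)&e->code + e->code;	/* offset -> absolute address */
	}

	int main(void)
	{
		/* pretend the patch site sits 16 bytes past the field */
		struct demo_entry entry = { .code = 16 };

		printf("entry at %p, code at %#lx\n",
		       (void *)&entry, (unsigned long)demo_entry_code(&entry));
		return 0;
	}
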
index 4090802..7dd2ad3 100644 (file)
@@ -376,7 +376,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
 }
 
 /*
- * This function does PowerPC specific procesing for interfacing to gdb.
+ * This function does PowerPC specific processing for interfacing to gdb.
  */
 int kgdb_arch_handle_exception(int vector, int signo, int err_code,
                               char *remcom_in_buffer, char *remcom_out_buffer,
index f061e06..8b2c1a8 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/udbg.h>
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
+#include <asm/early_ioremap.h>
 
 #undef DEBUG
 
@@ -34,6 +35,7 @@ static struct legacy_serial_info {
        unsigned int                    clock;
        int                             irq_check_parent;
        phys_addr_t                     taddr;
+       void __iomem                    *early_addr;
 } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
 
 static const struct of_device_id legacy_serial_parents[] __initconst = {
@@ -325,17 +327,16 @@ static void __init setup_legacy_serial_console(int console)
 {
        struct legacy_serial_info *info = &legacy_serial_infos[console];
        struct plat_serial8250_port *port = &legacy_serial_ports[console];
-       void __iomem *addr;
        unsigned int stride;
 
        stride = 1 << port->regshift;
 
        /* Check if a translated MMIO address has been found */
        if (info->taddr) {
-               addr = ioremap(info->taddr, 0x1000);
-               if (addr == NULL)
+               info->early_addr = early_ioremap(info->taddr, 0x1000);
+               if (info->early_addr == NULL)
                        return;
-               udbg_uart_init_mmio(addr, stride);
+               udbg_uart_init_mmio(info->early_addr, stride);
        } else {
                /* Check if it's PIO and we support untranslated PIO */
                if (port->iotype == UPIO_PORT && isa_io_special)
@@ -353,6 +354,30 @@ static void __init setup_legacy_serial_console(int console)
        udbg_uart_setup(info->speed, info->clock);
 }
 
+static int __init ioremap_legacy_serial_console(void)
+{
+       struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console];
+       struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console];
+       void __iomem *vaddr;
+
+       if (legacy_serial_console < 0)
+               return 0;
+
+       if (!info->early_addr)
+               return 0;
+
+       vaddr = ioremap(info->taddr, 0x1000);
+       if (WARN_ON(!vaddr))
+               return -ENOMEM;
+
+       udbg_uart_init_mmio(vaddr, 1 << port->regshift);
+       early_iounmap(info->early_addr, 0x1000);
+       info->early_addr = NULL;
+
+       return 0;
+}
+early_initcall(ioremap_legacy_serial_console);
+
 /*
  * This is called very early, as part of setup_system() or eventually
  * setup_arch(), basically before anything else in this file. This function
index 11f0cae..9a3c2a8 100644
@@ -40,7 +40,7 @@ static struct irq_work mce_ue_event_irq_work = {
        .func = machine_check_ue_irq_work,
 };
 
-DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
 
 static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
 
@@ -131,6 +131,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
         * Populate the mce error_type and type-specific error_type.
         */
        mce_set_error_info(mce, mce_err);
+       if (mce->error_type == MCE_ERROR_TYPE_UE)
+               mce->u.ue_error.ignore_event = mce_err->ignore_event;
 
        if (!addr)
                return;
@@ -159,7 +161,6 @@ void save_mce_event(struct pt_regs *regs, long handled,
                if (phys_addr != ULONG_MAX) {
                        mce->u.ue_error.physical_address_provided = true;
                        mce->u.ue_error.physical_address = phys_addr;
-                       mce->u.ue_error.ignore_event = mce_err->ignore_event;
                        machine_check_ue_event(mce);
                }
        }
index 717e658..6a076be 100644
 
        .text
 
-/*
- * We store the saved ksp_limit in the unused part
- * of the STACK_FRAME_OVERHEAD
- */
-_GLOBAL(call_do_softirq)
-       mflr    r0
-       stw     r0,4(r1)
-       lwz     r10,THREAD+KSP_LIMIT(r2)
-       stw     r3, THREAD+KSP_LIMIT(r2)
-       stwu    r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
-       mr      r1,r3
-       stw     r10,8(r1)
-       bl      __do_softirq
-       lwz     r10,8(r1)
-       lwz     r1,0(r1)
-       lwz     r0,4(r1)
-       stw     r10,THREAD+KSP_LIMIT(r2)
-       mtlr    r0
-       blr
-
-/*
- * void call_do_irq(struct pt_regs *regs, void *sp);
- */
-_GLOBAL(call_do_irq)
-       mflr    r0
-       stw     r0,4(r1)
-       lwz     r10,THREAD+KSP_LIMIT(r2)
-       stw     r4, THREAD+KSP_LIMIT(r2)
-       stwu    r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
-       mr      r1,r4
-       stw     r10,8(r1)
-       bl      __do_irq
-       lwz     r10,8(r1)
-       lwz     r1,0(r1)
-       lwz     r0,4(r1)
-       stw     r10,THREAD+KSP_LIMIT(r2)
-       mtlr    r0
-       blr
-
 /*
  * This returns the high 64 bits of the product of two 64-bit numbers.
  */
index 0704658..4b761a1 100644
 
        .text
 
-_GLOBAL(call_do_softirq)
-       mflr    r0
-       std     r0,16(r1)
-       stdu    r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
-       mr      r1,r3
-       bl      __do_softirq
-       ld      r1,0(r1)
-       ld      r0,16(r1)
-       mtlr    r0
-       blr
-
-_GLOBAL(call_do_irq)
-       mflr    r0
-       std     r0,16(r1)
-       stdu    r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
-       mr      r1,r4
-       bl      __do_irq
-       ld      r1,0(r1)
-       ld      r0,16(r1)
-       mtlr    r0
-       blr
-
 _GLOBAL(__bswapdi2)
 EXPORT_SYMBOL(__bswapdi2)
        srdi    r8,r3,32
index a211b02..fab8402 100644
@@ -14,6 +14,7 @@
 #include <asm/firmware.h>
 #include <linux/sort.h>
 #include <asm/setup.h>
+#include <asm/sections.h>
 
 static LIST_HEAD(module_bug_list);
 
@@ -88,12 +89,28 @@ int module_finalize(const Elf_Ehdr *hdr,
 }
 
 #ifdef MODULES_VADDR
+static __always_inline void *
+__module_alloc(unsigned long size, unsigned long start, unsigned long end)
+{
+       return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
+                                   PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+                                   __builtin_return_address(0));
+}
+
 void *module_alloc(unsigned long size)
 {
+       unsigned long limit = (unsigned long)_etext - SZ_32M;
+       void *ptr = NULL;
+
        BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
 
-       return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
-                                   PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-                                   __builtin_return_address(0));
+       /* First try within 32M limit from _etext to avoid branch trampolines */
+       if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit)
+               ptr = __module_alloc(size, limit, MODULES_END);
+
+       if (!ptr)
+               ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
+
+       return ptr;
 }
 #endif
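[Editor's note: per the in-code comment, the intent is to place module text within direct-branch range of the kernel whenever possible so branch trampolines are avoided, falling back to the full MODULES_VADDR..MODULES_END window only when the preferred slice fails. Reduced to a stand-alone sketch, with a made-up try_alloc_in_range() standing in for __module_alloc():]

#include <stdio.h>
#include <stdlib.h>

/* Made-up range allocator; it pretends the narrow preferred window is
 * exhausted so the fallback path gets exercised. */
static void *try_alloc_in_range(size_t size, unsigned long start, unsigned long end)
{
        if (end - start <= 32UL << 20)
                return NULL;
        return malloc(size);
}

static void *alloc_preferred_then_full(size_t size, unsigned long start,
                                       unsigned long end, unsigned long pref_start)
{
        void *ptr = NULL;

        if (pref_start > start && pref_start < end)     /* preferred slice exists */
                ptr = try_alloc_in_range(size, pref_start, end);
        if (!ptr)                                       /* otherwise take anything in range */
                ptr = try_alloc_in_range(size, start, end);
        return ptr;
}

int main(void)
{
        void *p = alloc_preferred_then_full(128, 0x1000000, 0x9000000,
                                            0x9000000 - (32UL << 20));

        printf("%s\n", p ? "allocated (full-range fallback)" : "failed");
        free(p);
        return 0;
}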
index 7f7cdbe..cdf8708 100644
@@ -141,11 +141,21 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
        }
 }
 
+static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
+{
+       patch_instruction((struct ppc_inst *)addr,
+                         ppc_inst(PPC_RAW_LIS(reg, IMM_H(val))));
+       addr++;
+
+       patch_instruction((struct ppc_inst *)addr,
+                         ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val))));
+}
+
 /*
  * Generate instructions to load provided immediate 64-bit value
  * to register 'reg' and patch these instructions at 'addr'.
  */
-static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
+static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr)
 {
        /* lis reg,(op)@highest */
        patch_instruction((struct ppc_inst *)addr,
@@ -177,6 +187,14 @@ static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *
                                   ___PPC_RS(reg) | (val & 0xffff)));
 }
 
+static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
+{
+       if (IS_ENABLED(CONFIG_PPC64))
+               patch_imm64_load_insns(val, reg, addr);
+       else
+               patch_imm32_load_insns(val, reg, addr);
+}
+
 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
 {
        struct ppc_inst branch_op_callback, branch_emulate_step, temp;
@@ -230,7 +248,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
         * Fixup the template with instructions to:
         * 1. load the address of the actual probepoint
         */
-       patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);
+       patch_imm_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);
 
        /*
         * 2. branch to optimized_callback() and emulate_step()
@@ -264,7 +282,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
         * 3. load instruction to be emulated into relevant register, and
         */
        temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
-       patch_imm64_load_insns(ppc_inst_as_u64(temp), 4, buff + TMPL_INSN_IDX);
+       patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
 
        /*
         * 4. branch back from trampoline
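[Editor's note: a 32-bit immediate needs only the lis/ori pair emitted by patch_imm32_load_insns(), versus the five-instruction sequence patch_imm64_load_insns() builds for a full 64-bit value. Because ori is a plain OR into the low 16 bits, the high half is simply val >> 16, with no sign compensation (an addi-based sequence would need it). A quick stand-alone check of that split, in plain C rather than the IMM_H()/IMM_L() macros:]

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t val = 0xc0a80a01;
        uint16_t hi = val >> 16;        /* lis rN, hi     -> rN = hi << 16        */
        uint16_t lo = val & 0xffff;     /* ori rN, rN, lo -> rN |= lo (no carry)  */
        uint32_t rebuilt = ((uint32_t)hi << 16) | lo;

        printf("hi=0x%04x lo=0x%04x rebuilt=0x%08x\n", hi, lo, rebuilt);
        return rebuilt == val ? 0 : 1;
}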
index ff8ba4d..19ea331 100644
@@ -9,6 +9,16 @@
 #include <asm/ptrace.h>
 #include <asm/asm-offsets.h>
 
+#ifdef CONFIG_PPC64
+#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base)
+#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base)
+#define TEMPLATE_FOR_IMM_LOAD_INSNS    nop; nop; nop; nop; nop
+#else
+#define SAVE_30GPRS(base) stmw r2, GPR2(base)
+#define REST_30GPRS(base) lmw  r2, GPR2(base)
+#define TEMPLATE_FOR_IMM_LOAD_INSNS    nop; nop; nop
+#endif
+
 #define        OPT_SLOT_SIZE   65536
 
        .balign 4
@@ -30,39 +40,41 @@ optinsn_slot:
        .global optprobe_template_entry
 optprobe_template_entry:
        /* Create an in-memory pt_regs */
-       stdu    r1,-INT_FRAME_SIZE(r1)
+       PPC_STLU        r1,-INT_FRAME_SIZE(r1)
        SAVE_GPR(0,r1)
        /* Save the previous SP into stack */
        addi    r0,r1,INT_FRAME_SIZE
-       std     r0,GPR1(r1)
-       SAVE_10GPRS(2,r1)
-       SAVE_10GPRS(12,r1)
-       SAVE_10GPRS(22,r1)
+       PPC_STL r0,GPR1(r1)
+       SAVE_30GPRS(r1)
        /* Save SPRS */
        mfmsr   r5
-       std     r5,_MSR(r1)
+       PPC_STL r5,_MSR(r1)
        li      r5,0x700
-       std     r5,_TRAP(r1)
+       PPC_STL r5,_TRAP(r1)
        li      r5,0
-       std     r5,ORIG_GPR3(r1)
-       std     r5,RESULT(r1)
+       PPC_STL r5,ORIG_GPR3(r1)
+       PPC_STL r5,RESULT(r1)
        mfctr   r5
-       std     r5,_CTR(r1)
+       PPC_STL r5,_CTR(r1)
        mflr    r5
-       std     r5,_LINK(r1)
+       PPC_STL r5,_LINK(r1)
        mfspr   r5,SPRN_XER
-       std     r5,_XER(r1)
+       PPC_STL r5,_XER(r1)
        mfcr    r5
-       std     r5,_CCR(r1)
+       PPC_STL r5,_CCR(r1)
+#ifdef CONFIG_PPC64
        lbz     r5,PACAIRQSOFTMASK(r13)
        std     r5,SOFTE(r1)
+#endif
 
        /*
         * We may get here from a module, so load the kernel TOC in r2.
         * The original TOC gets restored when pt_regs is restored
         * further below.
         */
+#ifdef CONFIG_PPC64
        ld      r2,PACATOC(r13)
+#endif
 
        .global optprobe_template_op_address
 optprobe_template_op_address:
@@ -70,11 +82,8 @@ optprobe_template_op_address:
         * Parameters to optimized_callback():
         * 1. optimized_kprobe structure in r3
         */
-       nop
-       nop
-       nop
-       nop
-       nop
+       TEMPLATE_FOR_IMM_LOAD_INSNS
+
        /* 2. pt_regs pointer in r4 */
        addi    r4,r1,STACK_FRAME_OVERHEAD
 
@@ -92,11 +101,7 @@ optprobe_template_call_handler:
        .global optprobe_template_insn
 optprobe_template_insn:
        /* 2, Pass instruction to be emulated in r4 */
-       nop
-       nop
-       nop
-       nop
-       nop
+       TEMPLATE_FOR_IMM_LOAD_INSNS
 
        .global optprobe_template_call_emulate
 optprobe_template_call_emulate:
@@ -107,20 +112,18 @@ optprobe_template_call_emulate:
         * All done.
         * Now, restore the registers...
         */
-       ld      r5,_MSR(r1)
+       PPC_LL  r5,_MSR(r1)
        mtmsr   r5
-       ld      r5,_CTR(r1)
+       PPC_LL  r5,_CTR(r1)
        mtctr   r5
-       ld      r5,_LINK(r1)
+       PPC_LL  r5,_LINK(r1)
        mtlr    r5
-       ld      r5,_XER(r1)
+       PPC_LL  r5,_XER(r1)
        mtxer   r5
-       ld      r5,_CCR(r1)
+       PPC_LL  r5,_CCR(r1)
        mtcr    r5
        REST_GPR(0,r1)
-       REST_10GPRS(2,r1)
-       REST_10GPRS(12,r1)
-       REST_10GPRS(22,r1)
+       REST_30GPRS(r1)
        /* Restore the previous SP */
        addi    r1,r1,INT_FRAME_SIZE
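[Editor's note: the template becomes 32/64-bit agnostic mainly by swapping the fixed std/ld/stdu mnemonics for PPC_STL/PPC_LL/PPC_STLU and by wrapping the GPR bulk save/restore and the 64-bit-only PACA accesses. Roughly speaking, the width-selecting macros (defined in arch/powerpc/include/asm/asm-compat.h, wrapped with stringify_in_c() so the same definition serves .S files and C inline asm) come down to the following; this is the gist, not a verbatim copy of the header:]

#ifdef __powerpc64__
#define PPC_LL          stringify_in_c(ld)      /* 8-byte load                  */
#define PPC_STL         stringify_in_c(std)     /* 8-byte store                 */
#define PPC_STLU        stringify_in_c(stdu)    /* 8-byte store with update     */
#else
#define PPC_LL          stringify_in_c(lwz)     /* 4-byte load                  */
#define PPC_STL         stringify_in_c(stw)     /* 4-byte store                 */
#define PPC_STLU        stringify_in_c(stwu)    /* 4-byte store with update     */
#endif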
 
index 3231c2d..89e34aa 100644
@@ -1117,9 +1117,10 @@ void restore_tm_state(struct pt_regs *regs)
        regs->msr |= msr_diff;
 }
 
-#else
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
 #define tm_recheckpoint_new_task(new)
 #define __switch_to_tm(prev, new)
+void tm_reclaim_current(uint8_t cause) {}
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
 static inline void save_sprs(struct thread_struct *t)
@@ -1255,6 +1256,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
         */
        restore_sprs(old_thread, new_thread);
 
+#ifdef CONFIG_PPC32
+       kuap_assert_locked();
+#endif
        last = _switch(old_thread, new_thread);
 
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -1444,11 +1448,9 @@ static void print_msr_bits(unsigned long val)
 #ifdef CONFIG_PPC64
 #define REG            "%016lx"
 #define REGS_PER_LINE  4
-#define LAST_VOLATILE  13
 #else
 #define REG            "%08lx"
 #define REGS_PER_LINE  8
-#define LAST_VOLATILE  12
 #endif
 
 static void __show_regs(struct pt_regs *regs)
@@ -1465,7 +1467,9 @@ static void __show_regs(struct pt_regs *regs)
        trap = TRAP(regs);
        if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR))
                pr_cont("CFAR: "REG" ", regs->orig_gpr3);
-       if (trap == 0x200 || trap == 0x300 || trap == 0x600) {
+       if (trap == INTERRUPT_MACHINE_CHECK ||
+           trap == INTERRUPT_DATA_STORAGE ||
+           trap == INTERRUPT_ALIGNMENT) {
                if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE))
                        pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
                else
@@ -1484,8 +1488,6 @@ static void __show_regs(struct pt_regs *regs)
                if ((i % REGS_PER_LINE) == 0)
                        pr_cont("\nGPR%02d: ", i);
                pr_cont(REG " ", regs->gpr[i]);
-               if (i == LAST_VOLATILE && !FULL_REGS(regs))
-                       break;
        }
        pr_cont("\n");
        /*
@@ -1688,7 +1690,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
        } else {
                /* user thread */
                struct pt_regs *regs = current_pt_regs();
-               CHECK_FULL_REGS(regs);
                *childregs = *regs;
                if (usp)
                        childregs->gpr[1] = usp;
@@ -1724,9 +1725,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
        kregs = (struct pt_regs *) sp;
        sp -= STACK_FRAME_OVERHEAD;
        p->thread.ksp = sp;
-#ifdef CONFIG_PPC32
-       p->thread.ksp_limit = (unsigned long)end_of_stack(p);
-#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        for (i = 0; i < nr_wp_slots(); i++)
                p->thread.ptrace_bps[i] = NULL;
@@ -1796,13 +1794,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
        regs->ccr = 0;
        regs->gpr[1] = sp;
 
-       /*
-        * We have just cleared all the nonvolatile GPRs, so make
-        * FULL_REGS(regs) return true.  This is necessary to allow
-        * ptrace to examine the thread immediately after exec.
-        */
-       SET_FULL_REGS(regs);
-
 #ifdef CONFIG_PPC32
        regs->mq = 0;
        regs->nip = start;
index 9a4797d..fbe9dee 100644
@@ -65,6 +65,8 @@
 #define DBG(fmt...)
 #endif
 
+int *chip_id_lookup_table;
+
 #ifdef CONFIG_PPC64
 int __initdata iommu_is_off;
 int __initdata iommu_force_on;
@@ -267,7 +269,7 @@ static struct feature_property {
 };
 
 #if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
-static inline void identical_pvr_fixup(unsigned long node)
+static __init void identical_pvr_fixup(unsigned long node)
 {
        unsigned int pvr;
        const char *model = of_get_flat_dt_prop(node, "model", NULL);
@@ -914,13 +916,22 @@ EXPORT_SYMBOL(of_get_ibm_chip_id);
 int cpu_to_chip_id(int cpu)
 {
        struct device_node *np;
+       int ret = -1, idx;
+
+       idx = cpu / threads_per_core;
+       if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1)
+               return chip_id_lookup_table[idx];
 
        np = of_get_cpu_node(cpu, NULL);
-       if (!np)
-               return -1;
+       if (np) {
+               ret = of_get_ibm_chip_id(np);
+               of_node_put(np);
+
+               if (chip_id_lookup_table)
+                       chip_id_lookup_table[idx] = ret;
+       }
 
-       of_node_put(np);
-       return of_get_ibm_chip_id(np);
+       return ret;
 }
 EXPORT_SYMBOL(cpu_to_chip_id);
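[Editor's note: since every thread of a core reports the same chip id, the lookup is now cached per core (index cpu / threads_per_core), and the device-tree walk through of_get_ibm_chip_id() only runs on a cache miss. The shape of that memoization as a stand-alone sketch, with a made-up slow_chip_lookup() standing in for the device-tree walk:]

#include <stdio.h>

#define NR_CORES                4
#define THREADS_PER_CORE        8

static int chip_id_cache[NR_CORES];     /* -1 means "not looked up yet" */

/* stand-in for the firmware/device-tree walk */
static int slow_chip_lookup(int cpu)
{
        printf("slow lookup for cpu %d\n", cpu);
        return cpu / (THREADS_PER_CORE * 2);    /* pretend two cores per chip */
}

static int cpu_to_chip_id_cached(int cpu)
{
        int idx = cpu / THREADS_PER_CORE;

        if (chip_id_cache[idx] != -1)
                return chip_id_cache[idx];

        chip_id_cache[idx] = slow_chip_lookup(cpu);
        return chip_id_cache[idx];
}

int main(void)
{
        for (int i = 0; i < NR_CORES; i++)
                chip_id_cache[i] = -1;

        printf("cpu 0 -> chip %d\n", cpu_to_chip_id_cached(0));
        printf("cpu 1 -> chip %d\n", cpu_to_chip_id_cached(1));  /* sibling thread: cache hit */
        return 0;
}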
 
index ccf77b9..41ed7e3 100644
@@ -2983,7 +2983,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
                                " 0x3 encode-int encode+"
                                " s\" interrupts\" property"
                        " finish-device");
-       };
+       }
 
        /* Check for a PHY device node - if missing then create one and
          * give its phandle to the ethernet node */
index 6ccffc6..773bcc4 100644
@@ -111,7 +111,7 @@ static unsigned long get_user_msr(struct task_struct *task)
        return task->thread.regs->msr | task->thread.fpexc_mode;
 }
 
-static int set_user_msr(struct task_struct *task, unsigned long msr)
+static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr)
 {
        task->thread.regs->msr &= ~MSR_DEBUGCHANGE;
        task->thread.regs->msr |= msr & MSR_DEBUGCHANGE;
@@ -147,7 +147,7 @@ static int set_user_dscr(struct task_struct *task, unsigned long dscr)
  * We prevent mucking around with the reserved area of trap
  * which is used internally by the kernel.
  */
-static int set_user_trap(struct task_struct *task, unsigned long trap)
+static __always_inline int set_user_trap(struct task_struct *task, unsigned long trap)
 {
        set_trap(task->thread.regs, trap);
        return 0;
@@ -221,17 +221,9 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset,
 #ifdef CONFIG_PPC64
        struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe));
 #endif
-       int i;
-
        if (target->thread.regs == NULL)
                return -EIO;
 
-       if (!FULL_REGS(target->thread.regs)) {
-               /* We have a partial register set.  Fill 14-31 with bogus values */
-               for (i = 14; i < 32; i++)
-                       target->thread.regs->gpr[i] = NV_REG_POISON;
-       }
-
        membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs));
 
        membuf_store(&to_msr, get_user_msr(target));
@@ -252,8 +244,6 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
        if (target->thread.regs == NULL)
                return -EIO;
 
-       CHECK_FULL_REGS(target->thread.regs);
-
        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                 target->thread.regs,
                                 0, PT_MSR * sizeof(reg));
@@ -659,6 +649,9 @@ int gpr32_set_common(struct task_struct *target,
        const compat_ulong_t __user *u = ubuf;
        compat_ulong_t reg;
 
+       if (!kbuf && !user_read_access_begin(u, count))
+               return -EFAULT;
+
        pos /= sizeof(reg);
        count /= sizeof(reg);
 
@@ -667,8 +660,7 @@ int gpr32_set_common(struct task_struct *target,
                        regs[pos++] = *k++;
        else
                for (; count > 0 && pos < PT_MSR; --count) {
-                       if (__get_user(reg, u++))
-                               return -EFAULT;
+                       unsafe_get_user(reg, u++, Efault);
                        regs[pos++] = reg;
                }
 
@@ -676,8 +668,8 @@ int gpr32_set_common(struct task_struct *target,
        if (count > 0 && pos == PT_MSR) {
                if (kbuf)
                        reg = *k++;
-               else if (__get_user(reg, u++))
-                       return -EFAULT;
+               else
+                       unsafe_get_user(reg, u++, Efault);
                set_user_msr(target, reg);
                ++pos;
                --count;
@@ -690,24 +682,24 @@ int gpr32_set_common(struct task_struct *target,
                        ++k;
        } else {
                for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
-                       if (__get_user(reg, u++))
-                               return -EFAULT;
+                       unsafe_get_user(reg, u++, Efault);
                        regs[pos++] = reg;
                }
                for (; count > 0 && pos < PT_TRAP; --count, ++pos)
-                       if (__get_user(reg, u++))
-                               return -EFAULT;
+                       unsafe_get_user(reg, u++, Efault);
        }
 
        if (count > 0 && pos == PT_TRAP) {
                if (kbuf)
                        reg = *k++;
-               else if (__get_user(reg, u++))
-                       return -EFAULT;
+               else
+                       unsafe_get_user(reg, u++, Efault);
                set_user_trap(target, reg);
                ++pos;
                --count;
        }
+       if (!kbuf)
+               user_read_access_end();
 
        kbuf = k;
        ubuf = u;
@@ -715,25 +707,19 @@ int gpr32_set_common(struct task_struct *target,
        count *= sizeof(reg);
        return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
                                         (PT_TRAP + 1) * sizeof(reg), -1);
+
+Efault:
+       user_read_access_end();
+       return -EFAULT;
 }
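[Editor's note: instead of paying for one user-access open/close per __get_user(), the whole register block is now read inside a single user_read_access_begin()/user_read_access_end() window, with unsafe_get_user() branching to the Efault label on a fault. A minimal kernel-style sketch of that pattern — not the ptrace code itself; names are made up and <linux/uaccess.h> is assumed:]

static int read_words_from_user(unsigned long *dst,
                                const unsigned long __user *src, int n)
{
        int i;

        if (!user_read_access_begin(src, n * sizeof(*src)))
                return -EFAULT;

        for (i = 0; i < n; i++)
                unsafe_get_user(dst[i], &src[i], Efault);       /* jumps to Efault on fault */

        user_read_access_end();
        return 0;

Efault:
        user_read_access_end();
        return -EFAULT;
}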
 
 static int gpr32_get(struct task_struct *target,
                     const struct user_regset *regset,
                     struct membuf to)
 {
-       int i;
-
        if (target->thread.regs == NULL)
                return -EIO;
 
-       if (!FULL_REGS(target->thread.regs)) {
-               /*
-                * We have a partial register set.
-                * Fill 14-31 with bogus values.
-                */
-               for (i = 14; i < 32; i++)
-                       target->thread.regs->gpr[i] = NV_REG_POISON;
-       }
        return gpr32_get_common(target, regset, to,
                        &target->thread.regs->gpr[0]);
 }
@@ -746,7 +732,6 @@ static int gpr32_set(struct task_struct *target,
        if (target->thread.regs == NULL)
                return -EIO;
 
-       CHECK_FULL_REGS(target->thread.regs);
        return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
                        &target->thread.regs->gpr[0]);
 }
index 4f3d4ff..0a0a33e 100644
@@ -59,7 +59,6 @@ long arch_ptrace(struct task_struct *child, long request,
                if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
                        break;
 
-               CHECK_FULL_REGS(child->thread.regs);
                if (index < PT_FPR0)
                        ret = ptrace_get_reg(child, (int) index, &tmp);
                else
@@ -81,7 +80,6 @@ long arch_ptrace(struct task_struct *child, long request,
                if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
                        break;
 
-               CHECK_FULL_REGS(child->thread.regs);
                if (index < PT_FPR0)
                        ret = ptrace_put_reg(child, index, data);
                else
@@ -354,8 +352,6 @@ void __init pt_regs_check(void)
                     offsetof(struct user_pt_regs, nip));
        BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
                     offsetof(struct user_pt_regs, msr));
-       BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
-                    offsetof(struct user_pt_regs, msr));
        BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
                     offsetof(struct user_pt_regs, orig_gpr3));
        BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
index d30b9ad..19c2248 100644
@@ -83,7 +83,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                if ((addr & 3) || (index > PT_FPSCR32))
                        break;
 
-               CHECK_FULL_REGS(child->thread.regs);
                if (index < PT_FPR0) {
                        ret = ptrace_get_reg(child, index, &tmp);
                        if (ret)
@@ -133,7 +132,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                if ((addr & 3) || numReg > PT_FPSCR)
                        break;
 
-               CHECK_FULL_REGS(child->thread.regs);
                if (numReg >= PT_FPR0) {
                        flush_fp_to_thread(child);
                        /* get 64 bit FPR */
@@ -187,7 +185,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                if ((addr & 3) || (index > PT_FPSCR32))
                        break;
 
-               CHECK_FULL_REGS(child->thread.regs);
                if (index < PT_FPR0) {
                        ret = ptrace_put_reg(child, index, data);
                } else {
@@ -226,7 +223,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                 */
                if ((addr & 3) || (numReg > PT_FPSCR))
                        break;
-               CHECK_FULL_REGS(child->thread.regs);
                if (numReg < PT_FPR0) {
                        unsigned long freg;
                        ret = ptrace_get_reg(child, numReg, &freg);
index 2d33f34..6857a5b 100644
@@ -755,11 +755,18 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v)
        return 0;
 }
 
-#define RMO_READ_BUF_MAX 30
-
-/* RTAS Userspace access */
+/**
+ * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space.
+ *
+ * Base + size description of a range of RTAS-addressable memory set
+ * aside for user space to use as work area(s) for certain RTAS
+ * functions. User space accesses this region via /dev/mem. Apart from
+ * security policies, the kernel does not arbitrate or serialize
+ * access to this region, and user space must ensure that concurrent
+ * users do not interfere with each other.
+ */
 static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v)
 {
-       seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX);
+       seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_USER_REGION_SIZE);
        return 0;
 }
index d126d71..6bada74 100644
@@ -828,7 +828,6 @@ void rtas_activate_firmware(void)
                pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
 }
 
-static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
 #ifdef CONFIG_PPC_PSERIES
 /**
  * rtas_call_reentrant() - Used for reentrant rtas calls
@@ -988,10 +987,10 @@ static struct rtas_filter rtas_filters[] __ro_after_init = {
 static bool in_rmo_buf(u32 base, u32 end)
 {
        return base >= rtas_rmo_buf &&
-               base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) &&
+               base < (rtas_rmo_buf + RTAS_USER_REGION_SIZE) &&
                base <= end &&
                end >= rtas_rmo_buf &&
-               end < (rtas_rmo_buf + RTAS_RMOBUF_MAX);
+               end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE);
 }
 
 static bool block_rtas_call(int token, int nargs,
@@ -1052,6 +1051,14 @@ err:
        return true;
 }
 
+static void __init rtas_syscall_filter_init(void)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(rtas_filters); i++)
+               rtas_filters[i].token = rtas_token(rtas_filters[i].name);
+}
+
 #else
 
 static bool block_rtas_call(int token, int nargs,
@@ -1060,6 +1067,10 @@ static bool block_rtas_call(int token, int nargs,
        return false;
 }
 
+static void __init rtas_syscall_filter_init(void)
+{
+}
+
 #endif /* CONFIG_PPC_RTAS_FILTER */
 
 /* We assume to be passed big endian arguments */
@@ -1103,7 +1114,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
                return -EINVAL;
 
        /* Need to handle ibm,suspend_me call specially */
-       if (token == ibm_suspend_me_token) {
+       if (token == rtas_token("ibm,suspend-me")) {
 
                /*
                 * rtas_ibm_suspend_me assumes the streamid handle is in cpu
@@ -1163,9 +1174,6 @@ void __init rtas_initialize(void)
        unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
        u32 base, size, entry;
        int no_base, no_size, no_entry;
-#ifdef CONFIG_PPC_RTAS_FILTER
-       int i;
-#endif
 
        /* Get RTAS dev node and fill up our "rtas" structure with infos
         * about it.
@@ -1191,12 +1199,10 @@ void __init rtas_initialize(void)
         * the stop-self token if any
         */
 #ifdef CONFIG_PPC64
-       if (firmware_has_feature(FW_FEATURE_LPAR)) {
+       if (firmware_has_feature(FW_FEATURE_LPAR))
                rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX);
-               ibm_suspend_me_token = rtas_token("ibm,suspend-me");
-       }
 #endif
-       rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
+       rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE,
                                                 0, rtas_region);
        if (!rtas_rmo_buf)
                panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
@@ -1206,11 +1212,7 @@ void __init rtas_initialize(void)
        rtas_last_error_token = rtas_token("rtas-last-error");
 #endif
 
-#ifdef CONFIG_PPC_RTAS_FILTER
-       for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) {
-               rtas_filters[i].token = rtas_token(rtas_filters[i].name);
-       }
-#endif
+       rtas_syscall_filter_init();
 }
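[Editor's note: pulling the token lookup into rtas_syscall_filter_init(), with an empty definition when CONFIG_PPC_RTAS_FILTER is not set, lets rtas_initialize() call it unconditionally and drops both the #ifdef block and the loop variable from the caller. The idiom in its generic form, with a hypothetical CONFIG_FOO option (kernel-style sketch, <linux/init.h> assumed):]

#ifdef CONFIG_FOO
static void __init foo_filter_init(void)
{
        /* real initialization work goes here */
}
#else
static void __init foo_filter_init(void)
{
}
#endif

void __init foo_setup(void)
{
        foo_filter_init();      /* no #ifdef needed at the call site */
}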
 
 int __init early_init_dt_scan_rtas(unsigned long node,
index e4e1a94..0fdfcdd 100644
@@ -7,6 +7,7 @@
 #include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
+#include <linux/memblock.h>
 #include <linux/nospec.h>
 #include <linux/prctl.h>
 #include <linux/seq_buf.h>
@@ -18,6 +19,7 @@
 #include <asm/setup.h>
 #include <asm/inst.h>
 
+#include "setup.h"
 
 u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
 
@@ -250,7 +252,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
 
 static enum stf_barrier_type stf_enabled_flush_types;
 static bool no_stf_barrier;
-bool stf_barrier;
+static bool stf_barrier;
 
 static int __init handle_no_stf_barrier(char *p)
 {
@@ -541,6 +543,178 @@ void setup_count_cache_flush(void)
        toggle_branch_cache_flush(enable);
 }
 
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+static bool no_entry_flush;
+static bool no_uaccess_flush;
+bool rfi_flush;
+static bool entry_flush;
+static bool uaccess_flush;
+DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
+EXPORT_SYMBOL(uaccess_flush_key);
+
+static int __init handle_no_rfi_flush(char *p)
+{
+       pr_info("rfi-flush: disabled on command line.");
+       no_rfi_flush = true;
+       return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+static int __init handle_no_entry_flush(char *p)
+{
+       pr_info("entry-flush: disabled on command line.");
+       no_entry_flush = true;
+       return 0;
+}
+early_param("no_entry_flush", handle_no_entry_flush);
+
+static int __init handle_no_uaccess_flush(char *p)
+{
+       pr_info("uaccess-flush: disabled on command line.");
+       no_uaccess_flush = true;
+       return 0;
+}
+early_param("no_uaccess_flush", handle_no_uaccess_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see doco that says to use
+ * nopti we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+       pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+       handle_no_rfi_flush(NULL);
+       return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+       /*
+        * We don't need to do the flush explicitly, just enter+exit kernel is
+        * sufficient, the RFI exit handlers will do the right thing.
+        */
+}
+
+void rfi_flush_enable(bool enable)
+{
+       if (enable) {
+               do_rfi_flush_fixups(enabled_flush_types);
+               on_each_cpu(do_nothing, NULL, 1);
+       } else
+               do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+       rfi_flush = enable;
+}
+
+static void entry_flush_enable(bool enable)
+{
+       if (enable) {
+               do_entry_flush_fixups(enabled_flush_types);
+               on_each_cpu(do_nothing, NULL, 1);
+       } else {
+               do_entry_flush_fixups(L1D_FLUSH_NONE);
+       }
+
+       entry_flush = enable;
+}
+
+static void uaccess_flush_enable(bool enable)
+{
+       if (enable) {
+               do_uaccess_flush_fixups(enabled_flush_types);
+               static_branch_enable(&uaccess_flush_key);
+               on_each_cpu(do_nothing, NULL, 1);
+       } else {
+               static_branch_disable(&uaccess_flush_key);
+               do_uaccess_flush_fixups(L1D_FLUSH_NONE);
+       }
+
+       uaccess_flush = enable;
+}
+
+static void __ref init_fallback_flush(void)
+{
+       u64 l1d_size, limit;
+       int cpu;
+
+       /* Only allocate the fallback flush area once (at boot time). */
+       if (l1d_flush_fallback_area)
+               return;
+
+       l1d_size = ppc64_caches.l1d.size;
+
+       /*
+        * If there is no d-cache-size property in the device tree, l1d_size
+        * could be zero. That leads to the loop in the asm wrapping around to
+        * 2^64-1, and then walking off the end of the fallback area and
+        * eventually causing a page fault which is fatal. Just default to
+        * something vaguely sane.
+        */
+       if (!l1d_size)
+               l1d_size = (64 * 1024);
+
+       limit = min(ppc64_bolted_size(), ppc64_rma_size);
+
+       /*
+        * Align to L1d size, and size it at 2x L1d size, to catch possible
+        * hardware prefetch runoff. We don't have a recipe for load patterns to
+        * reliably avoid the prefetcher.
+        */
+       l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
+                                               l1d_size, MEMBLOCK_LOW_LIMIT,
+                                               limit, NUMA_NO_NODE);
+       if (!l1d_flush_fallback_area)
+               panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
+                     __func__, l1d_size * 2, l1d_size, &limit);
+
+
+       for_each_possible_cpu(cpu) {
+               struct paca_struct *paca = paca_ptrs[cpu];
+               paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
+               paca->l1d_flush_size = l1d_size;
+       }
+}
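[Editor's note: the comments above carry the sizing rationale: a missing d-cache-size property would otherwise let the flush loop wrap around, so the size defaults to 64 KiB, and the fallback area is allocated at twice the L1D size, aligned to the L1D size, to absorb prefetch past the end. A tiny stand-alone mirror of that arithmetic for a few cache sizes:]

#include <stdio.h>

int main(void)
{
        unsigned long long sizes[] = { 0, 32 * 1024, 64 * 1024 };

        for (int i = 0; i < 3; i++) {
                unsigned long long l1d = sizes[i];

                if (!l1d)
                        l1d = 64 * 1024;        /* no d-cache-size property: pick a sane default */

                printf("l1d=%6llu -> area=%6llu bytes, aligned to %6llu\n",
                       l1d, l1d * 2, l1d);
        }
        return 0;
}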
+
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+       if (types & L1D_FLUSH_FALLBACK) {
+               pr_info("rfi-flush: fallback displacement flush available\n");
+               init_fallback_flush();
+       }
+
+       if (types & L1D_FLUSH_ORI)
+               pr_info("rfi-flush: ori type flush available\n");
+
+       if (types & L1D_FLUSH_MTTRIG)
+               pr_info("rfi-flush: mttrig type flush available\n");
+
+       enabled_flush_types = types;
+
+       if (!cpu_mitigations_off() && !no_rfi_flush)
+               rfi_flush_enable(enable);
+}
+
+void setup_entry_flush(bool enable)
+{
+       if (cpu_mitigations_off())
+               return;
+
+       if (!no_entry_flush)
+               entry_flush_enable(enable);
+}
+
+void setup_uaccess_flush(bool enable)
+{
+       if (cpu_mitigations_off())
+               return;
+
+       if (!no_uaccess_flush)
+               uaccess_flush_enable(enable);
+}
+
 #ifdef CONFIG_DEBUG_FS
 static int count_cache_flush_set(void *data, u64 val)
 {
@@ -579,5 +753,92 @@ static __init int count_cache_flush_debugfs_init(void)
        return 0;
 }
 device_initcall(count_cache_flush_debugfs_init);
+
+static int rfi_flush_set(void *data, u64 val)
+{
+       bool enable;
+
+       if (val == 1)
+               enable = true;
+       else if (val == 0)
+               enable = false;
+       else
+               return -EINVAL;
+
+       /* Only do anything if we're changing state */
+       if (enable != rfi_flush)
+               rfi_flush_enable(enable);
+
+       return 0;
+}
+
+static int rfi_flush_get(void *data, u64 *val)
+{
+       *val = rfi_flush ? 1 : 0;
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+
+static int entry_flush_set(void *data, u64 val)
+{
+       bool enable;
+
+       if (val == 1)
+               enable = true;
+       else if (val == 0)
+               enable = false;
+       else
+               return -EINVAL;
+
+       /* Only do anything if we're changing state */
+       if (enable != entry_flush)
+               entry_flush_enable(enable);
+
+       return 0;
+}
+
+static int entry_flush_get(void *data, u64 *val)
+{
+       *val = entry_flush ? 1 : 0;
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
+
+static int uaccess_flush_set(void *data, u64 val)
+{
+       bool enable;
+
+       if (val == 1)
+               enable = true;
+       else if (val == 0)
+               enable = false;
+       else
+               return -EINVAL;
+
+       /* Only do anything if we're changing state */
+       if (enable != uaccess_flush)
+               uaccess_flush_enable(enable);
+
+       return 0;
+}
+
+static int uaccess_flush_get(void *data, u64 *val)
+{
+       *val = uaccess_flush ? 1 : 0;
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
+
+static __init int rfi_flush_debugfs_init(void)
+{
+       debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+       debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
+       debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
+       return 0;
+}
+device_initcall(rfi_flush_debugfs_init);
 #endif /* CONFIG_DEBUG_FS */
 #endif /* CONFIG_PPC_BOOK3S_64 */
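[Editor's note: the mitigation switches stay reachable at runtime through debugfs; powerpc_debugfs_root is the arch directory, so with debugfs mounted in the usual place the files appear as /sys/kernel/debug/powerpc/rfi_flush, entry_flush and uaccess_flush, each taking 0 or 1. For reference, the same DEFINE_SIMPLE_ATTRIBUTE pattern stripped down to a single hypothetical knob (kernel-style sketch; <linux/debugfs.h>, <linux/fs.h> and <linux/init.h> assumed):]

static bool demo_flag;

static int demo_flag_set(void *data, u64 val)
{
        if (val > 1)
                return -EINVAL;
        demo_flag = !!val;
        return 0;
}

static int demo_flag_get(void *data, u64 *val)
{
        *val = demo_flag;
        return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_demo_flag, demo_flag_get, demo_flag_set, "%llu\n");

static int __init demo_flag_debugfs_init(void)
{
        debugfs_create_file("demo_flag", 0600, NULL, NULL, &fops_demo_flag);
        return 0;
}
device_initcall(demo_flag_debugfs_init);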
index bee984b..74a98ff 100644
@@ -69,7 +69,6 @@
 #include "setup.h"
 
 #ifdef DEBUG
-#include <asm/udbg.h>
 #define DBG(fmt...) udbg_printf(fmt)
 #else
 #define DBG(fmt...)
@@ -829,7 +828,7 @@ static __init void print_system_info(void)
 }
 
 #ifdef CONFIG_SMP
-static void smp_setup_pacas(void)
+static void __init smp_setup_pacas(void)
 {
        int cpu;
 
index 8ba49a6..d7c1f92 100644
@@ -164,7 +164,7 @@ void __init irqstack_early_init(void)
 }
 
 #ifdef CONFIG_VMAP_STACK
-void *emergency_ctx[NR_CPUS] __ro_after_init;
+void *emergency_ctx[NR_CPUS] __ro_after_init = {[0] = &init_stack};
 
 void __init emergency_stack_init(void)
 {
index 560ed8b..b779d25 100644
@@ -232,10 +232,23 @@ static void cpu_ready_for_interrupts(void)
         * If we are not in hypervisor mode the job is done once for
         * the whole partition in configure_exceptions().
         */
-       if (cpu_has_feature(CPU_FTR_HVMODE) &&
-           cpu_has_feature(CPU_FTR_ARCH_207S)) {
+       if (cpu_has_feature(CPU_FTR_HVMODE)) {
                unsigned long lpcr = mfspr(SPRN_LPCR);
-               mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+               unsigned long new_lpcr = lpcr;
+
+               if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+                       /* P10 DD1 does not have HAIL */
+                       if (pvr_version_is(PVR_POWER10) &&
+                                       (mfspr(SPRN_PVR) & 0xf00) == 0x100)
+                               new_lpcr |= LPCR_AIL_3;
+                       else
+                               new_lpcr |= LPCR_HAIL;
+               } else if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+                       new_lpcr |= LPCR_AIL_3;
+               }
+
+               if (new_lpcr != lpcr)
+                       mtspr(SPRN_LPCR, new_lpcr);
        }
 
        /*
@@ -941,266 +954,3 @@ static int __init disable_hardlockup_detector(void)
        return 0;
 }
 early_initcall(disable_hardlockup_detector);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-static enum l1d_flush_type enabled_flush_types;
-static void *l1d_flush_fallback_area;
-static bool no_rfi_flush;
-static bool no_entry_flush;
-static bool no_uaccess_flush;
-bool rfi_flush;
-bool entry_flush;
-bool uaccess_flush;
-DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
-EXPORT_SYMBOL(uaccess_flush_key);
-
-static int __init handle_no_rfi_flush(char *p)
-{
-       pr_info("rfi-flush: disabled on command line.");
-       no_rfi_flush = true;
-       return 0;
-}
-early_param("no_rfi_flush", handle_no_rfi_flush);
-
-static int __init handle_no_entry_flush(char *p)
-{
-       pr_info("entry-flush: disabled on command line.");
-       no_entry_flush = true;
-       return 0;
-}
-early_param("no_entry_flush", handle_no_entry_flush);
-
-static int __init handle_no_uaccess_flush(char *p)
-{
-       pr_info("uaccess-flush: disabled on command line.");
-       no_uaccess_flush = true;
-       return 0;
-}
-early_param("no_uaccess_flush", handle_no_uaccess_flush);
-
-/*
- * The RFI flush is not KPTI, but because users will see doco that says to use
- * nopti we hijack that option here to also disable the RFI flush.
- */
-static int __init handle_no_pti(char *p)
-{
-       pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
-       handle_no_rfi_flush(NULL);
-       return 0;
-}
-early_param("nopti", handle_no_pti);
-
-static void do_nothing(void *unused)
-{
-       /*
-        * We don't need to do the flush explicitly, just enter+exit kernel is
-        * sufficient, the RFI exit handlers will do the right thing.
-        */
-}
-
-void rfi_flush_enable(bool enable)
-{
-       if (enable) {
-               do_rfi_flush_fixups(enabled_flush_types);
-               on_each_cpu(do_nothing, NULL, 1);
-       } else
-               do_rfi_flush_fixups(L1D_FLUSH_NONE);
-
-       rfi_flush = enable;
-}
-
-static void entry_flush_enable(bool enable)
-{
-       if (enable) {
-               do_entry_flush_fixups(enabled_flush_types);
-               on_each_cpu(do_nothing, NULL, 1);
-       } else {
-               do_entry_flush_fixups(L1D_FLUSH_NONE);
-       }
-
-       entry_flush = enable;
-}
-
-static void uaccess_flush_enable(bool enable)
-{
-       if (enable) {
-               do_uaccess_flush_fixups(enabled_flush_types);
-               static_branch_enable(&uaccess_flush_key);
-               on_each_cpu(do_nothing, NULL, 1);
-       } else {
-               static_branch_disable(&uaccess_flush_key);
-               do_uaccess_flush_fixups(L1D_FLUSH_NONE);
-       }
-
-       uaccess_flush = enable;
-}
-
-static void __ref init_fallback_flush(void)
-{
-       u64 l1d_size, limit;
-       int cpu;
-
-       /* Only allocate the fallback flush area once (at boot time). */
-       if (l1d_flush_fallback_area)
-               return;
-
-       l1d_size = ppc64_caches.l1d.size;
-
-       /*
-        * If there is no d-cache-size property in the device tree, l1d_size
-        * could be zero. That leads to the loop in the asm wrapping around to
-        * 2^64-1, and then walking off the end of the fallback area and
-        * eventually causing a page fault which is fatal. Just default to
-        * something vaguely sane.
-        */
-       if (!l1d_size)
-               l1d_size = (64 * 1024);
-
-       limit = min(ppc64_bolted_size(), ppc64_rma_size);
-
-       /*
-        * Align to L1d size, and size it at 2x L1d size, to catch possible
-        * hardware prefetch runoff. We don't have a recipe for load patterns to
-        * reliably avoid the prefetcher.
-        */
-       l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
-                                               l1d_size, MEMBLOCK_LOW_LIMIT,
-                                               limit, NUMA_NO_NODE);
-       if (!l1d_flush_fallback_area)
-               panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
-                     __func__, l1d_size * 2, l1d_size, &limit);
-
-
-       for_each_possible_cpu(cpu) {
-               struct paca_struct *paca = paca_ptrs[cpu];
-               paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
-               paca->l1d_flush_size = l1d_size;
-       }
-}
-
-void setup_rfi_flush(enum l1d_flush_type types, bool enable)
-{
-       if (types & L1D_FLUSH_FALLBACK) {
-               pr_info("rfi-flush: fallback displacement flush available\n");
-               init_fallback_flush();
-       }
-
-       if (types & L1D_FLUSH_ORI)
-               pr_info("rfi-flush: ori type flush available\n");
-
-       if (types & L1D_FLUSH_MTTRIG)
-               pr_info("rfi-flush: mttrig type flush available\n");
-
-       enabled_flush_types = types;
-
-       if (!cpu_mitigations_off() && !no_rfi_flush)
-               rfi_flush_enable(enable);
-}
-
-void setup_entry_flush(bool enable)
-{
-       if (cpu_mitigations_off())
-               return;
-
-       if (!no_entry_flush)
-               entry_flush_enable(enable);
-}
-
-void setup_uaccess_flush(bool enable)
-{
-       if (cpu_mitigations_off())
-               return;
-
-       if (!no_uaccess_flush)
-               uaccess_flush_enable(enable);
-}
-
-#ifdef CONFIG_DEBUG_FS
-static int rfi_flush_set(void *data, u64 val)
-{
-       bool enable;
-
-       if (val == 1)
-               enable = true;
-       else if (val == 0)
-               enable = false;
-       else
-               return -EINVAL;
-
-       /* Only do anything if we're changing state */
-       if (enable != rfi_flush)
-               rfi_flush_enable(enable);
-
-       return 0;
-}
-
-static int rfi_flush_get(void *data, u64 *val)
-{
-       *val = rfi_flush ? 1 : 0;
-       return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
-
-static int entry_flush_set(void *data, u64 val)
-{
-       bool enable;
-
-       if (val == 1)
-               enable = true;
-       else if (val == 0)
-               enable = false;
-       else
-               return -EINVAL;
-
-       /* Only do anything if we're changing state */
-       if (enable != entry_flush)
-               entry_flush_enable(enable);
-
-       return 0;
-}
-
-static int entry_flush_get(void *data, u64 *val)
-{
-       *val = entry_flush ? 1 : 0;
-       return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
-
-static int uaccess_flush_set(void *data, u64 val)
-{
-       bool enable;
-
-       if (val == 1)
-               enable = true;
-       else if (val == 0)
-               enable = false;
-       else
-               return -EINVAL;
-
-       /* Only do anything if we're changing state */
-       if (enable != uaccess_flush)
-               uaccess_flush_enable(enable);
-
-       return 0;
-}
-
-static int uaccess_flush_get(void *data, u64 *val)
-{
-       *val = uaccess_flush ? 1 : 0;
-       return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
-
-static __init int rfi_flush_debugfs_init(void)
-{
-       debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
-       debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
-       debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
-       return 0;
-}
-device_initcall(rfi_flush_debugfs_init);
-#endif
-#endif /* CONFIG_PPC_BOOK3S_64 */
index 2559a68..f4aafa3 100644
@@ -19,6 +19,15 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
                              struct task_struct *tsk);
 
+static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src)
+{
+       BUILD_BUG_ON(sizeof(sigset_t) != sizeof(u64));
+
+       return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]);
+}
+#define unsafe_get_user_sigset(dst, src, label) \
+       unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label)
+
 #ifdef CONFIG_VSX
 extern unsigned long copy_vsx_to_user(void __user *to,
                                      struct task_struct *task);
@@ -53,6 +62,26 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
                                &buf[i], label);\
 } while (0)
 
+#define unsafe_copy_fpr_from_user(task, from, label)   do {            \
+       struct task_struct *__t = task;                                 \
+       u64 __user *buf = (u64 __user *)from;                           \
+       int i;                                                          \
+                                                                       \
+       for (i = 0; i < ELF_NFPREG - 1; i++)                            \
+               unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \
+       unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label);    \
+} while (0)
+
+#define unsafe_copy_vsx_from_user(task, from, label)   do {            \
+       struct task_struct *__t = task;                                 \
+       u64 __user *buf = (u64 __user *)from;                           \
+       int i;                                                          \
+                                                                       \
+       for (i = 0; i < ELF_NVSRHALFREG ; i++)                          \
+               unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
+                               &buf[i], label);                        \
+} while (0)
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 #define unsafe_copy_ckfpr_to_user(to, task, label)     do {            \
        struct task_struct *__t = task;                                 \
@@ -73,6 +102,26 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
                unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \
                                &buf[i], label);\
 } while (0)
+
+#define unsafe_copy_ckfpr_from_user(task, from, label) do {            \
+       struct task_struct *__t = task;                                 \
+       u64 __user *buf = (u64 __user *)from;                           \
+       int i;                                                          \
+                                                                       \
+       for (i = 0; i < ELF_NFPREG - 1 ; i++)                           \
+               unsafe_get_user(__t->thread.TS_CKFPR(i), &buf[i], label);\
+       unsafe_get_user(__t->thread.ckfp_state.fpscr, &buf[i], label); \
+} while (0)
+
+#define unsafe_copy_ckvsx_from_user(task, from, label) do {            \
+       struct task_struct *__t = task;                                 \
+       u64 __user *buf = (u64 __user *)from;                           \
+       int i;                                                          \
+                                                                       \
+       for (i = 0; i < ELF_NVSRHALFREG ; i++)                          \
+               unsafe_get_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \
+                               &buf[i], label);                        \
+} while (0)
 #endif
 #elif defined(CONFIG_PPC_FPU_REGS)
 
@@ -80,6 +129,10 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
        unsafe_copy_to_user(to, (task)->thread.fp_state.fpr,    \
                            ELF_NFPREG * sizeof(double), label)
 
+#define unsafe_copy_fpr_from_user(task, from, label)                   \
+       unsafe_copy_from_user((task)->thread.fp_state.fpr, from,        \
+                           ELF_NFPREG * sizeof(double), label)
+
 static inline unsigned long
 copy_fpr_to_user(void __user *to, struct task_struct *task)
 {
@@ -115,6 +168,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
 #else
 #define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
 
+#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+
 static inline unsigned long
 copy_fpr_to_user(void __user *to, struct task_struct *task)
 {
index f651b99..8f05ed0 100644
  * implementation that makes things simple for little endian only)
  */
 #define unsafe_put_sigset_t    unsafe_put_compat_sigset
-
-static inline int get_sigset_t(sigset_t *set,
-                              const compat_sigset_t __user *uset)
-{
-       return get_compat_sigset(set, uset);
-}
+#define unsafe_get_sigset_t    unsafe_get_compat_sigset
 
 #define to_user_ptr(p)         ptr_to_compat(p)
 #define from_user_ptr(p)       compat_ptr(p)
 
 static __always_inline int
-save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame)
+__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame)
 {
        elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
        int val, i;
 
-       WARN_ON(!FULL_REGS(regs));
-
        for (i = 0; i <= PT_RESULT; i ++) {
                /* Force user to always see softe as 1 (interrupts enabled) */
                if (i == PT_SOFTE)
@@ -116,8 +109,8 @@ failed:
        return 1;
 }
 
-static inline int restore_general_regs(struct pt_regs *regs,
-               struct mcontext __user *sr)
+static __always_inline int
+__unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr)
 {
        elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
        int i;
@@ -125,10 +118,12 @@ static inline int restore_general_regs(struct pt_regs *regs,
        for (i = 0; i <= PT_RESULT; i++) {
                if ((i == PT_MSR) || (i == PT_SOFTE))
                        continue;
-               if (__get_user(gregs[i], &sr->mc_gregs[i]))
-                       return -EFAULT;
+               unsafe_get_user(gregs[i], &sr->mc_gregs[i], failed);
        }
        return 0;
+
+failed:
+       return 1;
 }
 
 #else /* CONFIG_PPC64 */
@@ -142,18 +137,14 @@ static inline int restore_general_regs(struct pt_regs *regs,
        unsafe_copy_to_user(__us, __s, sizeof(*__us), label);           \
 } while (0)
 
-static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset)
-{
-       return copy_from_user(set, uset, sizeof(*uset));
-}
+#define unsafe_get_sigset_t    unsafe_get_user_sigset
 
 #define to_user_ptr(p)         ((unsigned long)(p))
 #define from_user_ptr(p)       ((void __user *)(p))
 
 static __always_inline int
-save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame)
+__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame)
 {
-       WARN_ON(!FULL_REGS(regs));
        unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed);
        return 0;
 
@@ -161,23 +152,30 @@ failed:
        return 1;
 }
 
-static inline int restore_general_regs(struct pt_regs *regs,
-               struct mcontext __user *sr)
+static __always_inline
+int __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr)
 {
        /* copy up to but not including MSR */
-       if (__copy_from_user(regs, &sr->mc_gregs,
-                               PT_MSR * sizeof(elf_greg_t)))
-               return -EFAULT;
+       unsafe_copy_from_user(regs, &sr->mc_gregs, PT_MSR * sizeof(elf_greg_t), failed);
+
        /* copy from orig_r3 (the word after the MSR) up to the end */
-       if (__copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
-                               GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t)))
-               return -EFAULT;
+       unsafe_copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
+                             GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t), failed);
+
        return 0;
+
+failed:
+       return 1;
 }
 #endif
 
 #define unsafe_save_general_regs(regs, frame, label) do {      \
-       if (save_general_regs_unsafe(regs, frame))      \
+       if (__unsafe_save_general_regs(regs, frame))            \
+               goto label;                                     \
+} while (0)
+
+#define unsafe_restore_general_regs(regs, frame, label) do {   \
+       if (__unsafe_restore_general_regs(regs, frame))         \
                goto label;                                     \
 } while (0)
 
@@ -260,8 +258,8 @@ static void prepare_save_user_regs(int ctx_has_vsx_region)
 #endif
 }
 
-static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
-                                struct mcontext __user *tm_frame, int ctx_has_vsx_region)
+static int __unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
+                                  struct mcontext __user *tm_frame, int ctx_has_vsx_region)
 {
        unsigned long msr = regs->msr;
 
@@ -338,7 +336,7 @@ failed:
 }
 
 #define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \
-       if (save_user_regs_unsafe(regs, frame, tm_frame, has_vsx))      \
+       if (__unsafe_save_user_regs(regs, frame, tm_frame, has_vsx))    \
                goto label;                                             \
 } while (0)
 
@@ -350,7 +348,7 @@ failed:
  * We also save the transactional registers to a second ucontext in the
  * frame.
  *
- * See save_user_regs_unsafe() and signal_64.c:setup_tm_sigcontexts().
+ * See __unsafe_save_user_regs() and signal_64.c:setup_tm_sigcontexts().
  */
 static void prepare_save_tm_user_regs(void)
 {
@@ -441,7 +439,7 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_SPE
        /* SPE regs are not checkpointed with TM, so this section is
-        * simply the same as in save_user_regs_unsafe().
+        * simply the same as in __unsafe_save_user_regs().
         */
        if (current->thread.used_spe) {
                unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr,
@@ -485,26 +483,25 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
 static long restore_user_regs(struct pt_regs *regs,
                              struct mcontext __user *sr, int sig)
 {
-       long err;
        unsigned int save_r2 = 0;
        unsigned long msr;
 #ifdef CONFIG_VSX
        int i;
 #endif
 
+       if (!user_read_access_begin(sr, sizeof(*sr)))
+               return 1;
        /*
         * restore the general registers, but not MSR or SOFTE. Also take
         * care of keeping r2 (TLS) intact if this is not a signal return
         */
        if (!sig)
                save_r2 = (unsigned int)regs->gpr[2];
-       err = restore_general_regs(regs, sr);
+       unsafe_restore_general_regs(regs, sr, failed);
        set_trap_norestart(regs);
-       err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
+       unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
        if (!sig)
                regs->gpr[2] = (unsigned long) save_r2;
-       if (err)
-               return 1;
 
        /* if doing signal return, restore the previous little-endian mode */
        if (sig)
@@ -518,22 +515,19 @@ static long restore_user_regs(struct pt_regs *regs,
        regs->msr &= ~MSR_VEC;
        if (msr & MSR_VEC) {
                /* restore altivec registers from the stack */
-               if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
-                                    sizeof(sr->mc_vregs)))
-                       return 1;
+               unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
+                                     sizeof(sr->mc_vregs), failed);
                current->thread.used_vr = true;
        } else if (current->thread.used_vr)
                memset(&current->thread.vr_state, 0,
                       ELF_NVRREG * sizeof(vector128));
 
        /* Always get VRSAVE back */
-       if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
-               return 1;
+       unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed);
        if (cpu_has_feature(CPU_FTR_ALTIVEC))
                mtspr(SPRN_VRSAVE, current->thread.vrsave);
 #endif /* CONFIG_ALTIVEC */
-       if (copy_fpr_from_user(current, &sr->mc_fregs))
-               return 1;
+       unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
 
 #ifdef CONFIG_VSX
        /*
@@ -546,8 +540,7 @@ static long restore_user_regs(struct pt_regs *regs,
                 * Restore altivec registers from the stack to a local
                 * buffer, then write this out to the thread_struct
                 */
-               if (copy_vsx_from_user(current, &sr->mc_vsregs))
-                       return 1;
+               unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed);
                current->thread.used_vsr = true;
        } else if (current->thread.used_vsr)
                for (i = 0; i < 32 ; i++)
@@ -565,19 +558,22 @@ static long restore_user_regs(struct pt_regs *regs,
        regs->msr &= ~MSR_SPE;
        if (msr & MSR_SPE) {
                /* restore spe registers from the stack */
-               if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
-                                    ELF_NEVRREG * sizeof(u32)))
-                       return 1;
+               unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
+                                     ELF_NEVRREG * sizeof(u32), failed);
                current->thread.used_spe = true;
        } else if (current->thread.used_spe)
                memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
 
        /* Always get SPEFSCR back */
-       if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG))
-               return 1;
+       unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
 #endif /* CONFIG_SPE */
 
+       user_read_access_end();
        return 0;
+
+failed:
+       user_read_access_end();
+       return 1;
 }
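
restore_user_regs() now does all of its reads inside one user_read_access_begin()/user_read_access_end() window, with a single failed: label that closes the window on any fault. Stripped of the register details, the pattern looks roughly like this (struct and field names are invented for illustration):

#include <linux/uaccess.h>

struct pair_frame {
	unsigned long first;
	unsigned long second;
};

static long read_pair(struct pair_frame __user *p, unsigned long *a, unsigned long *b)
{
	if (!user_read_access_begin(p, sizeof(*p)))
		return 1;

	unsafe_get_user(*a, &p->first, failed);
	unsafe_get_user(*b, &p->second, failed);

	user_read_access_end();
	return 0;

failed:
	user_read_access_end();
	return 1;
}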
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -590,7 +586,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
                                 struct mcontext __user *sr,
                                 struct mcontext __user *tm_sr)
 {
-       long err;
        unsigned long msr, msr_hi;
 #ifdef CONFIG_VSX
        int i;
@@ -605,15 +600,13 @@ static long restore_tm_user_regs(struct pt_regs *regs,
         * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR
         * were set by the signal delivery.
         */
-       err = restore_general_regs(regs, tm_sr);
-       err |= restore_general_regs(&current->thread.ckpt_regs, sr);
-
-       err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]);
-
-       err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
-       if (err)
+       if (!user_read_access_begin(sr, sizeof(*sr)))
                return 1;
 
+       unsafe_restore_general_regs(&current->thread.ckpt_regs, sr, failed);
+       unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed);
+       unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
+
        /* Restore the previous little-endian mode */
        regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
 
@@ -621,12 +614,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
        regs->msr &= ~MSR_VEC;
        if (msr & MSR_VEC) {
                /* restore altivec registers from the stack */
-               if (__copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
-                                    sizeof(sr->mc_vregs)) ||
-                   __copy_from_user(&current->thread.vr_state,
-                                    &tm_sr->mc_vregs,
-                                    sizeof(sr->mc_vregs)))
-                       return 1;
+               unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
+                                     sizeof(sr->mc_vregs), failed);
                current->thread.used_vr = true;
        } else if (current->thread.used_vr) {
                memset(&current->thread.vr_state, 0,
@@ -636,20 +625,15 @@ static long restore_tm_user_regs(struct pt_regs *regs,
        }
 
        /* Always get VRSAVE back */
-       if (__get_user(current->thread.ckvrsave,
-                      (u32 __user *)&sr->mc_vregs[32]) ||
-           __get_user(current->thread.vrsave,
-                      (u32 __user *)&tm_sr->mc_vregs[32]))
-               return 1;
+       unsafe_get_user(current->thread.ckvrsave,
+                       (u32 __user *)&sr->mc_vregs[32], failed);
        if (cpu_has_feature(CPU_FTR_ALTIVEC))
                mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
 #endif /* CONFIG_ALTIVEC */
 
        regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
 
-       if (copy_fpr_from_user(current, &sr->mc_fregs) ||
-           copy_ckfpr_from_user(current, &tm_sr->mc_fregs))
-               return 1;
+       unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
 
 #ifdef CONFIG_VSX
        regs->msr &= ~MSR_VSX;
@@ -658,9 +642,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
                 * Restore altivec registers from the stack to a local
                 * buffer, then write this out to the thread_struct
                 */
-               if (copy_vsx_from_user(current, &tm_sr->mc_vsregs) ||
-                   copy_ckvsx_from_user(current, &sr->mc_vsregs))
-                       return 1;
+               unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed);
                current->thread.used_vsr = true;
        } else if (current->thread.used_vsr)
                for (i = 0; i < 32 ; i++) {
@@ -675,23 +657,54 @@ static long restore_tm_user_regs(struct pt_regs *regs,
         */
        regs->msr &= ~MSR_SPE;
        if (msr & MSR_SPE) {
-               if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
-                                    ELF_NEVRREG * sizeof(u32)))
-                       return 1;
+               unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
+                                     ELF_NEVRREG * sizeof(u32), failed);
                current->thread.used_spe = true;
        } else if (current->thread.used_spe)
                memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
 
        /* Always get SPEFSCR back */
-       if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs
-                      + ELF_NEVRREG))
-               return 1;
+       unsafe_get_user(current->thread.spefscr,
+                       (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
 #endif /* CONFIG_SPE */
 
-       /* Get the top half of the MSR from the user context */
-       if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
+       user_read_access_end();
+
+       if (!user_read_access_begin(tm_sr, sizeof(*tm_sr)))
                return 1;
+
+       unsafe_restore_general_regs(regs, tm_sr, failed);
+
+#ifdef CONFIG_ALTIVEC
+       /* restore altivec registers from the stack */
+       if (msr & MSR_VEC)
+               unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs,
+                                     sizeof(sr->mc_vregs), failed);
+
+       /* Always get VRSAVE back */
+       unsafe_get_user(current->thread.vrsave,
+                       (u32 __user *)&tm_sr->mc_vregs[32], failed);
+#endif /* CONFIG_ALTIVEC */
+
+       unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed);
+
+#ifdef CONFIG_VSX
+       if (msr & MSR_VSX) {
+               /*
+                * Restore altivec registers from the stack to a local
+                * buffer, then write this out to the thread_struct
+                */
+               unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed);
+               current->thread.used_vsr = true;
+       }
+#endif /* CONFIG_VSX */
+
+       /* Get the top half of the MSR from the user context */
+       unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed);
        msr_hi <<= 32;
+
+       user_read_access_end();
+
        /* If TM bits are set to the reserved value, it's an invalid context */
        if (MSR_TM_RESV(msr_hi))
                return 1;
@@ -739,6 +752,16 @@ static long restore_tm_user_regs(struct pt_regs *regs,
        preempt_enable();
 
        return 0;
+
+failed:
+       user_read_access_end();
+       return 1;
+}
+#else
+static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr,
+                                struct mcontext __user *tm_sr)
+{
+       return 0;
 }
 #endif
 
@@ -944,28 +967,31 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
        sigset_t set;
        struct mcontext __user *mcp;
 
-       if (get_sigset_t(&set, &ucp->uc_sigmask))
+       if (!user_read_access_begin(ucp, sizeof(*ucp)))
                return -EFAULT;
+
+       unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
 #ifdef CONFIG_PPC64
        {
                u32 cmcp;
 
-               if (__get_user(cmcp, &ucp->uc_regs))
-                       return -EFAULT;
+               unsafe_get_user(cmcp, &ucp->uc_regs, failed);
                mcp = (struct mcontext __user *)(u64)cmcp;
-               /* no need to check access_ok(mcp), since mcp < 4GB */
        }
 #else
-       if (__get_user(mcp, &ucp->uc_regs))
-               return -EFAULT;
-       if (!access_ok(mcp, sizeof(*mcp)))
-               return -EFAULT;
+       unsafe_get_user(mcp, &ucp->uc_regs, failed);
 #endif
+       user_read_access_end();
+
        set_current_blocked(&set);
        if (restore_user_regs(regs, mcp, sig))
                return -EFAULT;
 
        return 0;
+
+failed:
+       user_read_access_end();
+       return -EFAULT;
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -979,11 +1005,15 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
        u32 cmcp;
        u32 tm_cmcp;
 
-       if (get_sigset_t(&set, &ucp->uc_sigmask))
+       if (!user_read_access_begin(ucp, sizeof(*ucp)))
                return -EFAULT;
 
-       if (__get_user(cmcp, &ucp->uc_regs) ||
-           __get_user(tm_cmcp, &tm_ucp->uc_regs))
+       unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
+       unsafe_get_user(cmcp, &ucp->uc_regs, failed);
+
+       user_read_access_end();
+
+       if (__get_user(tm_cmcp, &tm_ucp->uc_regs))
                return -EFAULT;
        mcp = (struct mcontext __user *)(u64)cmcp;
        tm_mcp = (struct mcontext __user *)(u64)tm_cmcp;
@@ -994,6 +1024,10 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
                return -EFAULT;
 
        return 0;
+
+failed:
+       user_read_access_end();
+       return -EFAULT;
 }
 #endif
 
@@ -1311,19 +1345,16 @@ SYSCALL_DEFINE0(sigreturn)
        struct sigcontext __user *sc;
        struct sigcontext sigctx;
        struct mcontext __user *sr;
-       void __user *addr;
        sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-       struct mcontext __user *mcp, *tm_mcp;
-       unsigned long msr_hi;
-#endif
+       struct mcontext __user *mcp;
+       struct mcontext __user *tm_mcp = NULL;
+       unsigned long long msr_hi = 0;
 
        /* Always make any pending restarted system calls return -EINTR */
        current->restart_block.fn = do_no_restart_syscall;
 
        sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
        sc = &sf->sctx;
-       addr = sc;
        if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
                goto badframe;
 
@@ -1339,31 +1370,32 @@ SYSCALL_DEFINE0(sigreturn)
 #endif
        set_current_blocked(&set);
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        mcp = (struct mcontext __user *)&sf->mctx;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
        if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
                goto badframe;
+#endif
        if (MSR_TM_ACTIVE(msr_hi<<32)) {
                if (!cpu_has_feature(CPU_FTR_TM))
                        goto badframe;
                if (restore_tm_user_regs(regs, mcp, tm_mcp))
                        goto badframe;
-       } else
-#endif
-       {
+       } else {
                sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
-               addr = sr;
-               if (!access_ok(sr, sizeof(*sr))
-                   || restore_user_regs(regs, sr, 1))
-                       goto badframe;
+               if (restore_user_regs(regs, sr, 1)) {
+                       signal_fault(current, regs, "sys_sigreturn", sr);
+
+                       force_sig(SIGSEGV);
+                       return 0;
+               }
        }
 
        set_thread_flag(TIF_RESTOREALL);
        return 0;
 
 badframe:
-       signal_fault(current, regs, "sys_sigreturn", addr);
+       signal_fault(current, regs, "sys_sigreturn", sc);
 
        force_sig(SIGSEGV);
        return 0;
index f9e4a1a..dca6648 100644 (file)
@@ -79,13 +79,36 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
 }
 #endif
 
+static void prepare_setup_sigcontext(struct task_struct *tsk)
+{
+#ifdef CONFIG_ALTIVEC
+       /* save altivec registers */
+       if (tsk->thread.used_vr)
+               flush_altivec_to_thread(tsk);
+       if (cpu_has_feature(CPU_FTR_ALTIVEC))
+               tsk->thread.vrsave = mfspr(SPRN_VRSAVE);
+#endif /* CONFIG_ALTIVEC */
+
+       flush_fp_to_thread(tsk);
+
+#ifdef CONFIG_VSX
+       if (tsk->thread.used_vsr)
+               flush_vsx_to_thread(tsk);
+#endif /* CONFIG_VSX */
+}
+
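
prepare_setup_sigcontext() is split out so that the register flushing and the VRSAVE read, which are not allowed inside a uaccess window, happen before user_write_access_begin(); only unsafe_*() accessors run inside the window. A rough sketch of the resulting caller ordering (simplified from handle_rt_signal64() further down; illustrative, not the patch's code):

static int sketch_setup_frame(struct rt_sigframe __user *frame, struct task_struct *tsk)
{
	/* Must run before the uaccess window is opened. */
	prepare_setup_sigcontext(tsk);

	if (!user_write_access_begin(frame, sizeof(*frame)))
		return -EFAULT;

	/* Only unsafe_*() accessors from here until the window closes. */
	unsafe_put_user(0, &frame->uc.uc_flags, failed);

	user_write_access_end();
	return 0;

failed:
	user_write_access_end();
	return -EFAULT;
}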
 /*
  * Set up the sigcontext for the signal frame.
  */
 
-static long setup_sigcontext(struct sigcontext __user *sc,
-               struct task_struct *tsk, int signr, sigset_t *set,
-               unsigned long handler, int ctx_has_vsx_region)
+#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region, label)\
+do {                                                                                   \
+       if (__unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region))\
+               goto label;                                                             \
+} while (0)
+static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
+                                       struct task_struct *tsk, int signr, sigset_t *set,
+                                       unsigned long handler, int ctx_has_vsx_region)
 {
        /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
         * process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -97,25 +120,22 @@ static long setup_sigcontext(struct sigcontext __user *sc,
         */
 #ifdef CONFIG_ALTIVEC
        elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
-       unsigned long vrsave;
 #endif
        struct pt_regs *regs = tsk->thread.regs;
        unsigned long msr = regs->msr;
-       long err = 0;
        /* Force user to always see softe as 1 (interrupts enabled) */
        unsigned long softe = 0x1;
 
        BUG_ON(tsk != current);
 
 #ifdef CONFIG_ALTIVEC
-       err |= __put_user(v_regs, &sc->v_regs);
+       unsafe_put_user(v_regs, &sc->v_regs, efault_out);
 
        /* save altivec registers */
        if (tsk->thread.used_vr) {
-               flush_altivec_to_thread(tsk);
                /* Copy 33 vec registers (vr0..31 and vscr) to the stack */
-               err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
-                                     33 * sizeof(vector128));
+               unsafe_copy_to_user(v_regs, &tsk->thread.vr_state,
+                                   33 * sizeof(vector128), efault_out);
                /* set MSR_VEC in the MSR value in the frame to indicate that sc->v_regs
                 * contains valid data.
                 */
@@ -124,19 +144,12 @@ static long setup_sigcontext(struct sigcontext __user *sc,
        /* We always copy to/from vrsave, it's 0 if we don't have or don't
         * use altivec.
         */
-       vrsave = 0;
-       if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
-               vrsave = mfspr(SPRN_VRSAVE);
-               tsk->thread.vrsave = vrsave;
-       }
-
-       err |= __put_user(vrsave, (u32 __user *)&v_regs[33]);
+       unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
 #else /* CONFIG_ALTIVEC */
-       err |= __put_user(0, &sc->v_regs);
+       unsafe_put_user(0, &sc->v_regs, efault_out);
 #endif /* CONFIG_ALTIVEC */
-       flush_fp_to_thread(tsk);
        /* copy fpr regs and fpscr */
-       err |= copy_fpr_to_user(&sc->fp_regs, tsk);
+       unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out);
 
        /*
         * Clear the MSR VSX bit to indicate there is no valid state attached
@@ -150,26 +163,27 @@ static long setup_sigcontext(struct sigcontext __user *sc,
         * VMX data.
         */
        if (tsk->thread.used_vsr && ctx_has_vsx_region) {
-               flush_vsx_to_thread(tsk);
                v_regs += ELF_NVRREG;
-               err |= copy_vsx_to_user(v_regs, tsk);
+               unsafe_copy_vsx_to_user(v_regs, tsk, efault_out);
                /* set MSR_VSX in the MSR value in the frame to
                 * indicate that sc->vs_regs contains valid data.
                 */
                msr |= MSR_VSX;
        }
 #endif /* CONFIG_VSX */
-       err |= __put_user(&sc->gp_regs, &sc->regs);
-       WARN_ON(!FULL_REGS(regs));
-       err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
-       err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
-       err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
-       err |= __put_user(signr, &sc->signal);
-       err |= __put_user(handler, &sc->handler);
+       unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out);
+       unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out);
+       unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out);
+       unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out);
+       unsafe_put_user(signr, &sc->signal, efault_out);
+       unsafe_put_user(handler, &sc->handler, efault_out);
        if (set != NULL)
-               err |=  __put_user(set->sig[0], &sc->oldmask);
+               unsafe_put_user(set->sig[0], &sc->oldmask, efault_out);
 
-       return err;
+       return 0;
+
+efault_out:
+       return -EFAULT;
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -294,7 +308,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 
        err |= __put_user(&sc->gp_regs, &sc->regs);
        err |= __put_user(&tm_sc->gp_regs, &tm_sc->regs);
-       WARN_ON(!FULL_REGS(regs));
        err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE);
        err |= __copy_to_user(&sc->gp_regs,
                              &tsk->thread.ckpt_regs, GP_REGS_SIZE);
@@ -312,14 +325,16 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 /*
  * Restore the sigcontext from the signal frame.
  */
-
-static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
-                             struct sigcontext __user *sc)
+#define unsafe_restore_sigcontext(tsk, set, sig, sc, label) do {       \
+       if (__unsafe_restore_sigcontext(tsk, set, sig, sc))             \
+               goto label;                                             \
+} while (0)
+static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set,
+                                               int sig, struct sigcontext __user *sc)
 {
 #ifdef CONFIG_ALTIVEC
        elf_vrreg_t __user *v_regs;
 #endif
-       unsigned long err = 0;
        unsigned long save_r13 = 0;
        unsigned long msr;
        struct pt_regs *regs = tsk->thread.regs;
@@ -334,27 +349,27 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
                save_r13 = regs->gpr[13];
 
        /* copy the GPRs */
-       err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr));
-       err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]);
+       unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr), efault_out);
+       unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out);
        /* get MSR separately, transfer the LE bit if doing signal return */
-       err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+       unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out);
        if (sig)
                regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
-       err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]);
-       err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]);
-       err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]);
-       err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]);
-       err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]);
+       unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out);
+       unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out);
+       unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out);
+       unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out);
+       unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out);
        /* Don't allow userspace to set SOFTE */
        set_trap_norestart(regs);
-       err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
-       err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
-       err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
+       unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out);
+       unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out);
+       unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out);
 
        if (!sig)
                regs->gpr[13] = save_r13;
        if (set != NULL)
-               err |=  __get_user(set->sig[0], &sc->oldmask);
+               unsafe_get_user(set->sig[0], &sc->oldmask, efault_out);
 
        /*
         * Force reload of FP/VEC.
@@ -364,29 +379,27 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
        regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
 
 #ifdef CONFIG_ALTIVEC
-       err |= __get_user(v_regs, &sc->v_regs);
-       if (err)
-               return err;
+       unsafe_get_user(v_regs, &sc->v_regs, efault_out);
        if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
                return -EFAULT;
        /* Copy 33 vec registers (vr0..31 and vscr) from the stack */
        if (v_regs != NULL && (msr & MSR_VEC) != 0) {
-               err |= __copy_from_user(&tsk->thread.vr_state, v_regs,
-                                       33 * sizeof(vector128));
+               unsafe_copy_from_user(&tsk->thread.vr_state, v_regs,
+                                     33 * sizeof(vector128), efault_out);
                tsk->thread.used_vr = true;
        } else if (tsk->thread.used_vr) {
                memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
        }
        /* Always get VRSAVE back */
        if (v_regs != NULL)
-               err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
+               unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
        else
                tsk->thread.vrsave = 0;
        if (cpu_has_feature(CPU_FTR_ALTIVEC))
                mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
 #endif /* CONFIG_ALTIVEC */
        /* restore floating point */
-       err |= copy_fpr_from_user(tsk, &sc->fp_regs);
+       unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out);
 #ifdef CONFIG_VSX
        /*
         * Get additional VSX data. Update v_regs to point after the
@@ -395,14 +408,17 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
         */
        v_regs += ELF_NVRREG;
        if ((msr & MSR_VSX) != 0) {
-               err |= copy_vsx_from_user(tsk, v_regs);
+               unsafe_copy_vsx_from_user(tsk, v_regs, efault_out);
                tsk->thread.used_vsr = true;
        } else {
                for (i = 0; i < 32 ; i++)
                        tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
        }
 #endif
-       return err;
+       return 0;
+
+efault_out:
+       return -EFAULT;
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -586,6 +602,12 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
 
        return err;
 }
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
+static long restore_tm_sigcontexts(struct task_struct *tsk, struct sigcontext __user *sc,
+                                  struct sigcontext __user *tm_sc)
+{
+       return -EINVAL;
+}
 #endif
 
 /*
@@ -655,12 +677,16 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
                ctx_has_vsx_region = 1;
 
        if (old_ctx != NULL) {
-               if (!access_ok(old_ctx, ctx_size)
-                   || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
-                                       ctx_has_vsx_region)
-                   || __copy_to_user(&old_ctx->uc_sigmask,
-                                     &current->blocked, sizeof(sigset_t)))
+               prepare_setup_sigcontext(current);
+               if (!user_write_access_begin(old_ctx, ctx_size))
                        return -EFAULT;
+
+               unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
+                                       0, ctx_has_vsx_region, efault_out);
+               unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
+                                   sizeof(sigset_t), efault_out);
+
+               user_write_access_end();
        }
        if (new_ctx == NULL)
                return 0;
@@ -680,15 +706,25 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
         * We kill the task with a SIGSEGV in this situation.
         */
 
-       if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
+       if (__get_user_sigset(&set, &new_ctx->uc_sigmask))
                do_exit(SIGSEGV);
        set_current_blocked(&set);
-       if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext))
+
+       if (!user_read_access_begin(new_ctx, ctx_size))
+               return -EFAULT;
+       if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+               user_read_access_end();
                do_exit(SIGSEGV);
+       }
+       user_read_access_end();
 
        /* This returns like rt_sigreturn */
        set_thread_flag(TIF_RESTOREALL);
        return 0;
+
+efault_out:
+       user_write_access_end();
+       return -EFAULT;
 }
 
 
@@ -701,9 +737,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
        struct pt_regs *regs = current_pt_regs();
        struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
        sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        unsigned long msr;
-#endif
 
        /* Always make any pending restarted system calls return -EINTR */
        current->restart_block.fn = do_no_restart_syscall;
@@ -711,52 +745,54 @@ SYSCALL_DEFINE0(rt_sigreturn)
        if (!access_ok(uc, sizeof(*uc)))
                goto badframe;
 
-       if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
+       if (__get_user_sigset(&set, &uc->uc_sigmask))
                goto badframe;
        set_current_blocked(&set);
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-       /*
-        * If there is a transactional state then throw it away.
-        * The purpose of a sigreturn is to destroy all traces of the
-        * signal frame, this includes any transactional state created
-        * within in. We only check for suspended as we can never be
-        * active in the kernel, we are active, there is nothing better to
-        * do than go ahead and Bad Thing later.
-        * The cause is not important as there will never be a
-        * recheckpoint so it's not user visible.
-        */
-       if (MSR_TM_SUSPENDED(mfmsr()))
-               tm_reclaim_current(0);
+       if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM)) {
+               /*
+                * If there is a transactional state then throw it away.
+                * The purpose of a sigreturn is to destroy all traces of the
+                * signal frame, which includes any transactional state created
+                * within it. We only check for suspended as we can never be
+                * active in the kernel; if we are active, there is nothing better
+                * to do than go ahead and Bad Thing later.
+                * The cause is not important as there will never be a
+                * recheckpoint so it's not user visible.
+                */
+               if (MSR_TM_SUSPENDED(mfmsr()))
+                       tm_reclaim_current(0);
 
-       /*
-        * Disable MSR[TS] bit also, so, if there is an exception in the
-        * code below (as a page fault in copy_ckvsx_to_user()), it does
-        * not recheckpoint this task if there was a context switch inside
-        * the exception.
-        *
-        * A major page fault can indirectly call schedule(). A reschedule
-        * process in the middle of an exception can have a side effect
-        * (Changing the CPU MSR[TS] state), since schedule() is called
-        * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended
-        * (switch_to() calls tm_recheckpoint() for the 'new' process). In
-        * this case, the process continues to be the same in the CPU, but
-        * the CPU state just changed.
-        *
-        * This can cause a TM Bad Thing, since the MSR in the stack will
-        * have the MSR[TS]=0, and this is what will be used to RFID.
-        *
-        * Clearing MSR[TS] state here will avoid a recheckpoint if there
-        * is any process reschedule in kernel space. The MSR[TS] state
-        * does not need to be saved also, since it will be replaced with
-        * the MSR[TS] that came from user context later, at
-        * restore_tm_sigcontexts.
-        */
-       regs->msr &= ~MSR_TS_MASK;
+               /*
+                * Disable MSR[TS] bit also, so, if there is an exception in the
+                * code below (as a page fault in copy_ckvsx_to_user()), it does
+                * not recheckpoint this task if there was a context switch inside
+                * the exception.
+                *
+                * A major page fault can indirectly call schedule(). A reschedule
+                * in the middle of an exception can have a side effect
+                * (changing the CPU MSR[TS] state), since schedule() is called
+                * with the CPU MSR[TS] disabled and returns with MSR[TS]=Suspended
+                * (switch_to() calls tm_recheckpoint() for the 'new' process). In
+                * this case, the process continues to be the same in the CPU, but
+                * the CPU state just changed.
+                *
+                * This can cause a TM Bad Thing, since the MSR in the stack will
+                * have the MSR[TS]=0, and this is what will be used to RFID.
+                *
+                * Clearing MSR[TS] state here will avoid a recheckpoint if there
+                * is any process reschedule in kernel space. The MSR[TS] state
+                * does not need to be saved also, since it will be replaced with
+                * the MSR[TS] that came from user context later, at
+                * restore_tm_sigcontexts.
+                */
+               regs->msr &= ~MSR_TS_MASK;
 
-       if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
-               goto badframe;
-       if (MSR_TM_ACTIVE(msr)) {
+               if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
+                       goto badframe;
+       }
+
+       if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && MSR_TM_ACTIVE(msr)) {
                /* We recheckpoint on return. */
                struct ucontext __user *uc_transact;
 
@@ -769,9 +805,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
                if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
                                           &uc_transact->uc_mcontext))
                        goto badframe;
-       } else
-#endif
-       {
+       } else {
                /*
                 * Fall through, for non-TM restore
                 *
@@ -785,8 +819,13 @@ SYSCALL_DEFINE0(rt_sigreturn)
                 * causing a TM bad thing.
                 */
                current->thread.regs->msr &= ~MSR_TS_MASK;
-               if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
+               if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext)))
                        goto badframe;
+
+               unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext,
+                                         badframe_block);
+
+               user_read_access_end();
        }
 
        if (restore_altstack(&uc->uc_stack))
@@ -795,6 +834,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
        set_thread_flag(TIF_RESTOREALL);
        return 0;
 
+badframe_block:
+       user_read_access_end();
 badframe:
        signal_fault(current, regs, "rt_sigreturn", uc);
 
@@ -809,46 +850,57 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
        unsigned long newsp = 0;
        long err = 0;
        struct pt_regs *regs = tsk->thread.regs;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        /* Save the thread's msr before get_tm_stackpointer() changes it */
        unsigned long msr = regs->msr;
-#endif
 
        frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
-       if (!access_ok(frame, sizeof(*frame)))
-               goto badframe;
 
-       err |= __put_user(&frame->info, &frame->pinfo);
-       err |= __put_user(&frame->uc, &frame->puc);
-       err |= copy_siginfo_to_user(&frame->info, &ksig->info);
-       if (err)
+       /*
+        * This is only needed when unsafe_setup_sigcontext() will be called,
+        * and it must run before the uaccess window is opened.
+        */
+       if (!MSR_TM_ACTIVE(msr))
+               prepare_setup_sigcontext(tsk);
+
+       if (!user_write_access_begin(frame, sizeof(*frame)))
                goto badframe;
 
+       unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
+       unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+
        /* Create the ucontext.  */
-       err |= __put_user(0, &frame->uc.uc_flags);
-       err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
+       unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
+
        if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
                /* The ucontext_t passed to userland points to the second
                 * ucontext_t (for transactional state) with its uc_link ptr.
                 */
-               err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
+               unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+
+               user_write_access_end();
+
                err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
                                            &frame->uc_transact.uc_mcontext,
                                            tsk, ksig->sig, NULL,
                                            (unsigned long)ksig->ka.sa.sa_handler,
                                            msr);
-       } else
+
+               if (!user_write_access_begin(&frame->uc.uc_sigmask,
+                                            sizeof(frame->uc.uc_sigmask)))
+                       goto badframe;
+
 #endif
-       {
-               err |= __put_user(0, &frame->uc.uc_link);
-               err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
+       } else {
+               unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
+               unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
                                        NULL, (unsigned long)ksig->ka.sa.sa_handler,
-                                       1);
+                                       1, badframe_block);
        }
-       err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-       if (err)
-               goto badframe;
+
+       unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
+       user_write_access_end();
 
        /* Make sure signal handler doesn't get spurious FP exceptions */
        tsk->thread.fp_state.fpscr = 0;
@@ -863,6 +915,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
                regs->nip = (unsigned long) &frame->tramp[0];
        }
 
+
+       /* Save the siginfo outside of the unsafe block. */
+       if (copy_siginfo_to_user(&frame->info, &ksig->info))
+               goto badframe;
+
        /* Allocate a dummy caller frame for the signal handler. */
        newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
        err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
@@ -902,6 +959,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
        return 0;
 
+badframe_block:
+       user_write_access_end();
 badframe:
        signal_fault(current, regs, "handle_rt_signal64", frame);
 
index 5a4d59a..2e05c78 100644 (file)
@@ -83,7 +83,7 @@ DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
-DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
+static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
 
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
@@ -122,14 +122,14 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata;
  * On big-core systems, thread_group_l1_cache_map for each CPU corresponds to
  * the set of its siblings that share the L1-cache.
  */
-DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
+static DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
 
 /*
  * On some big-core systems, thread_group_l2_cache_map for each CPU
  * corresponds to the set of its siblings within the core that share the
  * L2-cache.
  */
-DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+static DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
 
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;
@@ -1057,17 +1057,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
                                local_memory_node(numa_cpu_lookup_table[cpu]));
                }
 #endif
-               /*
-                * cpu_core_map is now more updated and exists only since
-                * its been exported for long. It only will have a snapshot
-                * of cpu_cpu_mask.
-                */
-               cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu));
        }
 
        /* Init the cpumasks so the boot CPU is related to itself */
        cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
        cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
+       cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
 
        if (has_coregroup_support())
                cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
@@ -1078,6 +1073,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
                                cpu_smallcore_mask(boot_cpuid));
        }
 
+       if (cpu_to_chip_id(boot_cpuid) != -1) {
+               int idx = num_possible_cpus() / threads_per_core;
+
+               /*
+                * All threads of a core share the same chip-id, so
+                * chip_id_lookup_table will have one entry per core.
+                * Assumption: if boot_cpuid doesn't have a chip-id, then no
+                * other CPU will have one either.
+                */
+               chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
+               if (chip_id_lookup_table)
+                       memset(chip_id_lookup_table, -1, sizeof(int) * idx);
+       }
+
        if (smp_ops && smp_ops->probe)
                smp_ops->probe();
 }
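
A small aside on the chip_id_lookup_table initialisation above: kcalloc() zero-fills, so the memset() re-marks every slot as "unknown". Because -1 is all-one bits, filling each byte with 0xff leaves every int holding -1, the invalid marker. A tiny illustration of the idiom (not patch code):

#include <linux/string.h>
#include <linux/types.h>

/* After this, table[0..n-1] all read back as -1. */
static void mark_all_invalid(int *table, size_t n)
{
	memset(table, -1, n * sizeof(*table));
}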
@@ -1408,6 +1417,9 @@ static void remove_cpu_from_masks(int cpu)
                        set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
        }
 
+       for_each_cpu(i, cpu_core_mask(cpu))
+               set_cpus_unrelated(cpu, i, cpu_core_mask);
+
        if (has_coregroup_support()) {
                for_each_cpu(i, cpu_coregroup_mask(cpu))
                        set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
@@ -1468,8 +1480,11 @@ static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
 
 static void add_cpu_to_masks(int cpu)
 {
+       struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
        int first_thread = cpu_first_thread_sibling(cpu);
        cpumask_var_t mask;
+       int chip_id = -1;
+       bool ret;
        int i;
 
        /*
@@ -1485,12 +1500,39 @@ static void add_cpu_to_masks(int cpu)
        add_cpu_to_smallcore_masks(cpu);
 
        /* In CPU-hotplug path, hence use GFP_ATOMIC */
-       alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
+       ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
        update_mask_by_l2(cpu, &mask);
 
        if (has_coregroup_support())
                update_coregroup_mask(cpu, &mask);
 
+       if (chip_id_lookup_table && ret)
+               chip_id = cpu_to_chip_id(cpu);
+
+       if (chip_id == -1) {
+               cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu));
+               goto out;
+       }
+
+       if (shared_caches)
+               submask_fn = cpu_l2_cache_mask;
+
+       /* Update core_mask with all the CPUs that are part of submask */
+       or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask);
+
+       /* Skip all CPUs already part of current CPU core mask */
+       cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
+
+       for_each_cpu(i, mask) {
+               if (chip_id == cpu_to_chip_id(i)) {
+                       or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
+                       cpumask_andnot(mask, mask, submask_fn(i));
+               } else {
+                       cpumask_andnot(mask, mask, cpu_core_mask(i));
+               }
+       }
+
+out:
        free_cpumask_var(mask);
 }
 
@@ -1521,6 +1563,9 @@ void start_secondary(void *unused)
 
        vdso_getcpu_init();
 #endif
+       set_numa_node(numa_cpu_lookup_table[cpu]);
+       set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
        /* Update topology CPU masks */
        add_cpu_to_masks(cpu);
 
@@ -1539,9 +1584,6 @@ void start_secondary(void *unused)
                        shared_caches = true;
        }
 
-       set_numa_node(numa_cpu_lookup_table[cpu]);
-       set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
-
        smp_wmb();
        notify_cpu_starting(cpu);
        set_cpu_online(cpu, true);
index b644065..1deb1bf 100644 (file)
 
 #include <asm/paca.h>
 
-/*
- * Save stack-backtrace addresses into a stack_trace buffer.
- */
-static void save_context_stack(struct stack_trace *trace, unsigned long sp,
-                       struct task_struct *tsk, int savesched)
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+                    struct task_struct *task, struct pt_regs *regs)
 {
+       unsigned long sp;
+
+       if (regs && !consume_entry(cookie, regs->nip))
+               return;
+
+       if (regs)
+               sp = regs->gpr[1];
+       else if (task == current)
+               sp = current_stack_frame();
+       else
+               sp = task->thread.ksp;
+
        for (;;) {
                unsigned long *stack = (unsigned long *) sp;
                unsigned long newsp, ip;
 
-               if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
+               if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD))
                        return;
 
                newsp = stack[0];
                ip = stack[STACK_FRAME_LR_SAVE];
 
-               if (savesched || !in_sched_functions(ip)) {
-                       if (!trace->skip)
-                               trace->entries[trace->nr_entries++] = ip;
-                       else
-                               trace->skip--;
-               }
-
-               if (trace->nr_entries >= trace->max_entries)
+               if (!consume_entry(cookie, ip))
                        return;
 
                sp = newsp;
        }
 }
 
-void save_stack_trace(struct stack_trace *trace)
-{
-       unsigned long sp;
-
-       sp = current_stack_frame();
-
-       save_context_stack(trace, sp, current, 1);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
-{
-       unsigned long sp;
-
-       if (!try_get_task_stack(tsk))
-               return;
-
-       if (tsk == current)
-               sp = current_stack_frame();
-       else
-               sp = tsk->thread.ksp;
-
-       save_context_stack(trace, sp, tsk, 0);
-
-       put_task_stack(tsk);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-
-void
-save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
-{
-       save_context_stack(trace, regs->gpr[1], current, 0);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_regs);
-
-#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
 /*
  * This function returns an error if it detects any unreliable features of the
  * stack.  Otherwise it guarantees that the stack trace is reliable.
  *
  * If the task is not 'current', the caller *must* ensure the task is inactive.
  */
-static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
-                                          struct stack_trace *trace)
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+                            void *cookie, struct task_struct *task)
 {
        unsigned long sp;
        unsigned long newsp;
-       unsigned long stack_page = (unsigned long)task_stack_page(tsk);
+       unsigned long stack_page = (unsigned long)task_stack_page(task);
        unsigned long stack_end;
        int graph_idx = 0;
        bool firstframe;
 
        stack_end = stack_page + THREAD_SIZE;
-       if (!is_idle_task(tsk)) {
+       if (!is_idle_task(task)) {
                /*
                 * For user tasks, this is the SP value loaded on
                 * kernel entry, see "PACAKSAVE(r13)" in _switch() and
@@ -130,10 +96,10 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
                stack_end -= STACK_FRAME_OVERHEAD;
        }
 
-       if (tsk == current)
+       if (task == current)
                sp = current_stack_frame();
        else
-               sp = tsk->thread.ksp;
+               sp = task->thread.ksp;
 
        if (sp < stack_page + sizeof(struct thread_struct) ||
            sp > stack_end - STACK_FRAME_MIN_SIZE) {
@@ -182,7 +148,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
                 * FIXME: IMHO these tests do not belong in
                 * arch-dependent code, they are generic.
                 */
-               ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
+               ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack);
 #ifdef CONFIG_KPROBES
                /*
                 * Mark stacktraces with kretprobed functions on them
@@ -192,36 +158,12 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
                        return -EINVAL;
 #endif
 
-               if (trace->nr_entries >= trace->max_entries)
-                       return -E2BIG;
-               if (!trace->skip)
-                       trace->entries[trace->nr_entries++] = ip;
-               else
-                       trace->skip--;
+               if (!consume_entry(cookie, ip))
+                       return -EINVAL;
        }
        return 0;
 }
 
-int save_stack_trace_tsk_reliable(struct task_struct *tsk,
-                                 struct stack_trace *trace)
-{
-       int ret;
-
-       /*
-        * If the task doesn't have a stack (e.g., a zombie), the stack is
-        * "reliably" empty.
-        */
-       if (!try_get_task_stack(tsk))
-               return 0;
-
-       ret = __save_stack_trace_tsk_reliable(tsk, trace);
-
-       put_task_stack(tsk);
-
-       return ret;
-}
-#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
-
 #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
 static void handle_backtrace_ipi(struct pt_regs *regs)
 {
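
With the switch to arch_stack_walk(), the arch code no longer manages the skip/nr_entries/max_entries bookkeeping; it simply feeds each return address to the consume_entry callback and stops the walk when that callback returns false. Roughly how a generic consumer looks (a sketch of the callback contract, not powerpc code):

#include <linux/stacktrace.h>

struct trace_buf {
	unsigned long *entries;
	unsigned int nr, max;
};

/* Matches stack_trace_consume_fn: return false to stop the walk early. */
static bool store_entry(void *cookie, unsigned long addr)
{
	struct trace_buf *buf = cookie;

	if (buf->nr >= buf->max)
		return false;
	buf->entries[buf->nr++] = addr;
	return true;
}

/*
 * Used as, for example:
 *	arch_stack_walk(store_entry, &buf, current, NULL);
 */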
index 078608e..a552c9e 100644 (file)
@@ -82,16 +82,8 @@ int
 ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
 {
        if ( (unsigned long)n >= 4096 )
-       {
-               unsigned long __user *buffer = (unsigned long __user *)n;
-               if (!access_ok(buffer, 5*sizeof(unsigned long))
-                   || __get_user(n, buffer)
-                   || __get_user(inp, ((fd_set __user * __user *)(buffer+1)))
-                   || __get_user(outp, ((fd_set  __user * __user *)(buffer+2)))
-                   || __get_user(exp, ((fd_set  __user * __user *)(buffer+3)))
-                   || __get_user(tvp, ((struct __kernel_old_timeval  __user * __user *)(buffer+4))))
-                       return -EFAULT;
-       }
+               return sys_old_select((void __user *)n);
+
        return sys_select(n, inp, outp, exp, tvp);
 }
 #endif
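
The removed ppc_select() special case decoded the ancient single-argument select(2) calling convention by hand; sys_old_select() already does exactly that, reading all five values from one user-space block. That block has roughly the following layout (mirroring fs/select.c's sel_arg_struct; shown here only for illustration):

#include <linux/types.h>
#include <linux/time.h>

struct old_select_block {
	unsigned long n;
	fd_set __user *inp, *outp, *exp;
	struct __kernel_old_timeval __user *tvp;
};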
index 9e3be29..5476f62 100644 (file)
@@ -6,53 +6,38 @@ _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')     \
          $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
 syscall := $(src)/syscall.tbl
-syshdr := $(srctree)/$(src)/syscallhdr.sh
-systbl := $(srctree)/$(src)/syscalltbl.sh
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
 
 quiet_cmd_syshdr = SYSHDR  $@
-      cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@'       \
-                  '$(syshdr_abis_$(basetarget))'               \
-                  '$(syshdr_pfx_$(basetarget))'                \
-                  '$(syshdr_offset_$(basetarget))'
+      cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis $(abis) $< $@
 
 quiet_cmd_systbl = SYSTBL  $@
-      cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@'       \
-                  '$(systbl_abis_$(basetarget))'               \
-                  '$(systbl_abi_$(basetarget))'                \
-                  '$(systbl_offset_$(basetarget))'
+      cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
 
-syshdr_abis_unistd_32 := common,nospu,32
+$(uapi)/unistd_32.h: abis := common,nospu,32
 $(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
-syshdr_abis_unistd_64 := common,nospu,64
+$(uapi)/unistd_64.h: abis := common,nospu,64
 $(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
-systbl_abis_syscall_table_32 := common,nospu,32
-systbl_abi_syscall_table_32 := 32
+$(kapi)/syscall_table_32.h: abis := common,nospu,32
 $(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
-systbl_abis_syscall_table_64 := common,nospu,64
-systbl_abi_syscall_table_64 := 64
+$(kapi)/syscall_table_64.h: abis := common,nospu,64
 $(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
-systbl_abis_syscall_table_c32 := common,nospu,32
-systbl_abi_syscall_table_c32 := c32
-$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) FORCE
-       $(call if_changed,systbl)
-
-systbl_abis_syscall_table_spu := common,spu
-systbl_abi_syscall_table_spu := spu
+$(kapi)/syscall_table_spu.h: abis := common,spu
 $(kapi)/syscall_table_spu.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 uapisyshdr-y           += unistd_32.h unistd_64.h
 kapisyshdr-y           += syscall_table_32.h           \
                           syscall_table_64.h           \
-                          syscall_table_c32.h          \
                           syscall_table_spu.h
 
 uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
diff --git a/arch/powerpc/kernel/syscalls/syscallhdr.sh b/arch/powerpc/kernel/syscalls/syscallhdr.sh
deleted file mode 100644 (file)
index 02d6751..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=_UAPI_ASM_POWERPC_`basename "$out" | sed \
-       -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
-       -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
-       printf "#ifndef %s\n" "${fileguard}"
-       printf "#define %s\n" "${fileguard}"
-       printf "\n"
-
-       nxt=0
-       while read nr abi name entry compat ; do
-               if [ -z "$offset" ]; then
-                       printf "#define __NR_%s%s\t%s\n" \
-                               "${prefix}" "${name}" "${nr}"
-               else
-                       printf "#define __NR_%s%s\t(%s + %s)\n" \
-                               "${prefix}" "${name}" "${offset}" "${nr}"
-               fi
-               nxt=$((nr+1))
-       done
-
-       printf "\n"
-       printf "#ifdef __KERNEL__\n"
-       printf "#define __NR_syscalls\t%s\n" "${nxt}"
-       printf "#endif\n"
-       printf "\n"
-       printf "#endif /* %s */\n" "${fileguard}"
-) > "$out"
diff --git a/arch/powerpc/kernel/syscalls/syscalltbl.sh b/arch/powerpc/kernel/syscalls/syscalltbl.sh
deleted file mode 100644 (file)
index f7393a7..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-my_abi="$4"
-offset="$5"
-
-emit() {
-       t_nxt="$1"
-       t_nr="$2"
-       t_entry="$3"
-
-       while [ $t_nxt -lt $t_nr ]; do
-               printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}"
-               t_nxt=$((t_nxt+1))
-       done
-       printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}"
-}
-
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
-       nxt=0
-       if [ -z "$offset" ]; then
-               offset=0
-       fi
-
-       while read nr abi name entry compat ; do
-               if [ "$my_abi" = "c32" ] && [ ! -z "$compat" ]; then
-                       emit $((nxt+offset)) $((nr+offset)) $compat
-               else
-                       emit $((nxt+offset)) $((nr+offset)) $entry
-               fi
-               nxt=$((nr+1))
-       done
-) > "$out"
index d34276f..cb33588 100644 (file)
@@ -21,6 +21,7 @@
 #define __SYSCALL(nr, entry)   .long entry
 #endif
 
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)      __SYSCALL(nr, native)
 .globl sys_call_table
 sys_call_table:
 #ifdef CONFIG_PPC64
@@ -30,8 +31,10 @@ sys_call_table:
 #endif
 
 #ifdef CONFIG_COMPAT
+#undef __SYSCALL_WITH_COMPAT
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)      __SYSCALL(nr, compat)
 .globl compat_sys_call_table
 compat_sys_call_table:
 #define compat_sys_sigsuspend  sys_sigsuspend
-#include <asm/syscall_table_c32.h>
+#include <asm/syscall_table_32.h>
 #endif
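
The assembly hunk above drops the dedicated syscall_table_c32.h and instead includes the shared syscall_table_32.h twice, selecting compat entries by redefining __SYSCALL_WITH_COMPAT before the second include. A minimal userspace sketch of that expansion follows; the entry names and numbers are stand-ins, not the real generated header.

#include <stdio.h>

typedef long (*syscall_fn)(void);

static long sys_read(void)         { return 0; }
static long sys_write(void)        { return 1; }
static long compat_sys_write(void) { return 2; }

/* Stand-in for the generated asm/syscall_table_32.h */
#define SYSCALL_TABLE_32 \
        __SYSCALL(0, sys_read) \
        __SYSCALL_WITH_COMPAT(1, sys_write, compat_sys_write)

#define __SYSCALL(nr, entry) [nr] = entry,

/* Native table: compat entries fall back to the native handler */
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
static const syscall_fn sys_call_table[] = { SYSCALL_TABLE_32 };
#undef __SYSCALL_WITH_COMPAT

/* Compat table: the same header now picks the compat handler */
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
static const syscall_fn compat_sys_call_table[] = { SYSCALL_TABLE_32 };
#undef __SYSCALL_WITH_COMPAT

int main(void)
{
        printf("native[1] -> %ld, compat[1] -> %ld\n",
               sys_call_table[1](), compat_sys_call_table[1]());
        return 0;
}
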
index 42761eb..ffe9537 100644 (file)
@@ -68,7 +68,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
         */
 
        /* read the text we want to modify */
-       if (probe_kernel_read_inst(&replaced, (void *)ip))
+       if (copy_inst_from_kernel_nofault(&replaced, (void *)ip))
                return -EFAULT;
 
        /* Make sure it is what we expect it to be */
@@ -130,7 +130,7 @@ __ftrace_make_nop(struct module *mod,
        struct ppc_inst op, pop;
 
        /* read where this goes */
-       if (probe_kernel_read_inst(&op, (void *)ip)) {
+       if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
                pr_err("Fetching opcode failed.\n");
                return -EFAULT;
        }
@@ -164,7 +164,7 @@ __ftrace_make_nop(struct module *mod,
        /* When using -mkernel_profile there is no load to jump over */
        pop = ppc_inst(PPC_INST_NOP);
 
-       if (probe_kernel_read_inst(&op, (void *)(ip - 4))) {
+       if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) {
                pr_err("Fetching instruction at %lx failed.\n", ip - 4);
                return -EFAULT;
        }
@@ -197,7 +197,7 @@ __ftrace_make_nop(struct module *mod,
         * Check what is in the next instruction. We can see ld r2,40(r1), but
         * on first pass after boot we will see mflr r0.
         */
-       if (probe_kernel_read_inst(&op, (void *)(ip + 4))) {
+       if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) {
                pr_err("Fetching op failed.\n");
                return -EFAULT;
        }
@@ -349,7 +349,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
                        return -1;
 
        /* New trampoline -- read where this goes */
-       if (probe_kernel_read_inst(&op, (void *)tramp)) {
+       if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) {
                pr_debug("Fetching opcode failed.\n");
                return -1;
        }
@@ -399,7 +399,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
        struct ppc_inst op;
 
        /* Read where this goes */
-       if (probe_kernel_read_inst(&op, (void *)ip)) {
+       if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
                pr_err("Fetching opcode failed.\n");
                return -EFAULT;
        }
@@ -526,10 +526,10 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
        struct module *mod = rec->arch.mod;
 
        /* read where this goes */
-       if (probe_kernel_read_inst(op, ip))
+       if (copy_inst_from_kernel_nofault(op, ip))
                return -EFAULT;
 
-       if (probe_kernel_read_inst(op + 1, ip + 4))
+       if (copy_inst_from_kernel_nofault(op + 1, ip + 4))
                return -EFAULT;
 
        if (!expected_nop_sequence(ip, op[0], op[1])) {
@@ -592,7 +592,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
        unsigned long ip = rec->ip;
 
        /* read where this goes */
-       if (probe_kernel_read_inst(&op, (void *)ip))
+       if (copy_inst_from_kernel_nofault(&op, (void *)ip))
                return -EFAULT;
 
        /* It should be pointing to a nop */
@@ -648,7 +648,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
        }
 
        /* Make sure we have a nop */
-       if (probe_kernel_read_inst(&op, ip)) {
+       if (copy_inst_from_kernel_nofault(&op, ip)) {
                pr_err("Unable to read ftrace location %p\n", ip);
                return -EFAULT;
        }
@@ -726,7 +726,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
        }
 
        /* read where this goes */
-       if (probe_kernel_read_inst(&op, (void *)ip)) {
+       if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
                pr_err("Fetching opcode failed.\n");
                return -EFAULT;
        }
index a44a30b..b4ab95c 100644 (file)
@@ -53,7 +53,6 @@
 #ifdef CONFIG_PPC64
 #include <asm/firmware.h>
 #include <asm/processor.h>
-#include <asm/tm.h>
 #endif
 #include <asm/kexec.h>
 #include <asm/ppc-opcode.h>
@@ -222,7 +221,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
        /*
         * system_reset_excption handles debugger, crash dump, panic, for 0x100
         */
-       if (TRAP(regs) == 0x100)
+       if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
                return;
 
        crash_fadump(regs, "die oops");
@@ -290,7 +289,7 @@ void die(const char *str, struct pt_regs *regs, long err)
        /*
         * system_reset_excption handles debugger, crash dump, panic, for 0x100
         */
-       if (TRAP(regs) != 0x100) {
+       if (TRAP(regs) != INTERRUPT_SYSTEM_RESET) {
                if (debugger(regs))
                        return;
        }
@@ -405,7 +404,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
         * Now test if the interrupt has hit a range that may be using
         * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
         * problem ranges all run un-relocated. Test real and virt modes
-        * at the same time by droping the high bit of the nip (virt mode
+        * at the same time by dropping the high bit of the nip (virt mode
         * entry points still have the +0x4000 offset).
         */
        nip &= ~0xc000000000000000ULL;
@@ -864,7 +863,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs)
        unsigned long ea, msr, msr_mask;
        bool swap;
 
-       if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
+       if (__get_user(instr, (unsigned int __user *)regs->nip))
                return;
 
        /*
@@ -1079,6 +1078,16 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
        _exception(SIGTRAP, regs, TRAP_UNK, 0);
 }
 
+DEFINE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception)
+{
+       printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+              regs->nip, regs->msr, regs->trap);
+
+       _exception(SIGTRAP, regs, TRAP_UNK, 0);
+
+       return 0;
+}
+
 DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
 {
        if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
@@ -1309,7 +1318,6 @@ static int emulate_instruction(struct pt_regs *regs)
 
        if (!user_mode(regs))
                return -EINVAL;
-       CHECK_FULL_REGS(regs);
 
        if (get_user(instword, (u32 __user *)(regs->nip)))
                return -EFAULT;
@@ -1406,7 +1414,6 @@ int is_valid_bugaddr(unsigned long addr)
 static int emulate_math(struct pt_regs *regs)
 {
        int ret;
-       extern int do_mathemu(struct pt_regs *regs);
 
        ret = do_mathemu(regs);
        if (ret >= 0)
@@ -1606,15 +1613,6 @@ bad:
                bad_page_fault(regs, sig);
 }
 
-DEFINE_INTERRUPT_HANDLER(StackOverflow)
-{
-       pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n",
-               current->comm, task_pid_nr(current), regs->gpr[1]);
-       debugger(regs);
-       show_regs(regs);
-       panic("kernel stack overflow");
-}
-
 DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
 {
        die("Kernel stack overflow", regs, SIGSEGV);
@@ -1693,7 +1691,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
        u8 status;
        bool hv;
 
-       hv = (TRAP(regs) == 0xf80);
+       hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL);
        if (hv)
                value = mfspr(SPRN_HFSCR);
        else
@@ -2170,11 +2168,14 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
  * in the MSR is 0.  This indicates that SRR0/1 are live, and that
  * we therefore lost state by taking this exception.
  */
-void unrecoverable_exception(struct pt_regs *regs)
+void __noreturn unrecoverable_exception(struct pt_regs *regs)
 {
        pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
                 regs->trap, regs->nip, regs->msr);
        die("Unrecoverable exception", regs, SIGABRT);
+       /* die() should not return */
+       for (;;)
+               ;
 }
 
 #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
@@ -2189,10 +2190,11 @@ void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
        return;
 }
 
-DEFINE_INTERRUPT_HANDLER(WatchdogException) /* XXX NMI? async? */
+DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException)
 {
        printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
        WatchdogHandler(regs);
+       return 0;
 }
 #endif
 
index e8a6371..186f69b 100644 (file)
@@ -41,6 +41,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
        if (addr & 0x03)
                return -EINVAL;
 
+       if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+           ppc_inst_prefixed(auprobe->insn) &&
+           (addr & 0x3f) == 60) {
+               pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n");
+               return -EINVAL;
+       }
+
        return 0;
 }
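
The new check in arch_uprobe_analyze_insn() rejects a uprobe on a prefixed instruction whose first word sits at byte offset 60 of a 64-byte block: the instruction is 8 bytes, so its suffix would straddle the 64-byte boundary that ISA 3.1 forbids crossing. A small standalone sketch of the same arithmetic, with made-up addresses:

#include <stdbool.h>
#include <stdio.h>

static bool prefixed_insn_crosses_64b(unsigned long addr)
{
        /* word 15 of the 64-byte block: the suffix would spill over */
        return (addr & 0x3f) == 60;
}

int main(void)
{
        unsigned long addrs[] = { 0x1000, 0x103c, 0x1040 };

        for (int i = 0; i < 3; i++)
                printf("0x%lx -> %s\n", addrs[i],
                       prefixed_insn_crosses_64b(addrs[i]) ? "reject" : "ok");
        return 0;
}
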
 
index e839a90..717f2c9 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/security.h>
 #include <linux/memblock.h>
 #include <linux/syscalls.h>
+#include <linux/time_namespace.h>
 #include <vdso/datapage.h>
 
 #include <asm/syscall.h>
@@ -50,15 +51,21 @@ static union {
 } vdso_data_store __page_aligned_data;
 struct vdso_arch_data *vdso_data = &vdso_data_store.data;
 
+enum vvar_pages {
+       VVAR_DATA_PAGE_OFFSET,
+       VVAR_TIMENS_PAGE_OFFSET,
+       VVAR_NR_PAGES,
+};
+
 static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma,
                       unsigned long text_size)
 {
        unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
 
-       if (new_size != text_size + PAGE_SIZE)
+       if (new_size != text_size)
                return -EINVAL;
 
-       current->mm->context.vdso = (void __user *)new_vma->vm_start + PAGE_SIZE;
+       current->mm->context.vdso = (void __user *)new_vma->vm_start;
 
        return 0;
 }
@@ -73,6 +80,14 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str
        return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
 }
 
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+                            struct vm_area_struct *vma, struct vm_fault *vmf);
+
+static struct vm_special_mapping vvar_spec __ro_after_init = {
+       .name = "[vvar]",
+       .fault = vvar_fault,
+};
+
 static struct vm_special_mapping vdso32_spec __ro_after_init = {
        .name = "[vdso]",
        .mremap = vdso32_mremap,
@@ -83,17 +98,105 @@ static struct vm_special_mapping vdso64_spec __ro_after_init = {
        .mremap = vdso64_mremap,
 };
 
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+       return ((struct vdso_arch_data *)vvar_page)->data;
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+       struct mm_struct *mm = task->mm;
+       struct vm_area_struct *vma;
+
+       mmap_read_lock(mm);
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               unsigned long size = vma->vm_end - vma->vm_start;
+
+               if (vma_is_special_mapping(vma, &vvar_spec))
+                       zap_page_range(vma, vma->vm_start, size);
+       }
+
+       mmap_read_unlock(mm);
+       return 0;
+}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+       if (likely(vma->vm_mm == current->mm))
+               return current->nsproxy->time_ns->vvar_page;
+
+       /*
+        * VM_PFNMAP | VM_IO protect .fault() handler from being called
+        * through interfaces like /proc/$pid/mem or
+        * process_vm_{readv,writev}() as long as there's no .access()
+        * in special_mapping_vmops.
+        * For more details check_vma_flags() and __access_remote_vm()
+        */
+       WARN(1, "vvar_page accessed remotely");
+
+       return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+       return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+                            struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct page *timens_page = find_timens_vvar_page(vma);
+       unsigned long pfn;
+
+       switch (vmf->pgoff) {
+       case VVAR_DATA_PAGE_OFFSET:
+               if (timens_page)
+                       pfn = page_to_pfn(timens_page);
+               else
+                       pfn = virt_to_pfn(vdso_data);
+               break;
+#ifdef CONFIG_TIME_NS
+       case VVAR_TIMENS_PAGE_OFFSET:
+               /*
+                * If a task belongs to a time namespace then a namespace
+                * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+                * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+                * offset.
+                * See also the comment near timens_setup_vdso_data().
+                */
+               if (!timens_page)
+                       return VM_FAULT_SIGBUS;
+               pfn = virt_to_pfn(vdso_data);
+               break;
+#endif /* CONFIG_TIME_NS */
+       default:
+               return VM_FAULT_SIGBUS;
+       }
+
+       return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
 /*
  * This is called from binfmt_elf, we create the special vma for the
  * vDSO and insert it into the mm struct tree
  */
 static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
-       struct mm_struct *mm = current->mm;
+       unsigned long vdso_size, vdso_base, mappings_size;
        struct vm_special_mapping *vdso_spec;
+       unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE;
+       struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
-       unsigned long vdso_size;
-       unsigned long vdso_base;
 
        if (is_32bit_task()) {
                vdso_spec = &vdso32_spec;
@@ -110,8 +213,8 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
                vdso_base = 0;
        }
 
-       /* Add a page to the vdso size for the data page */
-       vdso_size += PAGE_SIZE;
+       mappings_size = vdso_size + vvar_size;
+       mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK;
 
        /*
         * pick a base address for the vDSO in process space. We try to put it
@@ -119,9 +222,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
         * and end up putting it elsewhere.
         * Add enough to the size so that the result can be aligned.
         */
-       vdso_base = get_unmapped_area(NULL, vdso_base,
-                                     vdso_size + ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
-                                     0, 0);
+       vdso_base = get_unmapped_area(NULL, vdso_base, mappings_size, 0, 0);
        if (IS_ERR_VALUE(vdso_base))
                return vdso_base;
 
@@ -133,7 +234,13 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
         * install_special_mapping or the perf counter mmap tracking code
         * will fail to recognise it as a vDSO.
         */
-       mm->context.vdso = (void __user *)vdso_base + PAGE_SIZE;
+       mm->context.vdso = (void __user *)vdso_base + vvar_size;
+
+       vma = _install_special_mapping(mm, vdso_base, vvar_size,
+                                      VM_READ | VM_MAYREAD | VM_IO |
+                                      VM_DONTDUMP | VM_PFNMAP, &vvar_spec);
+       if (IS_ERR(vma))
+               return PTR_ERR(vma);
 
        /*
         * our vma flags don't have VM_WRITE so by default, the process isn't
@@ -145,9 +252,12 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
         * It's fine to use that for setting breakpoints in the vDSO code
         * pages though.
         */
-       vma = _install_special_mapping(mm, vdso_base, vdso_size,
+       vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size,
                                       VM_READ | VM_EXEC | VM_MAYREAD |
                                       VM_MAYWRITE | VM_MAYEXEC, vdso_spec);
+       if (IS_ERR(vma))
+               do_munmap(mm, vdso_base, vvar_size, NULL);
+
        return PTR_ERR_OR_ZERO(vma);
 }
 
@@ -249,10 +359,8 @@ static struct page ** __init vdso_setup_pages(void *start, void *end)
        if (!pagelist)
                panic("%s: Cannot allocate page list for VDSO", __func__);
 
-       pagelist[0] = virt_to_page(vdso_data);
-
        for (i = 0; i < pages; i++)
-               pagelist[i + 1] = virt_to_page(start + i * PAGE_SIZE);
+               pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
 
        return pagelist;
 }
index a4b806b..58e0099 100644 (file)
@@ -17,7 +17,7 @@ ENTRY(_start)
 
 SECTIONS
 {
-       PROVIDE(_vdso_datapage = . - PAGE_SIZE);
+       PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE);
        . = SIZEOF_HEADERS;
 
        .hash           : { *(.hash) }                  :text
index 2f3c359..0288cad 100644 (file)
@@ -17,7 +17,7 @@ ENTRY(_start)
 
 SECTIONS
 {
-       PROVIDE(_vdso_datapage = . - PAGE_SIZE);
+       PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE);
        . = SIZEOF_HEADERS;
 
        .hash           : { *(.hash) }                  :text
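
Both linker-script hunks above move _vdso_datapage from one page to two pages below the vDSO text, matching the vdso.c change that now maps a two-page [vvar] area (data page plus time-namespace page) in front of the [vdso] mapping. A throwaway sketch of the assumed layout arithmetic; the base address and 64K page size are hypothetical:

#include <stdio.h>

#define PAGE_SIZE 0x10000UL     /* assumed 64K pages for the example */

int main(void)
{
        unsigned long vdso_base = 0x7fff80000000UL;     /* hypothetical */
        unsigned long vvar_size = 2 * PAGE_SIZE;        /* data + timens page */
        unsigned long vdso_text = vdso_base + vvar_size;
        unsigned long datapage  = vdso_text - 2 * PAGE_SIZE;

        /* datapage lands back at the start of the [vvar] area */
        printf("vvar 0x%lx, vdso text 0x%lx, data page 0x%lx\n",
               vdso_base, vdso_text, datapage);
        return 0;
}
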
index 801dc28..f5a52f4 100644 (file)
@@ -67,9 +67,7 @@ _GLOBAL(load_up_altivec)
 #ifdef CONFIG_PPC32
        mfspr   r5,SPRN_SPRG_THREAD             /* current task's THREAD (phys) */
        oris    r9,r9,MSR_VEC@h
-#ifdef CONFIG_VMAP_STACK
        tovirt(r5, r5)
-#endif
 #else
        ld      r4,PACACURRENT(r13)
        addi    r5,r4,THREAD            /* Get THREAD */
index c9a8898..0196d0c 100644 (file)
@@ -24,6 +24,7 @@
 #include <asm/smp.h>
 #include <asm/setjmp.h>
 #include <asm/debug.h>
+#include <asm/interrupt.h>
 
 /*
  * The primary CPU waits a while for all secondary CPUs to enter. This is to
@@ -336,7 +337,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
         * If we came in via system reset, wait a while for the secondary
         * CPUs to enter.
         */
-       if (TRAP(regs) == 0x100)
+       if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
                mdelay(PRIMARY_TIMEOUT);
 
        crash_kexec_prepare_cpus(crashing_cpu);
index e452158..c3e31fe 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <linux/pkeys.h>
 
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
@@ -133,6 +134,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
        else
                kvmppc_mmu_flush_icache(pfn);
 
+       rflags |= pte_to_hpte_pkey_bits(0, HPTE_USE_KERNEL_KEY);
        rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg;
 
        /*
index 13bad6b..4a53241 100644 (file)
@@ -803,7 +803,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
                vcpu->arch.dawrx1 = value2;
                return H_SUCCESS;
        case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
-               /* KVM does not support mflags=2 (AIL=2) */
+               /*
+                * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
+                * Keep this in synch with kvmppc_filter_guest_lpcr_hv.
+                */
                if (mflags != 0 && mflags != 3)
                        return H_UNSUPPORTED_FLAG_START;
                return H_TOO_HARD;
@@ -1635,6 +1638,41 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+/*
+ * Enforce limits on guest LPCR values based on hardware availability,
+ * guest configuration, and possibly hypervisor support and security
+ * concerns.
+ */
+unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
+{
+       /* LPCR_TC only applies to HPT guests */
+       if (kvm_is_radix(kvm))
+               lpcr &= ~LPCR_TC;
+
+       /* On POWER8 and above, userspace can modify AIL */
+       if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+               lpcr &= ~LPCR_AIL;
+       if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
+               lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
+
+       /*
+        * On POWER9, allow userspace to enable large decrementer for the
+        * guest, whether or not the host has it enabled.
+        */
+       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+               lpcr &= ~LPCR_LD;
+
+       return lpcr;
+}
+
+static void verify_lpcr(struct kvm *kvm, unsigned long lpcr)
+{
+       if (lpcr != kvmppc_filter_lpcr_hv(kvm, lpcr)) {
+               WARN_ONCE(1, "lpcr 0x%lx differs from filtered 0x%lx\n",
+                         lpcr, kvmppc_filter_lpcr_hv(kvm, lpcr));
+       }
+}
+
 static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
                bool preserve_top32)
 {
@@ -1643,6 +1681,23 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
        u64 mask;
 
        spin_lock(&vc->lock);
+
+       /*
+        * Userspace can only modify
+        * DPFD (default prefetch depth), ILE (interrupt little-endian),
+        * TC (translation control), AIL (alternate interrupt location),
+        * LD (large decrementer).
+        * These are subject to restrictions from kvmppc_filter_lcpr_hv().
+        */
+       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD;
+
+       /* Broken 32-bit version of LPCR must not clear top bits */
+       if (preserve_top32)
+               mask &= 0xFFFFFFFF;
+
+       new_lpcr = kvmppc_filter_lpcr_hv(kvm,
+                       (vc->lpcr & ~mask) | (new_lpcr & mask));
+
        /*
         * If ILE (interrupt little-endian) has changed, update the
         * MSR_LE bit in the intr_msr for each vcpu in this vcore.
@@ -1661,25 +1716,8 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
                }
        }
 
-       /*
-        * Userspace can only modify DPFD (default prefetch depth),
-        * ILE (interrupt little-endian) and TC (translation control).
-        * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.).
-        */
-       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
-       if (cpu_has_feature(CPU_FTR_ARCH_207S))
-               mask |= LPCR_AIL;
-       /*
-        * On POWER9, allow userspace to enable large decrementer for the
-        * guest, whether or not the host has it enabled.
-        */
-       if (cpu_has_feature(CPU_FTR_ARCH_300))
-               mask |= LPCR_LD;
+       vc->lpcr = new_lpcr;
 
-       /* Broken 32-bit version of LPCR must not clear top bits */
-       if (preserve_top32)
-               mask &= 0xFFFFFFFF;
-       vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
        spin_unlock(&vc->lock);
 }
 
@@ -3728,7 +3766,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
        vcpu->arch.dec_expires = dec + tb;
        vcpu->cpu = -1;
        vcpu->arch.thread_cpu = -1;
+       /* Save guest CTRL register, set runlatch to 1 */
        vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+       if (!(vcpu->arch.ctrl & 1))
+               mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
 
        vcpu->arch.iamr = mfspr(SPRN_IAMR);
        vcpu->arch.pspb = mfspr(SPRN_PSPB);
@@ -3749,7 +3790,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
        mtspr(SPRN_DSCR, host_dscr);
        mtspr(SPRN_TIDR, host_tidr);
        mtspr(SPRN_IAMR, host_iamr);
-       mtspr(SPRN_PSPB, 0);
 
        if (host_amr != vcpu->arch.amr)
                mtspr(SPRN_AMR, host_amr);
@@ -4641,8 +4681,10 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
                struct kvmppc_vcore *vc = kvm->arch.vcores[i];
                if (!vc)
                        continue;
+
                spin_lock(&vc->lock);
                vc->lpcr = (vc->lpcr & ~mask) | lpcr;
+               verify_lpcr(kvm, vc->lpcr);
                spin_unlock(&vc->lock);
                if (++cores_done >= kvm->arch.online_vcores)
                        break;
@@ -4970,6 +5012,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
                kvmppc_setup_partition_table(kvm);
        }
 
+       verify_lpcr(kvm, lpcr);
        kvm->arch.lpcr = lpcr;
 
        /* Initialization for future HPT resizes */
@@ -5369,8 +5412,10 @@ static unsigned int default_hcall_list[] = {
        H_READ,
        H_PROTECT,
        H_BULK_REMOVE,
+#ifdef CONFIG_SPAPR_TCE_IOMMU
        H_GET_TCE,
        H_PUT_TCE,
+#endif
        H_SET_DABR,
        H_SET_XDABR,
        H_CEDE,
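
kvmppc_set_lpcr() now builds the new value as (old & ~mask) | (request & mask) and then passes the result through kvmppc_filter_lpcr_hv(), which strips bits the platform cannot honour. A userspace sketch of that mask-then-filter flow; the bit values and feature flags below are illustrative placeholders, not the real LPCR layout:

#include <stdbool.h>
#include <stdio.h>

#define LPCR_TC    0x1UL
#define LPCR_AIL   0x6UL
#define LPCR_AIL_3 0x6UL
#define LPCR_LD    0x8UL
#define LPCR_ILE   0x10UL
#define LPCR_DPFD  0xe0UL

static bool is_radix = true;    /* illustrative platform state */
static bool has_207s = true;
static bool has_300  = false;

static unsigned long filter_lpcr(unsigned long lpcr)
{
        if (is_radix)
                lpcr &= ~LPCR_TC;               /* TC is HPT-only */
        if (!has_207s)
                lpcr &= ~LPCR_AIL;
        if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
                lpcr &= ~LPCR_AIL;              /* AIL=1/2 disallowed */
        if (!has_300)
                lpcr &= ~LPCR_LD;
        return lpcr;
}

int main(void)
{
        unsigned long mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD;
        unsigned long cur = 0, req = LPCR_TC | LPCR_AIL_3 | LPCR_LD;
        unsigned long applied = filter_lpcr((cur & ~mask) | (req & mask));

        printf("requested 0x%lx -> applied 0x%lx\n", req, applied);
        return 0;
}
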
index 158d309..7a0e33a 100644 (file)
@@ -662,6 +662,9 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 
 void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
 {
+       /* Guest must always run with ME enabled, HV disabled. */
+       msr = (msr | MSR_ME) & ~MSR_HV;
+
        /*
         * Check for illegal transactional state bit combination
         * and if we find it, force the TS field to a safe state.
index 0cd0e7a..60724f6 100644 (file)
@@ -132,8 +132,33 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
        }
 }
 
+/*
+ * This can result in some L0 HV register state being leaked to an L1
+ * hypervisor when the hv_guest_state is copied back to the guest after
+ * being modified here.
+ *
+ * There is no known problem with such a leak, and in many cases these
+ * register settings could be derived by the guest by observing behaviour
+ * and timing, interrupts, etc., but it is an issue to consider.
+ */
 static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 {
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
+       u64 mask;
+
+       /*
+        * Don't let L1 change LPCR bits for the L2 except these:
+        */
+       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+               LPCR_LPES | LPCR_MER;
+
+       /*
+        * Additional filtering is required depending on hardware
+        * and configuration.
+        */
+       hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
+                       (vc->lpcr & ~mask) | (hr->lpcr & mask));
+
        /*
         * Don't let L1 enable features for L2 which we've disabled for L1,
         * but preserve the interrupt cause field.
@@ -271,8 +296,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        u64 hv_ptr, regs_ptr;
        u64 hdec_exp;
        s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
-       u64 mask;
-       unsigned long lpcr;
 
        if (vcpu->kvm->arch.l1_ptcr == 0)
                return H_NOT_AVAILABLE;
@@ -320,10 +343,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        vcpu->arch.nested = l2;
        vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
        vcpu->arch.regs = l2_regs;
-       vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
-       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
-               LPCR_LPES | LPCR_MER;
-       lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
+
+       /* Guest must always run with ME enabled, HV disabled. */
+       vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;
+
        sanitise_hv_regs(vcpu, &l2_hv);
        restore_hv_regs(vcpu, &l2_hv);
 
@@ -335,7 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
                        r = RESUME_HOST;
                        break;
                }
-               r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
+               r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr);
        } while (is_kvmppc_resume_guest(r));
 
        /* save L2 state for return */
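
The two hunks above, kvmppc_set_msr_hv() and the nested-guest entry path, both force the guest MSR to run with ME set and HV clear before it is used. A one-line sketch of that rule; the bit positions follow the usual ppc64 numbering (ME at bit 12, HV at bit 60) and are shown here only for illustration:

#include <stdio.h>

#define MSR_ME (1ULL << 12)
#define MSR_HV (1ULL << 60)

static unsigned long long sanitise_guest_msr(unsigned long long msr)
{
        return (msr | MSR_ME) & ~MSR_HV;        /* ME on, HV off, always */
}

int main(void)
{
        printf("0x%llx\n", sanitise_guest_msr(MSR_HV));  /* -> just MSR_ME */
        return 0;
}
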
index 88da276..7af7c70 100644 (file)
@@ -673,8 +673,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 }
 
 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
-                     unsigned long pte_index, unsigned long avpn,
-                     unsigned long va)
+                     unsigned long pte_index, unsigned long avpn)
 {
        struct kvm *kvm = vcpu->kvm;
        __be64 *hpte;
index d4efc18..f2c690e 100644 (file)
@@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
 endif
 
-obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o test_code-patching.o
+obj-y += alloc.o code-patching.o feature-fixups.o pmem.o test_code-patching.o
 
 ifndef CONFIG_KASAN
 obj-y  +=      string.o memcmp_$(BITS).o
index b895166..f3999cb 100644 (file)
@@ -16,16 +16,12 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst,
 {
        __wsum csum;
 
-       might_sleep();
-
-       if (unlikely(!access_ok(src, len)))
+       if (unlikely(!user_read_access_begin(src, len)))
                return 0;
 
-       allow_read_from_user(src, len);
-
        csum = csum_partial_copy_generic((void __force *)src, dst, len);
 
-       prevent_read_from_user(src, len);
+       user_read_access_end();
        return csum;
 }
 EXPORT_SYMBOL(csum_and_copy_from_user);
@@ -34,15 +30,12 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len)
 {
        __wsum csum;
 
-       might_sleep();
-       if (unlikely(!access_ok(dst, len)))
+       if (unlikely(!user_write_access_begin(dst, len)))
                return 0;
 
-       allow_write_to_user(dst, len);
-
        csum = csum_partial_copy_generic(src, (void __force *)dst, len);
 
-       prevent_write_to_user(dst, len);
+       user_write_access_end();
        return csum;
 }
 EXPORT_SYMBOL(csum_and_copy_to_user);
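
The checksum wrappers drop the separate access_ok()/allow_*_to_user() pair in favour of user_read/write_access_begin() and user_*_access_end(), which validate the range and open the user-access window in a single call and bail out with 0 on failure. A userspace analogue of the pattern, with a plain flag standing in for the hardware window:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool window_open;

static bool user_read_access_begin(const void *src, size_t len)
{
        if (!src || !len)       /* stand-in for the real range check */
                return false;
        window_open = true;     /* "open the user-access window" */
        return true;
}

static void user_read_access_end(void)
{
        window_open = false;
}

static int checksum_from_user(const char *src, size_t len)
{
        int sum = 0;

        if (!user_read_access_begin(src, len))
                return 0;       /* the kernel wrappers also return 0 here */
        assert(window_open);    /* the body runs with the window open */
        for (size_t i = 0; i < len; i++)
                sum += (unsigned char)src[i];
        user_read_access_end();
        return sum;
}

int main(void)
{
        printf("%d\n", checksum_from_user("abc", 3));
        return 0;
}
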
index 2333625..870b30d 100644 (file)
 static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr,
                               struct ppc_inst *patch_addr)
 {
-       if (!ppc_inst_prefixed(instr))
-               __put_user_asm_goto(ppc_inst_val(instr), patch_addr, failed, "stw");
-       else
-               __put_user_asm_goto(ppc_inst_as_u64(instr), patch_addr, failed, "std");
+       if (!ppc_inst_prefixed(instr)) {
+               u32 val = ppc_inst_val(instr);
+
+               __put_kernel_nofault(patch_addr, &val, u32, failed);
+       } else {
+               u64 val = ppc_inst_as_ulong(instr);
+
+               __put_kernel_nofault(patch_addr, &val, u64, failed);
+       }
 
        asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
                                                            "r" (exec_addr));
diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c
deleted file mode 100644 (file)
index 9cc17eb..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  Copyright 2020, IBM Corporation.
- */
-
-#include <linux/uaccess.h>
-#include <asm/disassemble.h>
-#include <asm/inst.h>
-#include <asm/ppc-opcode.h>
-
-#ifdef CONFIG_PPC64
-int probe_user_read_inst(struct ppc_inst *inst,
-                        struct ppc_inst __user *nip)
-{
-       unsigned int val, suffix;
-       int err;
-
-       err = copy_from_user_nofault(&val, nip, sizeof(val));
-       if (err)
-               return err;
-       if (get_op(val) == OP_PREFIX) {
-               err = copy_from_user_nofault(&suffix, (void __user *)nip + 4, 4);
-               *inst = ppc_inst_prefix(val, suffix);
-       } else {
-               *inst = ppc_inst(val);
-       }
-       return err;
-}
-
-int probe_kernel_read_inst(struct ppc_inst *inst,
-                          struct ppc_inst *src)
-{
-       unsigned int val, suffix;
-       int err;
-
-       err = copy_from_kernel_nofault(&val, src, sizeof(val));
-       if (err)
-               return err;
-       if (get_op(val) == OP_PREFIX) {
-               err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4);
-               *inst = ppc_inst_prefix(val, suffix);
-       } else {
-               *inst = ppc_inst(val);
-       }
-       return err;
-}
-#else /* !CONFIG_PPC64 */
-int probe_user_read_inst(struct ppc_inst *inst,
-                        struct ppc_inst __user *nip)
-{
-       unsigned int val;
-       int err;
-
-       err = copy_from_user_nofault(&val, nip, sizeof(val));
-       if (!err)
-               *inst = ppc_inst(val);
-
-       return err;
-}
-
-int probe_kernel_read_inst(struct ppc_inst *inst,
-                          struct ppc_inst *src)
-{
-       unsigned int val;
-       int err;
-
-       err = copy_from_kernel_nofault(&val, src, sizeof(val));
-       if (!err)
-               *inst = ppc_inst(val);
-
-       return err;
-}
-#endif /* CONFIG_PPC64 */
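
The earlier ftrace hunks now call copy_inst_from_kernel_nofault() where they used probe_kernel_read_inst(), and this deletion removes the old helper. Assuming the renamed helper keeps the same shape as the deleted PPC64 variant, the core logic is roughly the following userspace sketch: read one 32-bit word, and read the suffix word too when the primary opcode is the prefix opcode.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define OP_PREFIX 1u

static unsigned int get_op(uint32_t word)
{
        return word >> 26;      /* primary opcode is the top 6 bits */
}

static int read_inst(const void *src, uint64_t *inst)
{
        uint32_t val, suffix;

        memcpy(&val, src, sizeof(val));         /* copy_from_kernel_nofault() stand-in */
        if (get_op(val) == OP_PREFIX) {
                memcpy(&suffix, (const char *)src + 4, sizeof(suffix));
                *inst = ((uint64_t)val << 32) | suffix;
        } else {
                *inst = val;
        }
        return 0;
}

int main(void)
{
        uint32_t buf[2] = { 0x38600001, 0 };    /* li r3,1 -- not prefixed */
        uint64_t inst;

        read_inst(buf, &inst);
        printf("0x%llx\n", (unsigned long long)inst);
        return 0;
}
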
index c6aebc1..45bda25 100644 (file)
@@ -1401,10 +1401,6 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
                break;
        }
 
-       /* Following cases refer to regs->gpr[], so we need all regs */
-       if (!FULL_REGS(regs))
-               return -1;
-
        rd = (word >> 21) & 0x1f;
        ra = (word >> 16) & 0x1f;
        rb = (word >> 11) & 0x1f;
@@ -3086,15 +3082,6 @@ NOKPROBE_SYMBOL(analyse_instr);
  */
 static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs)
 {
-#ifdef CONFIG_PPC32
-       /*
-        * Check if we will touch kernel stack overflow
-        */
-       if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) {
-               printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n");
-               return -EINVAL;
-       }
-#endif /* CONFIG_PPC32 */
        /*
         * Check if we already set since that means we'll
         * lose the previous value.
index 30b4b69..327165f 100644 (file)
@@ -225,7 +225,7 @@ record_exception(struct pt_regs *regs, int eflag)
 int
 do_mathemu(struct pt_regs *regs)
 {
-       void *op0 = 0, *op1 = 0, *op2 = 0, *op3 = 0;
+       void *op0 = NULL, *op1 = NULL, *op2 = NULL, *op3 = NULL;
        unsigned long pc = regs->nip;
        signed short sdisp;
        u32 insn = 0;
@@ -234,7 +234,7 @@ do_mathemu(struct pt_regs *regs)
        int type = 0;
        int eflag, trap;
 
-       if (get_user(insn, (u32 *)pc))
+       if (get_user(insn, (u32 __user *)pc))
                return -EFAULT;
 
        switch (insn >> 26) {
index 3b4e9e4..c3df3a8 100644 (file)
@@ -8,7 +8,8 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
 obj-y                          := fault.o mem.o pgtable.o mmap.o maccess.o \
                                   init_$(BITS).o pgtable_$(BITS).o \
                                   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
-                                  init-common.o mmu_context.o drmem.o
+                                  init-common.o mmu_context.o drmem.o \
+                                  cacheflush.o
 obj-$(CONFIG_PPC_MMU_NOHASH)   += nohash/
 obj-$(CONFIG_PPC_BOOK3S_32)    += book3s32/
 obj-$(CONFIG_PPC_BOOK3S_64)    += book3s64/
index 446d9de..7f0c8a7 100644 (file)
@@ -9,3 +9,4 @@ endif
 obj-y += mmu.o mmu_context.o
 obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
 obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
+obj-$(CONFIG_PPC_KUEP) += kuep.o
index 0e6dc83..fb4233a 100644 (file)
@@ -140,10 +140,6 @@ _GLOBAL(hash_page)
        bne-    .Lretry                 /* retry if someone got there first */
 
        mfsrin  r3,r4                   /* get segment reg for segment */
-#ifndef CONFIG_VMAP_STACK
-       mfctr   r0
-       stw     r0,_CTR(r11)
-#endif
        bl      create_hpte             /* add the hash table entry */
 
 #ifdef CONFIG_SMP
@@ -152,17 +148,7 @@ _GLOBAL(hash_page)
        li      r0,0
        stw     r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
 #endif
-
-#ifdef CONFIG_VMAP_STACK
        b       fast_hash_page_return
-#else
-       /* Return from the exception */
-       lwz     r5,_CTR(r11)
-       mtctr   r5
-       lwz     r0,GPR0(r11)
-       lwz     r8,GPR8(r11)
-       b       fast_exception_return
-#endif
 
 #ifdef CONFIG_SMP
 .Lhash_page_out:
diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c
new file mode 100644 (file)
index 0000000..8ed1b86
--- /dev/null
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <asm/kup.h>
+#include <asm/reg.h>
+#include <asm/task_size_32.h>
+#include <asm/mmu.h>
+
+#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do {          \
+       if (TASK_SIZE > ((n) << 28))                    \
+               mtsr(val1, (n) << 28);                  \
+       if (TASK_SIZE > (((n) + 1) << 28))              \
+               mtsr(val2, ((n) + 1) << 28);            \
+       val1 = (val1 + 0x222) & 0xf0ffffff;             \
+       val2 = (val2 + 0x222) & 0xf0ffffff;             \
+} while (0)
+
+static __always_inline void kuep_update(u32 val)
+{
+       int val1 = val;
+       int val2 = (val + 0x111) & 0xf0ffffff;
+
+       KUEP_UPDATE_TWO_USER_SEGMENTS(0);
+       KUEP_UPDATE_TWO_USER_SEGMENTS(2);
+       KUEP_UPDATE_TWO_USER_SEGMENTS(4);
+       KUEP_UPDATE_TWO_USER_SEGMENTS(6);
+       KUEP_UPDATE_TWO_USER_SEGMENTS(8);
+       KUEP_UPDATE_TWO_USER_SEGMENTS(10);
+       KUEP_UPDATE_TWO_USER_SEGMENTS(12);
+       KUEP_UPDATE_TWO_USER_SEGMENTS(14);
+}
+
+void kuep_lock(void)
+{
+       kuep_update(mfsr(0) | SR_NX);
+}
+
+void kuep_unlock(void)
+{
+       kuep_update(mfsr(0) & ~SR_NX);
+}
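
kuep_update() walks all 16 user segment registers, toggling SR_NX while stepping the value by 0x111 per segment and masking with 0xf0ffffff. This appears to rely on consecutive user segments holding VSIDs that differ by 0x111, with the top nibble (the Ks/Kp/N protection bits) preserved by the mask; a small sketch of that arithmetic, using a made-up starting value and an assumed SR_NX bit position:

#include <stdio.h>

#define SR_NX 0x10000000u       /* assumed No-Execute bit for this sketch */

int main(void)
{
        unsigned int val = 0x00012345 | SR_NX;  /* hypothetical SR 0 contents */

        for (int seg = 0; seg < 4; seg++) {
                printf("segment %d -> SR value 0x%08x\n", seg, val);
                /* next segment: VSID advances by 0x111, top nibble kept */
                val = (val + 0x111) & 0xf0ffffff;
        }
        return 0;
}
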
index d7eb266..1599303 100644 (file)
@@ -162,7 +162,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
        unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
 
 
-       if (debug_pagealloc_enabled() || __map_without_bats) {
+       if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) {
                pr_debug_once("Read-Write memory mapped without BATs\n");
                if (base >= border)
                        return base;
@@ -184,17 +184,10 @@ static bool is_module_segment(unsigned long addr)
 {
        if (!IS_ENABLED(CONFIG_MODULES))
                return false;
-#ifdef MODULES_VADDR
        if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
                return false;
        if (addr > ALIGN(MODULES_END, SZ_256M) - 1)
                return false;
-#else
-       if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M))
-               return false;
-       if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1)
-               return false;
-#endif
        return true;
 }
 
index 567e0c6..ad5eff0 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/mm.h>
+#include <linux/stop_machine.h>
 
 #include <asm/sections.h>
 #include <asm/mmu.h>
@@ -400,10 +401,103 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
+
+struct change_memory_parms {
+       unsigned long start, end, newpp;
+       unsigned int step, nr_cpus, master_cpu;
+       atomic_t cpu_counter;
+};
+
+// We'd rather this was on the stack but it has to be in the RMO
+static struct change_memory_parms chmem_parms;
+
+// And therefore we need a lock to protect it from concurrent use
+static DEFINE_MUTEX(chmem_lock);
+
+static void change_memory_range(unsigned long start, unsigned long end,
+                               unsigned int step, unsigned long newpp)
+{
+       unsigned long idx;
+
+       pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
+                start, end, newpp, step);
+
+       for (idx = start; idx < end; idx += step)
+               /* Not sure if we can do much with the return value */
+               mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
+                                                       mmu_kernel_ssize);
+}
+
+static int notrace chmem_secondary_loop(struct change_memory_parms *parms)
+{
+       unsigned long msr, tmp, flags;
+       int *p;
+
+       p = &parms->cpu_counter.counter;
+
+       local_irq_save(flags);
+       hard_irq_disable();
+
+       asm volatile (
+       // Switch to real mode and leave interrupts off
+       "mfmsr  %[msr]                  ;"
+       "li     %[tmp], %[MSR_IR_DR]    ;"
+       "andc   %[tmp], %[msr], %[tmp]  ;"
+       "mtmsrd %[tmp]                  ;"
+
+       // Tell the master we are in real mode
+       "1:                             "
+       "lwarx  %[tmp], 0, %[p]         ;"
+       "addic  %[tmp], %[tmp], -1      ;"
+       "stwcx. %[tmp], 0, %[p]         ;"
+       "bne-   1b                      ;"
+
+       // Spin until the counter goes to zero
+       "2:                             ;"
+       "lwz    %[tmp], 0(%[p])         ;"
+       "cmpwi  %[tmp], 0               ;"
+       "bne-   2b                      ;"
+
+       // Switch back to virtual mode
+       "mtmsrd %[msr]                  ;"
+
+       : // outputs
+         [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p)
+       : // inputs
+         [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR)
+       : // clobbers
+         "cc", "xer"
+       );
+
+       local_irq_restore(flags);
+
+       return 0;
+}
+
+static int change_memory_range_fn(void *data)
+{
+       struct change_memory_parms *parms = data;
+
+       if (parms->master_cpu != smp_processor_id())
+               return chmem_secondary_loop(parms);
+
+       // Wait for all but one CPU (this one) to call-in
+       while (atomic_read(&parms->cpu_counter) > 1)
+               barrier();
+
+       change_memory_range(parms->start, parms->end, parms->step, parms->newpp);
+
+       mb();
+
+       // Signal the other CPUs that we're done
+       atomic_dec(&parms->cpu_counter);
+
+       return 0;
+}
+
 static bool hash__change_memory_range(unsigned long start, unsigned long end,
                                      unsigned long newpp)
 {
-       unsigned long idx;
        unsigned int step, shift;
 
        shift = mmu_psize_defs[mmu_linear_psize].shift;
@@ -415,25 +509,43 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end,
        if (start >= end)
                return false;
 
-       pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
-                start, end, newpp, step);
+       if (firmware_has_feature(FW_FEATURE_LPAR)) {
+               mutex_lock(&chmem_lock);
 
-       for (idx = start; idx < end; idx += step)
-               /* Not sure if we can do much with the return value */
-               mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
-                                                       mmu_kernel_ssize);
+               chmem_parms.start = start;
+               chmem_parms.end = end;
+               chmem_parms.step = step;
+               chmem_parms.newpp = newpp;
+               chmem_parms.master_cpu = smp_processor_id();
+
+               cpus_read_lock();
+
+               atomic_set(&chmem_parms.cpu_counter, num_online_cpus());
+
+               // Ensure state is consistent before we call the other CPUs
+               mb();
+
+               stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms,
+                                       cpu_online_mask);
+
+               cpus_read_unlock();
+               mutex_unlock(&chmem_lock);
+       } else
+               change_memory_range(start, end, step, newpp);
 
        return true;
 }
 
 void hash__mark_rodata_ro(void)
 {
-       unsigned long start, end;
+       unsigned long start, end, pp;
 
        start = (unsigned long)_stext;
        end = (unsigned long)__init_begin;
 
-       WARN_ON(!hash__change_memory_range(start, end, PP_RXXX));
+       pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY);
+
+       WARN_ON(!hash__change_memory_range(start, end, pp));
 }
 
 void hash__mark_initmem_nx(void)
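
Under an LPAR, hash__change_memory_range() now uses stop_machine_cpuslocked() with a shared counter: every CPU entering the callback decrements it, secondaries drop to real mode and spin until it reaches zero, and the master waits for the count to fall to one, updates the bolted HPTEs, then performs the final decrement to release everyone. A userspace analogue of that rendezvous, with threads standing in for CPUs (compile with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int cpu_counter = NCPUS;

static void *secondary(void *arg)
{
        (void)arg;
        atomic_fetch_sub(&cpu_counter, 1);      /* "I have arrived" */
        while (atomic_load(&cpu_counter) > 0)   /* spin until the master is done */
                ;
        return NULL;
}

int main(void)
{
        pthread_t t[NCPUS - 1];

        for (int i = 0; i < NCPUS - 1; i++)
                pthread_create(&t[i], NULL, secondary, NULL);

        while (atomic_load(&cpu_counter) > 1)   /* wait for all but this CPU */
                ;
        printf("master: updating bolted HPTEs while the others spin\n");
        atomic_fetch_sub(&cpu_counter, 1);      /* release the secondaries */

        for (int i = 0; i < NCPUS - 1; i++)
                pthread_join(t[i], NULL);
        return 0;
}
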
index 581b20a..96d9aa1 100644 (file)
@@ -338,7 +338,7 @@ repeat:
 int htab_remove_mapping(unsigned long vstart, unsigned long vend,
                      int psize, int ssize)
 {
-       unsigned long vaddr;
+       unsigned long vaddr, time_limit;
        unsigned int step, shift;
        int rc;
        int ret = 0;
@@ -351,8 +351,19 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 
        /* Unmap the full range specificied */
        vaddr = ALIGN_DOWN(vstart, step);
+       time_limit = jiffies + HZ;
+
        for (;vaddr < vend; vaddr += step) {
                rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize);
+
+               /*
+                * For large number of mappings introduce a cond_resched()
+                * to prevent softlockup warnings.
+                */
+               if (time_after(jiffies, time_limit)) {
+                       cond_resched();
+                       time_limit = jiffies + HZ;
+               }
                if (rc == -ENOENT) {
                        ret = -ENOENT;
                        continue;
@@ -1145,7 +1156,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 
        /* page is dirty */
        if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) {
-               if (trap == 0x400) {
+               if (trap == INTERRUPT_INST_STORAGE) {
                        flush_dcache_icache_page(page);
                        set_bit(PG_dcache_clean, &page->flags);
                } else
@@ -1545,10 +1556,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
        if (user_mode(regs) || (region_id == USER_REGION_ID))
                access &= ~_PAGE_PRIVILEGED;
 
-       if (regs->trap == 0x400)
+       if (TRAP(regs) == INTERRUPT_INST_STORAGE)
                access |= _PAGE_EXEC;
 
-       err = hash_page_mm(mm, ea, access, regs->trap, flags);
+       err = hash_page_mm(mm, ea, access, TRAP(regs), flags);
        if (unlikely(err < 0)) {
                // failed to instert a hash PTE due to an hypervisor error
                if (user_mode(regs)) {
@@ -1572,10 +1583,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
 DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
 {
        unsigned long dsisr = regs->dsisr;
-       long err;
 
-       if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT)))
-               goto page_fault;
+       if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
+               hash__do_page_fault(regs);
+               return 0;
+       }
 
        /*
         * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
@@ -1595,13 +1607,10 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
                return 0;
        }
 
-       err = __do_hash_fault(regs);
-       if (err) {
-page_fault:
-               err = hash__do_page_fault(regs);
-       }
+       if (__do_hash_fault(regs))
+               hash__do_page_fault(regs);
 
-       return err;
+       return 0;
 }
 
 #ifdef CONFIG_PPC_MM_SLICES
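
htab_remove_mapping() now calls cond_resched() roughly once per second of wall time rather than checking on every iteration, so short unmaps stay cheap while huge ranges no longer trigger soft-lockup warnings. A userspace analogue of the same time-limited yield pattern (runtime and yield count vary by machine):

#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
        time_t limit = time(NULL) + 1;          /* stands in for jiffies + HZ */
        unsigned long yields = 0;

        for (unsigned long i = 0; i < 100000000UL; i++) {
                /* ... per-iteration work would go here ... */
                if (time(NULL) > limit) {
                        sched_yield();          /* stands in for cond_resched() */
                        yields++;
                        limit = time(NULL) + 1;
                }
        }
        printf("yielded %lu times\n", yields);
        return 0;
}
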
index 0c85572..c10fc8a 100644 (file)
@@ -119,7 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm)
                /* This is fork. Copy hash_context details from current->mm */
                memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
 #ifdef CONFIG_PPC_SUBPAGE_PROT
-               /* inherit subpage prot detalis if we have one. */
+               /* inherit subpage prot details if we have one. */
                if (current->mm->context.hash_context->spt) {
                        mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
                                                                GFP_KERNEL);
index 15dcc5a..a2d9ad1 100644 (file)
@@ -301,19 +301,6 @@ void setup_kuap(bool disabled)
 }
 #endif
 
-static inline void update_current_thread_amr(u64 value)
-{
-       current->thread.regs->amr = value;
-}
-
-static inline void update_current_thread_iamr(u64 value)
-{
-       if (!likely(pkey_execute_disable_supported))
-               return;
-
-       current->thread.regs->iamr = value;
-}
-
 #ifdef CONFIG_PPC_MEM_KEYS
 void pkey_mm_init(struct mm_struct *mm)
 {
@@ -328,7 +315,7 @@ static inline void init_amr(int pkey, u8 init_bits)
        u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey));
        u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey));
 
-       update_current_thread_amr(old_amr | new_amr_bits);
+       current->thread.regs->amr = old_amr | new_amr_bits;
 }
 
 static inline void init_iamr(int pkey, u8 init_bits)
@@ -336,7 +323,10 @@ static inline void init_iamr(int pkey, u8 init_bits)
        u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey));
        u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey));
 
-       update_current_thread_iamr(old_iamr | new_iamr_bits);
+       if (!likely(pkey_execute_disable_supported))
+               return;
+
+       current->thread.regs->iamr = old_iamr | new_iamr_bits;
 }
 
 /*
index 98f0b24..50d536e 100644 (file)
@@ -108,7 +108,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 
 set_the_pte:
        set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
-       smp_wmb();
+       asm volatile("ptesync": : :"memory");
        return 0;
 }
 
@@ -168,7 +168,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa,
 
 set_the_pte:
        set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
-       smp_wmb();
+       asm volatile("ptesync": : :"memory");
        return 0;
 }
 
@@ -180,8 +180,8 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void radix__change_memory_range(unsigned long start, unsigned long end,
-                               unsigned long clear)
+static void radix__change_memory_range(unsigned long start, unsigned long end,
+                                      unsigned long clear)
 {
        unsigned long idx;
        pgd_t *pgdp;
@@ -1058,7 +1058,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
                 * Book3S does not require a TLB flush when relaxing access
                 * restrictions when the address space is not attached to a
                 * NMMU, because the core MMU will reload the pte after taking
-                * an access fault, which is defined by the architectue.
+                * an access fault, which is defined by the architecture.
                 */
        }
        /* See ptesync comment in radix__set_pte_at */
diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
new file mode 100644 (file)
index 0000000..6336378
--- /dev/null
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/highmem.h>
+#include <linux/kprobes.h>
+
+/**
+ * flush_coherent_icache() - if a CPU has a coherent icache, flush it
+ * Return true if the cache was flushed, false otherwise
+ */
+static inline bool flush_coherent_icache(void)
+{
+       /*
+        * For a snooping icache, we still need a dummy icbi to purge all the
+        * prefetched instructions from the ifetch buffers. We also need a sync
+        * before the icbi to order the the actual stores to memory that might
+        * have modified instructions with the icbi.
+        */
+       if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+               mb(); /* sync */
+               icbi((void *)PAGE_OFFSET);
+               mb(); /* sync */
+               isync();
+               return true;
+       }
+
+       return false;
+}
+
+/**
+ * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static void invalidate_icache_range(unsigned long start, unsigned long stop)
+{
+       unsigned long shift = l1_icache_shift();
+       unsigned long bytes = l1_icache_bytes();
+       char *addr = (char *)(start & ~(bytes - 1));
+       unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+       unsigned long i;
+
+       for (i = 0; i < size >> shift; i++, addr += bytes)
+               icbi(addr);
+
+       mb(); /* sync */
+       isync();
+}
+
+/**
+ * flush_icache_range: Write any modified data cache blocks out to memory
+ * and invalidate the corresponding blocks in the instruction cache
+ *
+ * Generic code will call this after writing memory, before executing from it.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+void flush_icache_range(unsigned long start, unsigned long stop)
+{
+       if (flush_coherent_icache())
+               return;
+
+       clean_dcache_range(start, stop);
+
+       if (IS_ENABLED(CONFIG_44x)) {
+               /*
+                * Flash invalidate on 44x because we are passed kmapped
+                * addresses and this doesn't work for userspace pages due to
+                * the virtually tagged icache.
+                */
+               iccci((void *)start);
+               mb(); /* sync */
+               isync();
+       } else
+               invalidate_icache_range(start, stop);
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+#ifdef CONFIG_HIGHMEM
+/**
+ * flush_dcache_icache_phys() - Flush a page by it's physical address
+ * @physaddr: the physical address of the page
+ */
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+       unsigned long bytes = l1_dcache_bytes();
+       unsigned long nb = PAGE_SIZE / bytes;
+       unsigned long addr = physaddr & PAGE_MASK;
+       unsigned long msr, msr0;
+       unsigned long loop1 = addr, loop2 = addr;
+
+       msr0 = mfmsr();
+       msr = msr0 & ~MSR_DR;
+       /*
+        * This must remain as ASM to prevent potential memory accesses
+        * while the data MMU is disabled
+        */
+       asm volatile(
+               "   mtctr %2;\n"
+               "   mtmsr %3;\n"
+               "   isync;\n"
+               "0: dcbst   0, %0;\n"
+               "   addi    %0, %0, %4;\n"
+               "   bdnz    0b;\n"
+               "   sync;\n"
+               "   mtctr %2;\n"
+               "1: icbi    0, %1;\n"
+               "   addi    %1, %1, %4;\n"
+               "   bdnz    1b;\n"
+               "   sync;\n"
+               "   mtmsr %5;\n"
+               "   isync;\n"
+               : "+&r" (loop1), "+&r" (loop2)
+               : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
+               : "ctr", "memory");
+}
+NOKPROBE_SYMBOL(flush_dcache_icache_phys)
+#else
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+}
+#endif
+
+/**
+ * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ *
+ * @p: the address of the page to flush
+ */
+static void __flush_dcache_icache(void *p)
+{
+       unsigned long addr = (unsigned long)p & PAGE_MASK;
+
+       clean_dcache_range(addr, addr + PAGE_SIZE);
+
+       /*
+        * We don't flush the icache on 44x. Those have a virtual icache and we
+        * don't have access to the virtual address here (it's not the page
+        * vaddr but where it's mapped in user space). The flushing of the
+        * icache on these is handled elsewhere, when a change in the address
+        * space occurs, before returning to user space.
+        */
+
+       if (mmu_has_feature(MMU_FTR_TYPE_44x))
+               return;
+
+       invalidate_icache_range(addr, addr + PAGE_SIZE);
+}
+
+static void flush_dcache_icache_hugepage(struct page *page)
+{
+       int i;
+       int nr = compound_nr(page);
+
+       if (!PageHighMem(page)) {
+               for (i = 0; i < nr; i++)
+                       __flush_dcache_icache(lowmem_page_address(page + i));
+       } else {
+               for (i = 0; i < nr; i++) {
+                       void *start = kmap_local_page(page + i);
+
+                       __flush_dcache_icache(start);
+                       kunmap_local(start);
+               }
+       }
+}
+
+void flush_dcache_icache_page(struct page *page)
+{
+       if (flush_coherent_icache())
+               return;
+
+       if (PageCompound(page))
+               return flush_dcache_icache_hugepage(page);
+
+       if (!PageHighMem(page)) {
+               __flush_dcache_icache(lowmem_page_address(page));
+       } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
+               void *start = kmap_local_page(page);
+
+               __flush_dcache_icache(start);
+               kunmap_local(start);
+       } else {
+               flush_dcache_icache_phys(page_to_phys(page));
+       }
+}
+EXPORT_SYMBOL(flush_dcache_icache_page);
+
+void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
+{
+       clear_page(page);
+
+       /*
+        * We shouldn't have to do this, but some versions of glibc
+        * require it (ld.so assumes zero filled pages are icache clean)
+        * - Anton
+        */
+       flush_dcache_page(pg);
+}
+EXPORT_SYMBOL(clear_user_page);
+
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+                   struct page *pg)
+{
+       copy_page(vto, vfrom);
+
+       /*
+        * We should be able to use the following optimisation, however
+        * there are two problems.
+        * Firstly a bug in some versions of binutils meant PLT sections
+        * were not marked executable.
+        * Secondly the first word in the GOT section is blrl, used
+        * to establish the GOT address. Until recently the GOT was
+        * not marked executable.
+        * - Anton
+        */
+#if 0
+       if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
+               return;
+#endif
+
+       flush_dcache_page(pg);
+}
+
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
+                            unsigned long addr, int len)
+{
+       void *maddr;
+
+       maddr = kmap_local_page(page) + (addr & ~PAGE_MASK);
+       flush_icache_range((unsigned long)maddr, (unsigned long)maddr + len);
+       kunmap_local(maddr);
+}
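
A quick worked example of the address math used above, assuming 4K pages: addr & PAGE_MASK gives the page base and addr & ~PAGE_MASK the offset inside the page, so flush_icache_range() only covers the len bytes that were actually written rather than the whole page.

#include <stdio.h>

#define PAGE_SIZE_EX 4096UL
#define PAGE_MASK_EX (~(PAGE_SIZE_EX - 1))

int main(void)
{
        unsigned long addr = 0x10002345UL;           /* example user address */
        unsigned long len  = 64;

        unsigned long base = addr & PAGE_MASK_EX;    /* start of the page */
        unsigned long off  = addr & ~PAGE_MASK_EX;   /* offset within the page */

        printf("flush [kmap_base + 0x%lx, kmap_base + 0x%lx)\n", off, off + len);
        printf("(page base would be 0x%lx)\n", base);
        return 0;
}
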
index bb36825..34f641d 100644 (file)
@@ -32,6 +32,8 @@
 #include <linux/context_tracking.h>
 #include <linux/hugetlb.h>
 #include <linux/uaccess.h>
+#include <linux/kfence.h>
+#include <linux/pkeys.h>
 
 #include <asm/firmware.h>
 #include <asm/interrupt.h>
@@ -87,7 +89,6 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
        return __bad_area(regs, address, SEGV_MAPERR);
 }
 
-#ifdef CONFIG_PPC_MEM_KEYS
 static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
                                    struct vm_area_struct *vma)
 {
@@ -127,7 +128,6 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
 
        return 0;
 }
-#endif
 
 static noinline int bad_access(struct pt_regs *regs, unsigned long address)
 {
@@ -197,7 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
 static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
                             unsigned long address, bool is_write)
 {
-       int is_exec = TRAP(regs) == 0x400;
+       int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
 
        /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
        if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
@@ -234,7 +234,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
        return false;
 }
 
-#ifdef CONFIG_PPC_MEM_KEYS
 static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
                              struct vm_area_struct *vma)
 {
@@ -248,7 +247,6 @@ static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
 
        return false;
 }
-#endif
 
 static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma)
 {
@@ -393,7 +391,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
        struct vm_area_struct * vma;
        struct mm_struct *mm = current->mm;
        unsigned int flags = FAULT_FLAG_DEFAULT;
-       int is_exec = TRAP(regs) == 0x400;
+       int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
        int is_user = user_mode(regs);
        int is_write = page_fault_is_write(error_code);
        vm_fault_t fault, major = 0;
@@ -418,8 +416,12 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
         * take a page fault to a kernel address or a page fault to a user
         * address outside of dedicated places
         */
-       if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write)))
+       if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) {
+               if (kfence_handle_page_fault(address, is_write, regs))
+                       return 0;
+
                return SIGSEGV;
+       }
 
        /*
         * If we're in an interrupt, have no user context or are running
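
The hunk above gives KFENCE first refusal on what would otherwise be a fatal kernel fault. A rough userspace sketch of that ordering, with hypothetical names (kfence_handle_fault_ex, handle_bad_kernel_fault_ex) standing in for the real functions: if the faulting address belongs to a KFENCE-guarded page the error is reported and the fault is treated as handled; only otherwise does it escalate to SIGSEGV.

#include <stdbool.h>
#include <stdio.h>

/* pretend one guarded region belongs to the KFENCE pool */
static bool kfence_handle_fault_ex(unsigned long addr)
{
        return addr >= 0xc0001000UL && addr < 0xc0002000UL;
}

static int handle_bad_kernel_fault_ex(unsigned long addr)
{
        if (kfence_handle_fault_ex(addr))
                return 0;    /* reported by KFENCE, no signal */
        return 11;           /* SIGSEGV */
}

int main(void)
{
        printf("0xc0001800 -> %d\n", handle_bad_kernel_fault_ex(0xc0001800UL));
        printf("0xdead0000 -> %d\n", handle_bad_kernel_fault_ex(0xdead0000UL));
        return 0;
}
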
@@ -492,11 +494,9 @@ retry:
                        return bad_area(regs, address);
        }
 
-#ifdef CONFIG_PPC_MEM_KEYS
        if (unlikely(access_pkey_error(is_write, is_exec,
                                       (error_code & DSISR_KEYFAULT), vma)))
                return bad_access_pkey(regs, address, vma);
-#endif /* CONFIG_PPC_MEM_KEYS */
 
        if (unlikely(access_error(is_write, is_exec, vma)))
                return bad_access(regs, address);
@@ -539,39 +539,25 @@ retry:
 }
 NOKPROBE_SYMBOL(___do_page_fault);
 
-static long __do_page_fault(struct pt_regs *regs)
+static __always_inline void __do_page_fault(struct pt_regs *regs)
 {
-       const struct exception_table_entry *entry;
        long err;
 
        err = ___do_page_fault(regs, regs->dar, regs->dsisr);
-       if (likely(!err))
-               return err;
-
-       entry = search_exception_tables(regs->nip);
-       if (likely(entry)) {
-               instruction_pointer_set(regs, extable_fixup(entry));
-               return 0;
-       } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
-               __bad_page_fault(regs, err);
-               return 0;
-       } else {
-               /* 32 and 64e handle the bad page fault in asm */
-               return err;
-       }
+       if (unlikely(err))
+               bad_page_fault(regs, err);
 }
-NOKPROBE_SYMBOL(__do_page_fault);
 
-DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
+DEFINE_INTERRUPT_HANDLER(do_page_fault)
 {
-       return __do_page_fault(regs);
+       __do_page_fault(regs);
 }
 
 #ifdef CONFIG_PPC_BOOK3S_64
 /* Same as do_page_fault but interrupt entry has already run in do_hash_fault */
-long hash__do_page_fault(struct pt_regs *regs)
+void hash__do_page_fault(struct pt_regs *regs)
 {
-       return __do_page_fault(regs);
+       __do_page_fault(regs);
 }
 NOKPROBE_SYMBOL(hash__do_page_fault);
 #endif
@@ -581,27 +567,27 @@ NOKPROBE_SYMBOL(hash__do_page_fault);
  * It is called from the DSI and ISI handlers in head.S and from some
  * of the procedures in traps.c.
  */
-void __bad_page_fault(struct pt_regs *regs, int sig)
+static void __bad_page_fault(struct pt_regs *regs, int sig)
 {
        int is_write = page_fault_is_write(regs->dsisr);
 
        /* kernel has accessed a bad area */
 
        switch (TRAP(regs)) {
-       case 0x300:
-       case 0x380:
-       case 0xe00:
+       case INTERRUPT_DATA_STORAGE:
+       case INTERRUPT_DATA_SEGMENT:
+       case INTERRUPT_H_DATA_STORAGE:
                pr_alert("BUG: %s on %s at 0x%08lx\n",
                         regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
                         "Unable to handle kernel data access",
                         is_write ? "write" : "read", regs->dar);
                break;
-       case 0x400:
-       case 0x480:
+       case INTERRUPT_INST_STORAGE:
+       case INTERRUPT_INST_SEGMENT:
                pr_alert("BUG: Unable to handle kernel instruction fetch%s",
                         regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n");
                break;
-       case 0x600:
+       case INTERRUPT_ALIGNMENT:
                pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n",
                         regs->dar);
                break;
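
The switch above replaces raw trap numbers with the symbolic INTERRUPT_* constants. A compact userspace analogue (the 0x300/0x400/0x600 values are the classic powerpc exception vectors shown in the removed lines, used here purely as example values):

#include <stdio.h>

enum interrupt_ex {
        INTERRUPT_DATA_STORAGE_EX = 0x300,
        INTERRUPT_INST_STORAGE_EX = 0x400,
        INTERRUPT_ALIGNMENT_EX    = 0x600,
};

static const char *describe(enum interrupt_ex trap)
{
        switch (trap) {
        case INTERRUPT_DATA_STORAGE_EX: return "kernel data access";
        case INTERRUPT_INST_STORAGE_EX: return "kernel instruction fetch";
        case INTERRUPT_ALIGNMENT_EX:    return "kernel unaligned access";
        default:                        return "unexpected fault";
        }
}

int main(void)
{
        printf("trap 0x400: %s\n", describe(INTERRUPT_INST_STORAGE_EX));
        return 0;
}
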
index 02c7db4..3d690be 100644 (file)
@@ -97,6 +97,9 @@ static void __init MMU_setup(void)
        if (IS_ENABLED(CONFIG_PPC_8xx))
                return;
 
+       if (IS_ENABLED(CONFIG_KFENCE))
+               __map_without_ltlbs = 1;
+
        if (debug_pagealloc_enabled())
                __map_without_ltlbs = 1;
 
index fa9a7a7..a3c30a8 100644 (file)
@@ -3,7 +3,28 @@
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
 
+#include <asm/disassemble.h>
+#include <asm/inst.h>
+#include <asm/ppc-opcode.h>
+
 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
 {
        return is_kernel_addr((unsigned long)unsafe_src);
 }
+
+int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src)
+{
+       unsigned int val, suffix;
+       int err;
+
+       err = copy_from_kernel_nofault(&val, src, sizeof(val));
+       if (err)
+               return err;
+       if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
+               err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4);
+               *inst = ppc_inst_prefix(val, suffix);
+       } else {
+               *inst = ppc_inst(val);
+       }
+       return err;
+}
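
copy_inst_from_kernel_nofault() has to cope with prefixed instructions, which occupy two 32-bit words whose first word carries primary opcode 1. A standalone sketch of that test (get_op_ex and OP_PREFIX_EX are illustrative stand-ins for the kernel's get_op()/OP_PREFIX): read one word, and fetch the 4-byte suffix only when the top six bits say the instruction is prefixed.

#include <stdint.h>
#include <stdio.h>

#define OP_PREFIX_EX 1u

static unsigned int get_op_ex(uint32_t word)
{
        return word >> 26;           /* primary opcode = top 6 bits */
}

int main(void)
{
        uint32_t words[] = { 0x06000000, 0x12345678 };  /* example prefix + suffix words */

        if (get_op_ex(words[0]) == OP_PREFIX_EX)
                printf("prefixed: prefix=0x%08x suffix=0x%08x\n", words[0], words[1]);
        else
                printf("word instruction: 0x%08x\n", words[0]);
        return 0;
}
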
index 4e8ce6d..6564b4d 100644 (file)
  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  */
 
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/highmem.h>
-#include <linux/initrd.h>
-#include <linux/pagemap.h>
 #include <linux/suspend.h>
-#include <linux/hugetlb.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/memremap.h>
 #include <linux/dma-direct.h>
-#include <linux/kprobes.h>
 
-#include <asm/prom.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#include <asm/mmu.h>
-#include <asm/smp.h>
 #include <asm/machdep.h>
-#include <asm/btext.h>
-#include <asm/tlb.h>
-#include <asm/sections.h>
-#include <asm/sparsemem.h>
-#include <asm/vdso.h>
-#include <asm/fixmap.h>
-#include <asm/swiotlb.h>
 #include <asm/rtas.h>
 #include <asm/kasan.h>
 #include <asm/svm.h>
-#include <asm/mmzone.h>
 
 #include <mm/mmu_decl.h>
 
-static DEFINE_MUTEX(linear_mapping_mutex);
 unsigned long long memory_limit;
 bool init_mem_is_free;
 
@@ -72,6 +41,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 EXPORT_SYMBOL(phys_mem_access_prot);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+static DEFINE_MUTEX(linear_mapping_mutex);
 
 #ifdef CONFIG_NUMA
 int memory_add_physaddr_to_nid(u64 start)
@@ -340,257 +310,6 @@ void free_initmem(void)
        free_initmem_default(POISON_FREE_INITMEM);
 }
 
-/**
- * flush_coherent_icache() - if a CPU has a coherent icache, flush it
- * @addr: The base address to use (can be any valid address, the whole cache will be flushed)
- * Return true if the cache was flushed, false otherwise
- */
-static inline bool flush_coherent_icache(unsigned long addr)
-{
-       /*
-        * For a snooping icache, we still need a dummy icbi to purge all the
-        * prefetched instructions from the ifetch buffers. We also need a sync
-        * before the icbi to order the the actual stores to memory that might
-        * before the icbi to order the actual stores to memory that might
-        */
-       if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
-               mb(); /* sync */
-               allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES);
-               icbi((void *)addr);
-               prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES);
-               mb(); /* sync */
-               isync();
-               return true;
-       }
-
-       return false;
-}
-
-/**
- * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
- * @start: the start address
- * @stop: the stop address (exclusive)
- */
-static void invalidate_icache_range(unsigned long start, unsigned long stop)
-{
-       unsigned long shift = l1_icache_shift();
-       unsigned long bytes = l1_icache_bytes();
-       char *addr = (char *)(start & ~(bytes - 1));
-       unsigned long size = stop - (unsigned long)addr + (bytes - 1);
-       unsigned long i;
-
-       for (i = 0; i < size >> shift; i++, addr += bytes)
-               icbi(addr);
-
-       mb(); /* sync */
-       isync();
-}
-
-/**
- * flush_icache_range: Write any modified data cache blocks out to memory
- * and invalidate the corresponding blocks in the instruction cache
- *
- * Generic code will call this after writing memory, before executing from it.
- *
- * @start: the start address
- * @stop: the stop address (exclusive)
- */
-void flush_icache_range(unsigned long start, unsigned long stop)
-{
-       if (flush_coherent_icache(start))
-               return;
-
-       clean_dcache_range(start, stop);
-
-       if (IS_ENABLED(CONFIG_44x)) {
-               /*
-                * Flash invalidate on 44x because we are passed kmapped
-                * addresses and this doesn't work for userspace pages due to
-                * the virtually tagged icache.
-                */
-               iccci((void *)start);
-               mb(); /* sync */
-               isync();
-       } else
-               invalidate_icache_range(start, stop);
-}
-EXPORT_SYMBOL(flush_icache_range);
-
-#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
-/**
- * flush_dcache_icache_phys() - Flush a page by its physical address
- * @physaddr: the physical address of the page
- */
-static void flush_dcache_icache_phys(unsigned long physaddr)
-{
-       unsigned long bytes = l1_dcache_bytes();
-       unsigned long nb = PAGE_SIZE / bytes;
-       unsigned long addr = physaddr & PAGE_MASK;
-       unsigned long msr, msr0;
-       unsigned long loop1 = addr, loop2 = addr;
-
-       msr0 = mfmsr();
-       msr = msr0 & ~MSR_DR;
-       /*
-        * This must remain as ASM to prevent potential memory accesses
-        * while the data MMU is disabled
-        */
-       asm volatile(
-               "   mtctr %2;\n"
-               "   mtmsr %3;\n"
-               "   isync;\n"
-               "0: dcbst   0, %0;\n"
-               "   addi    %0, %0, %4;\n"
-               "   bdnz    0b;\n"
-               "   sync;\n"
-               "   mtctr %2;\n"
-               "1: icbi    0, %1;\n"
-               "   addi    %1, %1, %4;\n"
-               "   bdnz    1b;\n"
-               "   sync;\n"
-               "   mtmsr %5;\n"
-               "   isync;\n"
-               : "+&r" (loop1), "+&r" (loop2)
-               : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
-               : "ctr", "memory");
-}
-NOKPROBE_SYMBOL(flush_dcache_icache_phys)
-#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
-
-/*
- * This is called when a page has been modified by the kernel.
- * It just marks the page as not i-cache clean.  We do the i-cache
- * flush later when the page is given to a user process, if necessary.
- */
-void flush_dcache_page(struct page *page)
-{
-       if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
-               return;
-       /* avoid an atomic op if possible */
-       if (test_bit(PG_dcache_clean, &page->flags))
-               clear_bit(PG_dcache_clean, &page->flags);
-}
-EXPORT_SYMBOL(flush_dcache_page);
-
-static void flush_dcache_icache_hugepage(struct page *page)
-{
-       int i;
-       void *start;
-
-       BUG_ON(!PageCompound(page));
-
-       for (i = 0; i < compound_nr(page); i++) {
-               if (!PageHighMem(page)) {
-                       __flush_dcache_icache(page_address(page+i));
-               } else {
-                       start = kmap_atomic(page+i);
-                       __flush_dcache_icache(start);
-                       kunmap_atomic(start);
-               }
-       }
-}
-
-void flush_dcache_icache_page(struct page *page)
-{
-
-       if (PageCompound(page))
-               return flush_dcache_icache_hugepage(page);
-
-#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64)
-       /* On 8xx there is no need to kmap since highmem is not supported */
-       __flush_dcache_icache(page_address(page));
-#else
-       if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
-               void *start = kmap_atomic(page);
-               __flush_dcache_icache(start);
-               kunmap_atomic(start);
-       } else {
-               unsigned long addr = page_to_pfn(page) << PAGE_SHIFT;
-
-               if (flush_coherent_icache(addr))
-                       return;
-               flush_dcache_icache_phys(addr);
-       }
-#endif
-}
-EXPORT_SYMBOL(flush_dcache_icache_page);
-
-/**
- * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- *
- * @page: the address of the page to flush
- */
-void __flush_dcache_icache(void *p)
-{
-       unsigned long addr = (unsigned long)p;
-
-       if (flush_coherent_icache(addr))
-               return;
-
-       clean_dcache_range(addr, addr + PAGE_SIZE);
-
-       /*
-        * We don't flush the icache on 44x. Those have a virtual icache and we
-        * don't have access to the virtual address here (it's not the page
-        * vaddr but where it's mapped in user space). The flushing of the
-        * icache on these is handled elsewhere, when a change in the address
-        * space occurs, before returning to user space.
-        */
-
-       if (mmu_has_feature(MMU_FTR_TYPE_44x))
-               return;
-
-       invalidate_icache_range(addr, addr + PAGE_SIZE);
-}
-
-void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
-{
-       clear_page(page);
-
-       /*
-        * We shouldn't have to do this, but some versions of glibc
-        * require it (ld.so assumes zero filled pages are icache clean)
-        * - Anton
-        */
-       flush_dcache_page(pg);
-}
-EXPORT_SYMBOL(clear_user_page);
-
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-                   struct page *pg)
-{
-       copy_page(vto, vfrom);
-
-       /*
-        * We should be able to use the following optimisation, however
-        * there are two problems.
-        * Firstly a bug in some versions of binutils meant PLT sections
-        * were not marked executable.
-        * Secondly the first word in the GOT section is blrl, used
-        * to establish the GOT address. Until recently the GOT was
-        * not marked executable.
-        * - Anton
-        */
-#if 0
-       if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
-               return;
-#endif
-
-       flush_dcache_page(pg);
-}
-
-void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
-                            unsigned long addr, int len)
-{
-       unsigned long maddr;
-
-       maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
-       flush_icache_range(maddr, maddr + len);
-       kunmap(page);
-}
-
 /*
  * System memory should not be in /proc/iomem but various tools expect it
  * (eg kdump).
index 18f20da..a857af4 100644 (file)
@@ -43,24 +43,26 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
                /*
                 * This full barrier orders the store to the cpumask above vs
-                * a subsequent operation which allows this CPU to begin loading
-                * translations for next.
+                * a subsequent load which allows this CPU/MMU to begin loading
+                * translations for 'next' from page table PTEs into the TLB.
                 *
-                * When using the radix MMU that operation is the load of the
+                * When using the radix MMU, that operation is the load of the
                 * MMU context id, which is then moved to SPRN_PID.
                 *
                 * For the hash MMU it is either the first load from slb_cache
-                * in switch_slb(), and/or the store of paca->mm_ctx_id in
-                * copy_mm_to_paca().
+                * in switch_slb() to preload the SLBs, or the load of
+                * get_user_context which loads the context for the VSID hash
+                * to insert a new SLB, in the SLB fault handler.
                 *
                 * On the other side, the barrier is in mm/tlb-radix.c for
-                * radix which orders earlier stores to clear the PTEs vs
-                * the load of mm_cpumask. And pte_xchg which does the same
-                * thing for hash.
+                * radix which orders earlier stores to clear the PTEs before
+                * the load of mm_cpumask to check which CPU TLBs should be
+                * flushed. For hash, pte_xchg to clear the PTE includes the
+                * barrier.
                 *
-                * This full barrier is needed by membarrier when switching
-                * between processes after store to rq->curr, before user-space
-                * memory accesses.
+                * This full barrier is also needed by membarrier when
+                * switching between processes after store to rq->curr, before
+                * user-space memory accesses.
                 */
                smp_mb();
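
The reworded comment describes a classic publish-then-read ordering requirement. A minimal C11 sketch of the same shape (the variable names are stand-ins, not kernel types): the store that publishes this CPU in the mm_cpumask must be globally visible before any later load of page-table or context state, which is what the full barrier enforces.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int cpu_in_mm_cpumask;   /* stand-in for this CPU's cpumask bit */
static atomic_int pte_state;           /* stand-in for a PTE / context id */

static void switch_to_next_mm(void)
{
        atomic_store_explicit(&cpu_in_mm_cpumask, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);      /* plays the role of smp_mb() */
        int ctx = atomic_load_explicit(&pte_state, memory_order_relaxed);
        printf("loaded context/PTE state %d after publishing cpumask bit\n", ctx);
}

int main(void)
{
        atomic_store(&pte_state, 42);
        switch_to_next_mm();
        return 0;
}
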
 
index 998810e..7dac910 100644 (file)
@@ -185,3 +185,8 @@ void ptdump_check_wx(void);
 #else
 static inline void ptdump_check_wx(void) { }
 #endif
+
+static inline bool debug_pagealloc_enabled_or_kfence(void)
+{
+       return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled();
+}
index 19a3eec..71bfdbe 100644 (file)
@@ -149,7 +149,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 {
        unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
        unsigned long sinittext = __pa(_sinittext);
-       bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled();
+       bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled_or_kfence();
        unsigned long boundary = strict_boundary ? sinittext : etext8;
        unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
 
@@ -161,7 +161,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
                return 0;
 
        mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true);
-       if (debug_pagealloc_enabled()) {
+       if (debug_pagealloc_enabled_or_kfence()) {
                top = boundary;
        } else {
                mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true);
index c2dec3a..8e60af3 100644 (file)
@@ -2,8 +2,4 @@
 #
 # Arch-specific network modules
 #
-ifdef CONFIG_PPC64
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
-else
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
-endif
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o
index d0a67a1..99fad09 100644 (file)
@@ -26,6 +26,9 @@
 /* Long jump; (unconditional 'branch') */
 #define PPC_JMP(dest)          EMIT(PPC_INST_BRANCH |                        \
                                     (((dest) - (ctx->idx * 4)) & 0x03fffffc))
+/* bl; (unconditional 'branch' with link) to absolute address */
+#define PPC_BL_ABS(dest)       EMIT(PPC_INST_BL |                            \
+                                    (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc))
 /* "cond" here covers BO:BI fields. */
 #define PPC_BCC_SHORT(cond, dest)      EMIT(PPC_INST_BRANCH_COND |           \
                                             (((cond) & 0x3ff) << 16) |       \
                                EMIT(PPC_RAW_ORI(d, d, IMM_L(i)));            \
                } } while(0)
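
The branch macros above (PPC_JMP, PPC_BL_ABS, PPC_BCC_SHORT) all fold a byte offset into the low bits of the instruction word. A worked userspace example of that encoding (0x48000000, the opcode for an unconditional branch, is used as an assumed example value): the offset is the signed distance from the current instruction, kept word aligned and masked with 0x03fffffc.

#include <stdint.h>
#include <stdio.h>

#define PPC_INST_BRANCH_EX 0x48000000u   /* "b" opcode, assumed example */

static uint32_t emit_branch(long dest, long pc)
{
        long off = dest - pc;            /* byte offset, may be negative */
        return PPC_INST_BRANCH_EX | ((uint32_t)off & 0x03fffffc);
}

int main(void)
{
        /* branch from instruction index 10 back to index 4 (offsets in bytes) */
        printf("insn = 0x%08x\n", emit_branch(4 * 4, 10 * 4));
        return 0;
}
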
 
+#ifdef CONFIG_PPC32
+#define PPC_EX32(r, i)         EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0))
+#endif
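
On the 32-bit JIT a 64-bit BPF value is split across a pair of GPRs, and PPC_EX32 above loads 0 or -1 into a register depending on the sign of the immediate, i.e. the explicit sign extension of the high half. A small plain-C sketch of the idea (no JIT involved, values are examples):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        int32_t imm = -5;
        uint32_t lo = (uint32_t)imm;
        uint32_t hi = imm < 0 ? 0xffffffffu : 0;   /* what PPC_EX32 expresses */

        printf("imm %d -> hi 0x%08x lo 0x%08x\n", imm, hi, lo);
        return 0;
}
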
+
 #define PPC_LI64(d, i)         do {                                          \
                if ((long)(i) >= -2147483648 &&                               \
                                (long)(i) < 2147483648)                       \
@@ -108,6 +115,63 @@ static inline bool is_nearbranch(int offset)
 #define COND_LT                (CR0_LT | COND_CMP_TRUE)
 #define COND_LE                (CR0_GT | COND_CMP_FALSE)
 
+#define SEEN_FUNC      0x20000000 /* might call external helpers */
+#define SEEN_STACK     0x40000000 /* uses BPF stack */
+#define SEEN_TAILCALL  0x80000000 /* uses tail calls */
+
+#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
+#define SEEN_NVREG_MASK        0x0003ffff /* Non volatile registers r14-r31 */
+
+#ifdef CONFIG_PPC64
+extern const int b2p[MAX_BPF_JIT_REG + 2];
+#else
+extern const int b2p[MAX_BPF_JIT_REG + 1];
+#endif
+
+struct codegen_context {
+       /*
+        * This is used to track register usage as well
+        * as calls to external helpers.
+        * - register usage is tracked with corresponding
+        *   bits (r3-r31)
+        * - rest of the bits can be used to track other
+        *   things -- for now, we use bits 0 to 2
+        *   encoded in SEEN_* macros above
+        */
+       unsigned int seen;
+       unsigned int idx;
+       unsigned int stack_size;
+       int b2p[ARRAY_SIZE(b2p)];
+};
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+       smp_wmb();      /* smp write barrier */
+       flush_icache_range((unsigned long)start, (unsigned long)end);
+}
+
+static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
+{
+       return ctx->seen & (1 << (31 - i));
+}
+
+static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
+{
+       ctx->seen |= 1 << (31 - i);
+}
+
+static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
+{
+       ctx->seen &= ~(1 << (31 - i));
+}
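
bpf_is_seen_register() and friends track each GPR in bit (31 - i) of ctx->seen, which is why r3-r12 fall inside SEEN_VREG_MASK and r14-r31 inside SEEN_NVREG_MASK. A short standalone check of that bit layout (mask values copied from the defines above):

#include <stdio.h>

int main(void)
{
        unsigned int seen = 0;

        seen |= 1u << (31 - 3);    /* mark r3 used */
        seen |= 1u << (31 - 14);   /* mark r14 used */

        printf("seen = 0x%08x\n", seen);
        printf("r3 in volatile mask:      %d\n", !!(seen & 0x1ff80000));
        printf("r14 in non-volatile mask: %d\n", !!(seen & 0x0003ffff));
        return 0;
}
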
+
+void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+                      u32 *addrs, bool extra_pass);
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_realloc_regs(struct codegen_context *ctx);
+
 #endif
 
 #endif
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
deleted file mode 100644 (file)
index 448dfd4..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * bpf_jit32.h: BPF JIT compiler for PPC
- *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
- *
- * Split from bpf_jit.h
- */
-#ifndef _BPF_JIT32_H
-#define _BPF_JIT32_H
-
-#include <asm/asm-compat.h>
-#include "bpf_jit.h"
-
-#ifdef CONFIG_PPC64
-#define BPF_PPC_STACK_R3_OFF   48
-#define BPF_PPC_STACK_LOCALS   32
-#define BPF_PPC_STACK_BASIC    (48+64)
-#define BPF_PPC_STACK_SAVE     (18*8)
-#define BPF_PPC_STACKFRAME     (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
-                                BPF_PPC_STACK_SAVE)
-#define BPF_PPC_SLOWPATH_FRAME (48+64)
-#else
-#define BPF_PPC_STACK_R3_OFF   24
-#define BPF_PPC_STACK_LOCALS   16
-#define BPF_PPC_STACK_BASIC    (24+32)
-#define BPF_PPC_STACK_SAVE     (18*4)
-#define BPF_PPC_STACKFRAME     (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
-                                BPF_PPC_STACK_SAVE)
-#define BPF_PPC_SLOWPATH_FRAME (24+32)
-#endif
-
-#define REG_SZ         (BITS_PER_LONG/8)
-
-/*
- * Generated code register usage:
- *
- * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with:
- *
- * skb         r3      (Entry parameter)
- * A register  r4
- * X register  r5
- * addr param  r6
- * r7-r10      scratch
- * skb->data   r14
- * skb headlen r15     (skb->len - skb->data_len)
- * m[0]                r16
- * m[...]      ...
- * m[15]       r31
- */
-#define r_skb          3
-#define r_ret          3
-#define r_A            4
-#define r_X            5
-#define r_addr         6
-#define r_scratch1     7
-#define r_scratch2     8
-#define r_D            14
-#define r_HL           15
-#define r_M            16
-
-#ifndef __ASSEMBLY__
-
-/*
- * Assembly helpers from arch/powerpc/net/bpf_jit.S:
- */
-#define DECLARE_LOAD_FUNC(func)        \
-       extern u8 func[], func##_negative_offset[], func##_positive_offset[]
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-DECLARE_LOAD_FUNC(sk_load_byte_msh);
-
-#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, base, i));   \
-               else {  EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));              \
-                       EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0)
-
-#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, i));     \
-               else {  EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));                        \
-                       EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0)
-
-#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LWZ(r, base, i));   \
-               else {  EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));                        \
-                       EMIT(PPC_RAW_LWZ(r, r, IMM_L(i))); } } while(0)
-
-#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LHZ(r, base, i));   \
-               else {  EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));                        \
-                       EMIT(PPC_RAW_LHZ(r, r, IMM_L(i))); } } while(0)
-
-#ifdef CONFIG_PPC64
-#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0)
-#else
-#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0)
-#endif
-
-#ifdef CONFIG_SMP
-#ifdef CONFIG_PPC64
-#define PPC_BPF_LOAD_CPU(r)            \
-       do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2);   \
-               PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index));  \
-       } while (0)
-#else
-#define PPC_BPF_LOAD_CPU(r)     \
-       do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4);          \
-               PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu));          \
-       } while(0)
-#endif
-#else
-#define PPC_BPF_LOAD_CPU(r) do { EMIT(PPC_RAW_LI(r, 0)); } while(0)
-#endif
-
-#define PPC_LHBRX_OFFS(r, base, i) \
-               do { PPC_LI32(r, i); EMIT(PPC_RAW_LHBRX(r, r, base)); } while(0)
-#ifdef __LITTLE_ENDIAN__
-#define PPC_NTOHS_OFFS(r, base, i)     PPC_LHBRX_OFFS(r, base, i)
-#else
-#define PPC_NTOHS_OFFS(r, base, i)     PPC_LHZ_OFFS(r, base, i)
-#endif
-
-#define PPC_BPF_LL(r, base, i) do { EMIT(PPC_RAW_LWZ(r, base, i)); } while(0)
-#define PPC_BPF_STL(r, base, i) do { EMIT(PPC_RAW_STW(r, base, i)); } while(0)
-#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STWU(r, base, i)); } while(0)
-
-#define SEEN_DATAREF 0x10000 /* might call external helpers */
-#define SEEN_XREG    0x20000 /* X reg is used */
-#define SEEN_MEM     0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
-                             * storage */
-#define SEEN_MEM_MSK 0x0ffff
-
-struct codegen_context {
-       unsigned int seen;
-       unsigned int idx;
-       int pc_ret0; /* bpf index of first RET #0 instruction (if any) */
-};
-
-#endif
-
-#endif
index 2e33c66..7b713ed 100644 (file)
@@ -39,7 +39,7 @@
 #define TMP_REG_2      (MAX_BPF_JIT_REG + 1)
 
 /* BPF to ppc register mappings */
-static const int b2p[] = {
+const int b2p[MAX_BPF_JIT_REG + 2] = {
        /* function return value */
        [BPF_REG_0] = 8,
        /* function arguments */
@@ -86,25 +86,6 @@ static const int b2p[] = {
                                } while(0)
 #define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0)
 
-#define SEEN_FUNC      0x1000 /* might call external helpers */
-#define SEEN_STACK     0x2000 /* uses BPF stack */
-#define SEEN_TAILCALL  0x4000 /* uses tail calls */
-
-struct codegen_context {
-       /*
-        * This is used to track register usage as well
-        * as calls to external helpers.
-        * - register usage is tracked with corresponding
-        *   bits (r3-r10 and r27-r31)
-        * - rest of the bits can be used to track other
-        *   things -- for now, we use bits 16 to 23
-        *   encoded in SEEN_* macros above
-        */
-       unsigned int seen;
-       unsigned int idx;
-       unsigned int stack_size;
-};
-
 #endif /* !__ASSEMBLY__ */
 
 #endif
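
The b2p table shown above maps each BPF register onto a fixed powerpc GPR, and the table is now also copied into codegen_context, with bpf_jit_realloc_regs() declared alongside it so the mapping can be adjusted per program. A toy version of such a mapping table, with made-up register assignments:

#include <stdio.h>

enum { BPF_REG_0_EX, BPF_REG_1_EX, BPF_REG_2_EX, MAX_BPF_REG_EX };

static const int b2p_ex[MAX_BPF_REG_EX] = {
        [BPF_REG_0_EX] = 8,    /* return value in r8 (example only) */
        [BPF_REG_1_EX] = 3,    /* first argument in r3 (example only) */
        [BPF_REG_2_EX] = 4,
};

int main(void)
{
        printf("BPF_REG_1 maps to GPR r%d\n", b2p_ex[BPF_REG_1_EX]);
        return 0;
}
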
diff --git a/arch/powerpc/net/bpf_jit_asm.S b/arch/powerpc/net/bpf_jit_asm.S
deleted file mode 100644 (file)
index 2f5030d..0000000
+++ /dev/null
@@ -1,226 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* bpf_jit.S: Packet/header access helper functions
- * for PPC64 BPF compiler.
- *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/asm-compat.h>
-#include "bpf_jit32.h"
-
-/*
- * All of these routines are called directly from generated code,
- * whose register usage is:
- *
- * r3          skb
- * r4,r5       A,X
- * r6          *** address parameter to helper ***
- * r7-r10      scratch
- * r14         skb->data
- * r15         skb headlen
- * r16-31      M[]
- */
-
-/*
- * To consider: These helpers are so small it could be better to just
- * generate them inline.  Inline code can do the simple headlen check
- * then branch directly to slow_path_XXX if required.  (In fact, could
- * load a spare GPR with the address of slow_path_generic and pass size
- * as an argument, making the call site a mtlr, li and bllr.)
- */
-       .globl  sk_load_word
-sk_load_word:
-       PPC_LCMPI       r_addr, 0
-       blt     bpf_slow_path_word_neg
-       .globl  sk_load_word_positive_offset
-sk_load_word_positive_offset:
-       /* Are we accessing past headlen? */
-       subi    r_scratch1, r_HL, 4
-       PPC_LCMP        r_scratch1, r_addr
-       blt     bpf_slow_path_word
-       /* Nope, just hitting the header.  cr0 here is eq or gt! */
-#ifdef __LITTLE_ENDIAN__
-       lwbrx   r_A, r_D, r_addr
-#else
-       lwzx    r_A, r_D, r_addr
-#endif
-       blr     /* Return success, cr0 != LT */
-
-       .globl  sk_load_half
-sk_load_half:
-       PPC_LCMPI       r_addr, 0
-       blt     bpf_slow_path_half_neg
-       .globl  sk_load_half_positive_offset
-sk_load_half_positive_offset:
-       subi    r_scratch1, r_HL, 2
-       PPC_LCMP        r_scratch1, r_addr
-       blt     bpf_slow_path_half
-#ifdef __LITTLE_ENDIAN__
-       lhbrx   r_A, r_D, r_addr
-#else
-       lhzx    r_A, r_D, r_addr
-#endif
-       blr
-
-       .globl  sk_load_byte
-sk_load_byte:
-       PPC_LCMPI       r_addr, 0
-       blt     bpf_slow_path_byte_neg
-       .globl  sk_load_byte_positive_offset
-sk_load_byte_positive_offset:
-       PPC_LCMP        r_HL, r_addr
-       ble     bpf_slow_path_byte
-       lbzx    r_A, r_D, r_addr
-       blr
-
-/*
- * BPF_LDX | BPF_B | BPF_MSH: ldxb  4*([offset]&0xf)
- * r_addr is the offset value
- */
-       .globl sk_load_byte_msh
-sk_load_byte_msh:
-       PPC_LCMPI       r_addr, 0
-       blt     bpf_slow_path_byte_msh_neg
-       .globl sk_load_byte_msh_positive_offset
-sk_load_byte_msh_positive_offset:
-       PPC_LCMP        r_HL, r_addr
-       ble     bpf_slow_path_byte_msh
-       lbzx    r_X, r_D, r_addr
-       rlwinm  r_X, r_X, 2, 32-4-2, 31-2
-       blr
-
-/* Call out to skb_copy_bits:
- * We'll need to back up our volatile regs first; we have
- * local variable space at r1+(BPF_PPC_STACK_BASIC).
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define bpf_slow_path_common(SIZE)                             \
-       mflr    r0;                                             \
-       PPC_STL r0, PPC_LR_STKOFF(r1);                                  \
-       /* R3 goes in parameter space of caller's frame */      \
-       PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);           \
-       PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);              \
-       PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);              \
-       addi    r5, r1, BPF_PPC_STACK_BASIC+(2*REG_SZ);         \
-       PPC_STLU        r1, -BPF_PPC_SLOWPATH_FRAME(r1);                \
-       /* R3 = r_skb, as passed */                             \
-       mr      r4, r_addr;                                     \
-       li      r6, SIZE;                                       \
-       bl      skb_copy_bits;                                  \
-       nop;                                                    \
-       /* R3 = 0 on success */                                 \
-       addi    r1, r1, BPF_PPC_SLOWPATH_FRAME;                 \
-       PPC_LL  r0, PPC_LR_STKOFF(r1);                                  \
-       PPC_LL  r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);              \
-       PPC_LL  r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);              \
-       mtlr    r0;                                             \
-       PPC_LCMPI       r3, 0;                                          \
-       blt     bpf_error;      /* cr0 = LT */                  \
-       PPC_LL  r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);           \
-       /* Great success! */
-
-bpf_slow_path_word:
-       bpf_slow_path_common(4)
-       /* Data value is on stack, and cr0 != LT */
-       lwz     r_A, BPF_PPC_STACK_BASIC+(2*REG_SZ)(r1)
-       blr
-
-bpf_slow_path_half:
-       bpf_slow_path_common(2)
-       lhz     r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
-       blr
-
-bpf_slow_path_byte:
-       bpf_slow_path_common(1)
-       lbz     r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
-       blr
-
-bpf_slow_path_byte_msh:
-       bpf_slow_path_common(1)
-       lbz     r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
-       rlwinm  r_X, r_X, 2, 32-4-2, 31-2
-       blr
-
-/* Call out to bpf_internal_load_pointer_neg_helper:
- * We'll need to back up our volatile regs first; we have
- * local variable space at r1+(BPF_PPC_STACK_BASIC).
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define sk_negative_common(SIZE)                               \
-       mflr    r0;                                             \
-       PPC_STL r0, PPC_LR_STKOFF(r1);                                  \
-       /* R3 goes in parameter space of caller's frame */      \
-       PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);           \
-       PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);              \
-       PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);              \
-       PPC_STLU        r1, -BPF_PPC_SLOWPATH_FRAME(r1);                \
-       /* R3 = r_skb, as passed */                             \
-       mr      r4, r_addr;                                     \
-       li      r5, SIZE;                                       \
-       bl      bpf_internal_load_pointer_neg_helper;           \
-       nop;                                                    \
-       /* R3 != 0 on success */                                \
-       addi    r1, r1, BPF_PPC_SLOWPATH_FRAME;                 \
-       PPC_LL  r0, PPC_LR_STKOFF(r1);                                  \
-       PPC_LL  r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);              \
-       PPC_LL  r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);              \
-       mtlr    r0;                                             \
-       PPC_LCMPLI      r3, 0;                                          \
-       beq     bpf_error_slow; /* cr0 = EQ */                  \
-       mr      r_addr, r3;                                     \
-       PPC_LL  r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);           \
-       /* Great success! */
-
-bpf_slow_path_word_neg:
-       lis     r_scratch1,-32  /* SKF_LL_OFF */
-       PPC_LCMP        r_addr, r_scratch1      /* addr < SKF_* */
-       blt     bpf_error       /* cr0 = LT */
-       .globl  sk_load_word_negative_offset
-sk_load_word_negative_offset:
-       sk_negative_common(4)
-       lwz     r_A, 0(r_addr)
-       blr
-
-bpf_slow_path_half_neg:
-       lis     r_scratch1,-32  /* SKF_LL_OFF */
-       PPC_LCMP        r_addr, r_scratch1      /* addr < SKF_* */
-       blt     bpf_error       /* cr0 = LT */
-       .globl  sk_load_half_negative_offset
-sk_load_half_negative_offset:
-       sk_negative_common(2)
-       lhz     r_A, 0(r_addr)
-       blr
-
-bpf_slow_path_byte_neg:
-       lis     r_scratch1,-32  /* SKF_LL_OFF */
-       PPC_LCMP        r_addr, r_scratch1      /* addr < SKF_* */
-       blt     bpf_error       /* cr0 = LT */
-       .globl  sk_load_byte_negative_offset
-sk_load_byte_negative_offset:
-       sk_negative_common(1)
-       lbz     r_A, 0(r_addr)
-       blr
-
-bpf_slow_path_byte_msh_neg:
-       lis     r_scratch1,-32  /* SKF_LL_OFF */
-       PPC_LCMP        r_addr, r_scratch1      /* addr < SKF_* */
-       blt     bpf_error       /* cr0 = LT */
-       .globl  sk_load_byte_msh_negative_offset
-sk_load_byte_msh_negative_offset:
-       sk_negative_common(1)
-       lbz     r_X, 0(r_addr)
-       rlwinm  r_X, r_X, 2, 32-4-2, 31-2
-       blr
-
-bpf_error_slow:
-       /* fabricate a cr0 = lt */
-       li      r_scratch1, -1
-       PPC_LCMPI       r_scratch1, 0
-bpf_error:
-       /* Entered with cr0 = lt */
-       li      r3, 0
-       /* Generated code will 'blt epilogue', returning 0. */
-       blr
index e809cb5..798ac43 100644 (file)
@@ -1,10 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/* bpf_jit_comp.c: BPF JIT compiler
+/*
+ * eBPF JIT compiler
  *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ *               IBM Corporation
  *
- * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com)
- * Ported to ppc32 by Denis Kirjanov <kda@linux-powerpc.org>
+ * Based on the powerpc classic BPF JIT compiler by Matt Evans
  */
 #include <linux/moduleloader.h>
 #include <asm/cacheflush.h>
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
 
-#include "bpf_jit32.h"
+#include "bpf_jit.h"
 
-static inline void bpf_flush_icache(void *start, void *end)
+static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
 {
-       smp_wmb();
-       flush_icache_range((unsigned long)start, (unsigned long)end);
+       memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
 }
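
bpf_jit_fill_ill_insns() seeds the whole JIT allocation with trap instructions so that a stray jump into unused space faults immediately instead of executing leftover bytes. A userspace sketch of the same fill (0x7fe00008 is used here as an assumed example trap encoding):

#include <stdint.h>
#include <stdio.h>

#define TRAP_INSN_EX 0x7fe00008u   /* assumed example "trap" encoding */

static void fill_ill_insns_ex(uint32_t *area, size_t size_bytes)
{
        for (size_t i = 0; i < size_bytes / 4; i++)
                area[i] = TRAP_INSN_EX;
}

int main(void)
{
        uint32_t buf[4];

        fill_ill_insns_ex(buf, sizeof(buf));
        printf("buf[0] = 0x%08x\n", buf[0]);
        return 0;
}
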
 
-static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
-                                  struct codegen_context *ctx)
+/* Fix the branch target addresses for subprog calls */
+static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
+                                      struct codegen_context *ctx, u32 *addrs)
 {
-       int i;
-       const struct sock_filter *filter = fp->insns;
-
-       if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
-               /* Make stackframe */
-               if (ctx->seen & SEEN_DATAREF) {
-                       /* If we call any helpers (for loads), save LR */
-                       EMIT(PPC_INST_MFLR | __PPC_RT(R0));
-                       PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
-
-                       /* Back up non-volatile regs. */
-                       PPC_BPF_STL(r_D, 1, -(REG_SZ*(32-r_D)));
-                       PPC_BPF_STL(r_HL, 1, -(REG_SZ*(32-r_HL)));
-               }
-               if (ctx->seen & SEEN_MEM) {
-                       /*
-                        * Conditionally save regs r15-r31 as some will be used
-                        * for M[] data.
-                        */
-                       for (i = r_M; i < (r_M+16); i++) {
-                               if (ctx->seen & (1 << (i-r_M)))
-                                       PPC_BPF_STL(i, 1, -(REG_SZ*(32-i)));
-                       }
-               }
-               PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME);
-       }
-
-       if (ctx->seen & SEEN_DATAREF) {
-               /*
-                * If this filter needs to access skb data,
-                * prepare r_D and r_HL:
-                *  r_HL = skb->len - skb->data_len
-                *  r_D  = skb->data
-                */
-               PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
-                                                        data_len));
-               PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len));
-               EMIT(PPC_RAW_SUB(r_HL, r_HL, r_scratch1));
-               PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
-       }
+       const struct bpf_insn *insn = fp->insnsi;
+       bool func_addr_fixed;
+       u64 func_addr;
+       u32 tmp_idx;
+       int i, ret;
 
-       if (ctx->seen & SEEN_XREG) {
+       for (i = 0; i < fp->len; i++) {
                /*
-                * TODO: Could also detect whether first instr. sets X and
-                * avoid this (as below, with A).
+                * During the extra pass, only the branch target addresses for
+                * the subprog calls need to be fixed. All other instructions
+                * can be left untouched.
+                *
+                * The JITed image length does not change because we already
+                * ensure that the JITed instruction sequence for these calls
+                * is of fixed length by padding them with NOPs.
                 */
-               EMIT(PPC_RAW_LI(r_X, 0));
-       }
-
-       /* make sure we dont leak kernel information to user */
-       if (bpf_needs_clear_a(&filter[0]))
-               EMIT(PPC_RAW_LI(r_A, 0));
-}
-
-static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
-{
-       int i;
-
-       if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
-               EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME));
-               if (ctx->seen & SEEN_DATAREF) {
-                       PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
-                       EMIT(PPC_RAW_MTLR(0));
-                       PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D)));
-                       PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL)));
-               }
-               if (ctx->seen & SEEN_MEM) {
-                       /* Restore any saved non-vol registers */
-                       for (i = r_M; i < (r_M+16); i++) {
-                               if (ctx->seen & (1 << (i-r_M)))
-                                       PPC_BPF_LL(i, 1, -(REG_SZ*(32-i)));
-                       }
-               }
-       }
-       /* The RETs have left a return value in R3. */
-
-       EMIT(PPC_RAW_BLR());
-}
-
-#define CHOOSE_LOAD_FUNC(K, func) \
-       ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
-/* Assemble the body code between the prologue & epilogue. */
-static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
-                             struct codegen_context *ctx,
-                             unsigned int *addrs)
-{
-       const struct sock_filter *filter = fp->insns;
-       int flen = fp->len;
-       u8 *func;
-       unsigned int true_cond;
-       int i;
-
-       /* Start of epilogue code */
-       unsigned int exit_addr = addrs[flen];
-
-       for (i = 0; i < flen; i++) {
-               unsigned int K = filter[i].k;
-               u16 code = bpf_anc_helper(&filter[i]);
+               if (insn[i].code == (BPF_JMP | BPF_CALL) &&
+                   insn[i].src_reg == BPF_PSEUDO_CALL) {
+                       ret = bpf_jit_get_func_addr(fp, &insn[i], true,
+                                                   &func_addr,
+                                                   &func_addr_fixed);
+                       if (ret < 0)
+                               return ret;
 
-               /*
-                * addrs[] maps a BPF bytecode address into a real offset from
-                * the start of the body code.
-                */
-               addrs[i] = ctx->idx * 4;
-
-               switch (code) {
-                       /*** ALU ops ***/
-               case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */
-                       ctx->seen |= SEEN_XREG;
-                       EMIT(PPC_RAW_ADD(r_A, r_A, r_X));
-                       break;
-               case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */
-                       if (!K)
-                               break;
-                       EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(K)));
-                       if (K >= 32768)
-                               EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(K)));
-                       break;
-               case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */
-                       ctx->seen |= SEEN_XREG;
-                       EMIT(PPC_RAW_SUB(r_A, r_A, r_X));
-                       break;
-               case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
-                       if (!K)
-                               break;
-                       EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(-K)));
-                       if (K >= 32768)
-                               EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(-K)));
-                       break;
-               case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */
-                       ctx->seen |= SEEN_XREG;
-                       EMIT(PPC_RAW_MULW(r_A, r_A, r_X));
-                       break;
-               case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
-                       if (K < 32768)
-                               EMIT(PPC_RAW_MULI(r_A, r_A, K));
-                       else {
-                               PPC_LI32(r_scratch1, K);
-                               EMIT(PPC_RAW_MULW(r_A, r_A, r_scratch1));
-                       }
-                       break;
-               case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */
-               case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
-                       ctx->seen |= SEEN_XREG;
-                       EMIT(PPC_RAW_CMPWI(r_X, 0));
-                       if (ctx->pc_ret0 != -1) {
-                               PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
-                       } else {
-                               PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
-                               EMIT(PPC_RAW_LI(r_ret, 0));
-                               PPC_JMP(exit_addr);
-                       }
-                       if (code == (BPF_ALU | BPF_MOD | BPF_X)) {
-                               EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_X));
-                               EMIT(PPC_RAW_MULW(r_scratch1, r_X, r_scratch1));
-                               EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1));
-                       } else {
-                               EMIT(PPC_RAW_DIVWU(r_A, r_A, r_X));
-                       }
-                       break;
-               case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */
-                       PPC_LI32(r_scratch2, K);
-                       EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_scratch2));
-                       EMIT(PPC_RAW_MULW(r_scratch1, r_scratch2, r_scratch1));
-                       EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1));
-                       break;
-               case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
-                       if (K == 1)
-                               break;
-                       PPC_LI32(r_scratch1, K);
-                       EMIT(PPC_RAW_DIVWU(r_A, r_A, r_scratch1));
-                       break;
-               case BPF_ALU | BPF_AND | BPF_X:
-                       ctx->seen |= SEEN_XREG;
-                       EMIT(PPC_RAW_AND(r_A, r_A, r_X));
-                       break;
-               case BPF_ALU | BPF_AND | BPF_K:
-                       if (!IMM_H(K))
-                               EMIT(PPC_RAW_ANDI(r_A, r_A, K));
-                       else {
-                               PPC_LI32(r_scratch1, K);
-                               EMIT(PPC_RAW_AND(r_A, r_A, r_scratch1));
-                       }
-                       break;
-               case BPF_ALU | BPF_OR | BPF_X:
-                       ctx->seen |= SEEN_XREG;
-                       EMIT(PPC_RAW_OR(r_A, r_A, r_X));
-                       break;
-               case BPF_ALU | BPF_OR | BPF_K:
-                       if (IMM_L(K))
-                               EMIT(PPC_RAW_ORI(r_A, r_A, IMM_L(K)));
-                       if (K >= 65536)
-                               EMIT(PPC_RAW_ORIS(r_A, r_A, IMM_H(K)));
-                       break;
-               case BPF_ANC | SKF_AD_ALU_XOR_X:
-               case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */
-                       ctx->seen |= SEEN_XREG;
-                       EMIT(PPC_RAW_XOR(r_A, r_A, r_X));
-                       break;
-               case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
-                       if (IMM_L(K))
-                               EMIT(PPC_RAW_XORI(r_A, r_A, IMM_L(K)));
-                       if (K >= 65536)
-                               EMIT(PPC_RAW_XORIS(r_A, r_A, IMM_H(K)));
-                       break;
-               case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
-                       ctx->seen |= SEEN_XREG;
-                       EMIT(PPC_RAW_SLW(r_A, r_A, r_X));
-                       break;
-               case BPF_ALU | BPF_LSH | BPF_K:
-                       if (K == 0)
-                               break;
-                       else
-                               EMIT(PPC_RAW_SLWI(r_A, r_A, K));
-                       break;
-               case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
-                       ctx->seen |= SEEN_XREG;
-                       EMIT(PPC_RAW_SRW(r_A, r_A, r_X));
-                       break;
-               case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
-                       if (K == 0)
-                               break;
-                       else
-                               EMIT(PPC_RAW_SRWI(r_A, r_A, K));
-                       break;
-               case BPF_ALU | BPF_NEG:
-                       EMIT(PPC_RAW_NEG(r_A, r_A));
-                       break;
-               case BPF_RET | BPF_K:
-                       PPC_LI32(r_ret, K);
-                       if (!K) {
-                               if (ctx->pc_ret0 == -1)
-                                       ctx->pc_ret0 = i;
-                       }
-                       /*
-                        * If this isn't the very last instruction, branch to
-                        * the epilogue if we've stuff to clean up.  Otherwise,
-                        * if there's nothing to tidy, just return.  If we /are/
-                        * the last instruction, we're about to fall through to
-                        * the epilogue to return.
-                        */
-                       if (i != flen - 1) {
-                               /*
-                                * Note: 'seen' is properly valid only on pass
-                                * #2.  Both parts of this conditional are the
-                                * same instruction size though, meaning the
-                                * first pass will still correctly determine the
-                                * code size/addresses.
-                                */
-                               if (ctx->seen)
-                                       PPC_JMP(exit_addr);
-                               else
-                                       EMIT(PPC_RAW_BLR());
-                       }
-                       break;
-               case BPF_RET | BPF_A:
-                       EMIT(PPC_RAW_MR(r_ret, r_A));
-                       if (i != flen - 1) {
-                               if (ctx->seen)
-                                       PPC_JMP(exit_addr);
-                               else
-                                       EMIT(PPC_RAW_BLR());
-                       }
-                       break;
-               case BPF_MISC | BPF_TAX: /* X = A */
-                       EMIT(PPC_RAW_MR(r_X, r_A));
-                       break;
-               case BPF_MISC | BPF_TXA: /* A = X */
-                       ctx->seen |= SEEN_XREG;
-                       EMIT(PPC_RAW_MR(r_A, r_X));
-                       break;
-
-                       /*** Constant loads/M[] access ***/
-               case BPF_LD | BPF_IMM: /* A = K */
-                       PPC_LI32(r_A, K);
-                       break;
-               case BPF_LDX | BPF_IMM: /* X = K */
-                       PPC_LI32(r_X, K);
-                       break;
-               case BPF_LD | BPF_MEM: /* A = mem[K] */
-                       EMIT(PPC_RAW_MR(r_A, r_M + (K & 0xf)));
-                       ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
-                       break;
-               case BPF_LDX | BPF_MEM: /* X = mem[K] */
-                       EMIT(PPC_RAW_MR(r_X, r_M + (K & 0xf)));
-                       ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
-                       break;
-               case BPF_ST: /* mem[K] = A */
-                       EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_A));
-                       ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
-                       break;
-               case BPF_STX: /* mem[K] = X */
-                       EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_X));
-                       ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
-                       break;
-               case BPF_LD | BPF_W | BPF_LEN: /*       A = skb->len; */
-                       BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4);
-                       PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
-                       break;
-               case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
-                       PPC_LWZ_OFFS(r_A, r_skb, K);
-                       break;
-               case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
-                       PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
-                       break;
-
-                       /*** Ancillary info loads ***/
-               case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
-                       BUILD_BUG_ON(sizeof_field(struct sk_buff,
-                                                 protocol) != 2);
-                       PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
-                                                           protocol));
-                       break;
-               case BPF_ANC | SKF_AD_IFINDEX:
-               case BPF_ANC | SKF_AD_HATYPE:
-                       BUILD_BUG_ON(sizeof_field(struct net_device,
-                                               ifindex) != 4);
-                       BUILD_BUG_ON(sizeof_field(struct net_device,
-                                               type) != 2);
-                       PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
-                                                               dev));
-                       EMIT(PPC_RAW_CMPDI(r_scratch1, 0));
-                       if (ctx->pc_ret0 != -1) {
-                               PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
-                       } else {
-                               /* Exit, returning 0; first pass hits here. */
-                               PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12);
-                               EMIT(PPC_RAW_LI(r_ret, 0));
-                               PPC_JMP(exit_addr);
-                       }
-                       if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
-                               PPC_LWZ_OFFS(r_A, r_scratch1,
-                                    offsetof(struct net_device, ifindex));
-                       } else {
-                               PPC_LHZ_OFFS(r_A, r_scratch1,
-                                    offsetof(struct net_device, type));
-                       }
-
-                       break;
-               case BPF_ANC | SKF_AD_MARK:
-                       BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
-                       PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
-                                                         mark));
-                       break;
-               case BPF_ANC | SKF_AD_RXHASH:
-                       BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
-                       PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
-                                                         hash));
-                       break;
-               case BPF_ANC | SKF_AD_VLAN_TAG:
-                       BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
-
-                       PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
-                                                         vlan_tci));
-                       break;
-               case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
-                       PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET());
-                       if (PKT_VLAN_PRESENT_BIT)
-                               EMIT(PPC_RAW_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT));
-                       if (PKT_VLAN_PRESENT_BIT < 7)
-                               EMIT(PPC_RAW_ANDI(r_A, r_A, 1));
-                       break;
-               case BPF_ANC | SKF_AD_QUEUE:
-                       BUILD_BUG_ON(sizeof_field(struct sk_buff,
-                                                 queue_mapping) != 2);
-                       PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
-                                                         queue_mapping));
-                       break;
-               case BPF_ANC | SKF_AD_PKTTYPE:
-                       PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET());
-                       EMIT(PPC_RAW_ANDI(r_A, r_A, PKT_TYPE_MAX));
-                       EMIT(PPC_RAW_SRWI(r_A, r_A, 5));
-                       break;
-               case BPF_ANC | SKF_AD_CPU:
-                       PPC_BPF_LOAD_CPU(r_A);
-                       break;
-                       /*** Absolute loads from packet header/data ***/
-               case BPF_LD | BPF_W | BPF_ABS:
-                       func = CHOOSE_LOAD_FUNC(K, sk_load_word);
-                       goto common_load;
-               case BPF_LD | BPF_H | BPF_ABS:
-                       func = CHOOSE_LOAD_FUNC(K, sk_load_half);
-                       goto common_load;
-               case BPF_LD | BPF_B | BPF_ABS:
-                       func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
-               common_load:
-                       /* Load from [K]. */
-                       ctx->seen |= SEEN_DATAREF;
-                       PPC_FUNC_ADDR(r_scratch1, func);
-                       EMIT(PPC_RAW_MTLR(r_scratch1));
-                       PPC_LI32(r_addr, K);
-                       EMIT(PPC_RAW_BLRL());
                        /*
-                        * Helper returns 'lt' condition on error, and an
-                        * appropriate return value in r3
+                        * Save ctx->idx as this would currently point to the
+                        * end of the JITed image and set it to the offset of
+                        * the instruction sequence corresponding to the
+                        * subprog call temporarily.
                         */
-                       PPC_BCC(COND_LT, exit_addr);
-                       break;
-
-                       /*** Indirect loads from packet header/data ***/
-               case BPF_LD | BPF_W | BPF_IND:
-                       func = sk_load_word;
-                       goto common_load_ind;
-               case BPF_LD | BPF_H | BPF_IND:
-                       func = sk_load_half;
-                       goto common_load_ind;
-               case BPF_LD | BPF_B | BPF_IND:
-                       func = sk_load_byte;
-               common_load_ind:
+                       tmp_idx = ctx->idx;
+                       ctx->idx = addrs[i] / 4;
+                       bpf_jit_emit_func_call_rel(image, ctx, func_addr);
+
                        /*
-                        * Load from [X + K].  Negative offsets are tested for
-                        * in the helper functions.
-                        */
-                       ctx->seen |= SEEN_DATAREF | SEEN_XREG;
-                       PPC_FUNC_ADDR(r_scratch1, func);
-                       EMIT(PPC_RAW_MTLR(r_scratch1));
-                       EMIT(PPC_RAW_ADDI(r_addr, r_X, IMM_L(K)));
-                       if (K >= 32768)
-                               EMIT(PPC_RAW_ADDIS(r_addr, r_addr, IMM_HA(K)));
-                       EMIT(PPC_RAW_BLRL());
-                       /* If error, cr0.LT set */
-                       PPC_BCC(COND_LT, exit_addr);
-                       break;
-
-               case BPF_LDX | BPF_B | BPF_MSH:
-                       func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
-                       goto common_load;
-                       break;
-
-                       /*** Jump and branches ***/
-               case BPF_JMP | BPF_JA:
-                       if (K != 0)
-                               PPC_JMP(addrs[i + 1 + K]);
-                       break;
-
-               case BPF_JMP | BPF_JGT | BPF_K:
-               case BPF_JMP | BPF_JGT | BPF_X:
-                       true_cond = COND_GT;
-                       goto cond_branch;
-               case BPF_JMP | BPF_JGE | BPF_K:
-               case BPF_JMP | BPF_JGE | BPF_X:
-                       true_cond = COND_GE;
-                       goto cond_branch;
-               case BPF_JMP | BPF_JEQ | BPF_K:
-               case BPF_JMP | BPF_JEQ | BPF_X:
-                       true_cond = COND_EQ;
-                       goto cond_branch;
-               case BPF_JMP | BPF_JSET | BPF_K:
-               case BPF_JMP | BPF_JSET | BPF_X:
-                       true_cond = COND_NE;
-               cond_branch:
-                       /* same targets, can avoid doing the test :) */
-                       if (filter[i].jt == filter[i].jf) {
-                               if (filter[i].jt > 0)
-                                       PPC_JMP(addrs[i + 1 + filter[i].jt]);
-                               break;
-                       }
-
-                       switch (code) {
-                       case BPF_JMP | BPF_JGT | BPF_X:
-                       case BPF_JMP | BPF_JGE | BPF_X:
-                       case BPF_JMP | BPF_JEQ | BPF_X:
-                               ctx->seen |= SEEN_XREG;
-                               EMIT(PPC_RAW_CMPLW(r_A, r_X));
-                               break;
-                       case BPF_JMP | BPF_JSET | BPF_X:
-                               ctx->seen |= SEEN_XREG;
-                               EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, r_X));
-                               break;
-                       case BPF_JMP | BPF_JEQ | BPF_K:
-                       case BPF_JMP | BPF_JGT | BPF_K:
-                       case BPF_JMP | BPF_JGE | BPF_K:
-                               if (K < 32768)
-                                       EMIT(PPC_RAW_CMPLWI(r_A, K));
-                               else {
-                                       PPC_LI32(r_scratch1, K);
-                                       EMIT(PPC_RAW_CMPLW(r_A, r_scratch1));
-                               }
-                               break;
-                       case BPF_JMP | BPF_JSET | BPF_K:
-                               if (K < 32768)
-                                       /* PPC_ANDI is /only/ dot-form */
-                                       EMIT(PPC_RAW_ANDI(r_scratch1, r_A, K));
-                               else {
-                                       PPC_LI32(r_scratch1, K);
-                                       EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A,
-                                                   r_scratch1));
-                               }
-                               break;
-                       }
-                       /* Sometimes branches are constructed "backward", with
-                        * the false path being the branch and true path being
-                        * a fallthrough to the next instruction.
+                        * Restore ctx->idx here. This is safe as the length
+                        * of the JITed sequence remains unchanged.
                         */
-                       if (filter[i].jt == 0)
-                               /* Swap the sense of the branch */
-                               PPC_BCC(true_cond ^ COND_CMP_TRUE,
-                                       addrs[i + 1 + filter[i].jf]);
-                       else {
-                               PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]);
-                               if (filter[i].jf != 0)
-                                       PPC_JMP(addrs[i + 1 + filter[i].jf]);
-                       }
-                       break;
-               default:
-                       /* The filter contains something cruel & unusual.
-                        * We don't handle it, but also there shouldn't be
-                        * anything missing from our list.
-                        */
-                       if (printk_ratelimit())
-                               pr_err("BPF filter opcode %04x (@%d) unsupported\n",
-                                      filter[i].code, i);
-                       return -ENOTSUPP;
+                       ctx->idx = tmp_idx;
                }
-
        }
-       /* Set end-of-body-code address for exit. */
-       addrs[i] = ctx->idx * 4;
 
        return 0;
 }
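
The tmp_idx dance above (save ctx->idx, point it at the offset recorded in addrs[i], re-emit the call, then put it back) is what lets the extra pass patch subprog call targets in place without disturbing the rest of the image. A minimal standalone sketch of that pattern follows; mini_ctx, emit() and patch_call() are hypothetical names for illustration, not the kernel's types:

#include <stdint.h>
#include <stddef.h>

/* Hypothetical mini-context mirroring the JIT's "idx counts 32-bit words" convention. */
struct mini_ctx {
	uint32_t *image;	/* output buffer, NULL on sizing passes */
	size_t idx;		/* next instruction slot */
};

static void emit(struct mini_ctx *ctx, uint32_t insn)
{
	if (ctx->image)
		ctx->image[ctx->idx] = insn;
	ctx->idx++;
}

/*
 * Re-emit a fixed-length call sequence at the byte offset recorded in
 * addrs[i] without disturbing the end-of-image index, the same idea as
 * the tmp_idx handling around the real call emission above.
 */
static void patch_call(struct mini_ctx *ctx, const uint32_t *addrs, int i,
		       uint32_t insn0, uint32_t insn1)
{
	size_t tmp_idx = ctx->idx;

	ctx->idx = addrs[i] / 4;	/* seek to the recorded slot */
	emit(ctx, insn0);
	emit(ctx, insn1);
	ctx->idx = tmp_idx;		/* safe: the sequence length is unchanged */
}

This only works because the re-emitted sequence is exactly as long as the one laid down on the earlier passes.
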
 
-void bpf_jit_compile(struct bpf_prog *fp)
+struct powerpc64_jit_data {
+       struct bpf_binary_header *header;
+       u32 *addrs;
+       u8 *image;
+       u32 proglen;
+       struct codegen_context ctx;
+};
+
+bool bpf_jit_needs_zext(void)
+{
+       return true;
+}
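
Returning true here asks the verifier to materialise 32-bit zero-extensions as explicit instructions instead of relying on the JIT to do them implicitly; the 32-bit body further down, for instance, spots imm == 1 in its BPF_ALU | BPF_MOV | BPF_X case and simply clears the high-word register. A rough illustration of what such a verifier-style zext looks like at the instruction level, using the BPF_ZEXT_REG() helper from linux/filter.h (illustration only, not part of this patch):

#include <linux/filter.h>

/*
 * A verifier-inserted zero-extension is a 32-bit mov-to-self with imm == 1,
 * which is what BPF_ZEXT_REG() expands to.
 */
static const struct bpf_insn zext_example[] = {
	BPF_MOV32_IMM(BPF_REG_0, 0x1234),	/* writes only the low 32 bits */
	BPF_ZEXT_REG(BPF_REG_0),		/* explicit zext of the high half */
	BPF_EXIT_INSN(),
};
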
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
-       unsigned int proglen;
-       unsigned int alloclen;
-       u32 *image = NULL;
+       u32 proglen;
+       u32 alloclen;
+       u8 *image = NULL;
        u32 *code_base;
-       unsigned int *addrs;
+       u32 *addrs;
+       struct powerpc64_jit_data *jit_data;
        struct codegen_context cgctx;
        int pass;
-       int flen = fp->len;
+       int flen;
+       struct bpf_binary_header *bpf_hdr;
+       struct bpf_prog *org_fp = fp;
+       struct bpf_prog *tmp_fp;
+       bool bpf_blinded = false;
+       bool extra_pass = false;
+
+       if (!fp->jit_requested)
+               return org_fp;
+
+       tmp_fp = bpf_jit_blind_constants(org_fp);
+       if (IS_ERR(tmp_fp))
+               return org_fp;
+
+       if (tmp_fp != org_fp) {
+               bpf_blinded = true;
+               fp = tmp_fp;
+       }
+
+       jit_data = fp->aux->jit_data;
+       if (!jit_data) {
+               jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+               if (!jit_data) {
+                       fp = org_fp;
+                       goto out;
+               }
+               fp->aux->jit_data = jit_data;
+       }
 
-       if (!bpf_jit_enable)
-               return;
+       flen = fp->len;
+       addrs = jit_data->addrs;
+       if (addrs) {
+               cgctx = jit_data->ctx;
+               image = jit_data->image;
+               bpf_hdr = jit_data->header;
+               proglen = jit_data->proglen;
+               alloclen = proglen + FUNCTION_DESCR_SIZE;
+               extra_pass = true;
+               goto skip_init_ctx;
+       }
 
        addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
-       if (addrs == NULL)
-               return;
+       if (addrs == NULL) {
+               fp = org_fp;
+               goto out_addrs;
+       }
 
-       /*
-        * There are multiple assembly passes as the generated code will change
-        * size as it settles down, figuring out the max branch offsets/exit
-        * paths required.
-        *
-        * The range of standard conditional branches is +/- 32Kbytes.  Since
-        * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to
-        * finish with 8 bytes/instruction.  Not feasible, so long jumps are
-        * used, distinct from short branches.
-        *
-        * Current:
-        *
-        * For now, both branch types assemble to 2 words (short branches padded
-        * with a NOP); this is less efficient, but assembly will always complete
-        * after exactly 3 passes:
-        *
-        * First pass: No code buffer; Program is "faux-generated" -- no code
-        * emitted but maximum size of output determined (and addrs[] filled
-        * in).  Also, we note whether we use M[], whether we use skb data, etc.
-        * All generation choices assumed to be 'worst-case', e.g. branches all
-        * far (2 instructions), return path code reduction not available, etc.
-        *
-        * Second pass: Code buffer allocated with size determined previously.
-        * Prologue generated to support features we have seen used.  Exit paths
-        * determined and addrs[] is filled in again, as code may be slightly
-        * smaller as a result.
-        *
-        * Third pass: Code generated 'for real', and branch destinations
-        * determined from now-accurate addrs[] map.
-        *
-        * Ideal:
-        *
-        * If we optimise this, near branches will be shorter.  On the
-        * first assembly pass, we should err on the side of caution and
-        * generate the biggest code.  On subsequent passes, branches will be
-        * generated short or long and code size will reduce.  With smaller
-        * code, more branches may fall into the short category, and code will
-        * reduce more.
-        *
-        * Finally, if we see one pass generate code the same size as the
-        * previous pass we have converged and should now generate code for
-        * real.  Allocating at the end will also save the memory that would
-        * otherwise be wasted by the (small) current code shrinkage.
-        * Preferably, we should do a small number of passes (e.g. 5) and if we
-        * haven't converged by then, get impatient and force code to generate
-        * as-is, even if the odd branch would be left long.  The chances of a
-        * long jump are tiny with all but the most enormous of BPF filter
-        * inputs, so we should usually converge on the third pass.
-        */
+       memset(&cgctx, 0, sizeof(struct codegen_context));
+       memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p));
+
+       /* Make sure that the stack is quadword aligned. */
+       cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
-       cgctx.idx = 0;
-       cgctx.seen = 0;
-       cgctx.pc_ret0 = -1;
        /* Scouting faux-generate pass 0 */
-       if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
+       if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
                /* We hit something illegal or unsupported. */
-               goto out;
+               fp = org_fp;
+               goto out_addrs;
+       }
+
+       /*
+        * If we have seen a tail call, we need a second pass.
+        * This is because bpf_jit_emit_common_epilogue() is called
+        * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
+        */
+       if (cgctx.seen & SEEN_TAILCALL) {
+               cgctx.idx = 0;
+               if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+                       fp = org_fp;
+                       goto out_addrs;
+               }
+       }
 
+       bpf_jit_realloc_regs(&cgctx);
        /*
         * Pretend to build prologue, given the features we've seen.  This will
         * update cgctx.idx as it pretends to output instructions, then we can
         * calculate total size from idx.
         */
-       bpf_jit_build_prologue(fp, 0, &cgctx);
+       bpf_jit_build_prologue(0, &cgctx);
        bpf_jit_build_epilogue(0, &cgctx);
 
        proglen = cgctx.idx * 4;
        alloclen = proglen + FUNCTION_DESCR_SIZE;
-       image = module_alloc(alloclen);
-       if (!image)
-               goto out;
 
-       code_base = image + (FUNCTION_DESCR_SIZE/4);
+       bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
+       if (!bpf_hdr) {
+               fp = org_fp;
+               goto out_addrs;
+       }
+
+skip_init_ctx:
+       code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
+
+       if (extra_pass) {
+               /*
+                * Do not touch the prologue and epilogue as they will remain
+                * unchanged. Only fix the branch target address for subprog
+                * calls in the body.
+                *
+                * This does not change the offsets and lengths of the subprog
+                * call instruction sequences and hence, the size of the JITed
+                * image as well.
+                */
+               bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
+
+               /* There is no need to perform the usual passes. */
+               goto skip_codegen_passes;
+       }
 
        /* Code generation passes 1-2 */
        for (pass = 1; pass < 3; pass++) {
                /* Now build the prologue, body code & epilogue for real. */
                cgctx.idx = 0;
-               bpf_jit_build_prologue(fp, code_base, &cgctx);
-               bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+               bpf_jit_build_prologue(code_base, &cgctx);
+               bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
                bpf_jit_build_epilogue(code_base, &cgctx);
 
                if (bpf_jit_enable > 1)
@@ -652,15 +218,15 @@ void bpf_jit_compile(struct bpf_prog *fp)
                                proglen - (cgctx.idx * 4), cgctx.seen);
        }
 
+skip_codegen_passes:
        if (bpf_jit_enable > 1)
-               /* Note that we output the base address of the code_base
+               /*
+                * Note that we output the base address of the code_base
                 * rather than image, since opcodes are in code_base.
                 */
                bpf_jit_dump(flen, proglen, pass, code_base);
 
-       bpf_flush_icache(code_base, code_base + (proglen/4));
-
-#ifdef CONFIG_PPC64
+#ifdef PPC64_ELF_ABI_v1
        /* Function descriptor nastiness: Address + TOC */
        ((u64 *)image)[0] = (u64)code_base;
        ((u64 *)image)[1] = local_paca->kernel_toc;
@@ -668,16 +234,38 @@ void bpf_jit_compile(struct bpf_prog *fp)
 
        fp->bpf_func = (void *)image;
        fp->jited = 1;
+       fp->jited_len = alloclen;
+
+       bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+       if (!fp->is_func || extra_pass) {
+               bpf_prog_fill_jited_linfo(fp, addrs);
+out_addrs:
+               kfree(addrs);
+               kfree(jit_data);
+               fp->aux->jit_data = NULL;
+       } else {
+               jit_data->addrs = addrs;
+               jit_data->ctx = cgctx;
+               jit_data->proglen = proglen;
+               jit_data->image = image;
+               jit_data->header = bpf_hdr;
+       }
 
 out:
-       kfree(addrs);
-       return;
+       if (bpf_blinded)
+               bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
+
+       return fp;
 }
 
+/* Overriding bpf_jit_free() as we don't set images read-only. */
 void bpf_jit_free(struct bpf_prog *fp)
 {
+       unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
+       struct bpf_binary_header *bpf_hdr = (void *)addr;
+
        if (fp->jited)
-               module_memfree(fp->bpf_func);
+               bpf_jit_binary_free(bpf_hdr);
 
        bpf_prog_unlock_free(fp);
 }
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
new file mode 100644
index 0000000..bbb1609
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -0,0 +1,1100 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * eBPF JIT compiler for PPC32
+ *
+ * Copyright 2020 Christophe Leroy <christophe.leroy@csgroup.eu>
+ *               CS GROUP France
+ *
+ * Based on PPC64 eBPF JIT compiler by Naveen N. Rao
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <asm/asm-compat.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
+
+#include "bpf_jit.h"
+
+/*
+ * Stack layout:
+ *
+ *             [       prev sp         ] <-------------
+ *             [   nv gpr save area    ] 16 * 4        |
+ * fp (r31) -->        [   ebpf stack space    ] up to 512     |
+ *             [     frame header      ] 16            |
+ * sp (r1) --->        [    stack pointer      ] --------------
+ */
+
+/* for gpr non-volatile registers r17 to r31 (15) + tail call */
+#define BPF_PPC_STACK_SAVE     (15 * 4 + 4)
+/* stack frame, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME(ctx)        (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size)
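
To put numbers on the layout above: BPF_PPC_STACK_SAVE works out to 15 * 4 + 4 = 64 bytes, and the eBPF stack space is the program's stack_depth rounded up to 16. A throwaway user-space check of that arithmetic, assuming STACK_FRAME_MIN_SIZE is 16 on ppc32 (the macros are re-defined locally just for this sketch):

#include <assert.h>

/* Local stand-ins for the kernel macros, for illustration only. */
#define STACK_FRAME_MIN_SIZE	16		/* assumed ppc32 value */
#define BPF_PPC_STACK_SAVE	(15 * 4 + 4)	/* 64: r17-r31 + tail call count */

static unsigned int frame_size(unsigned int stack_depth)
{
	unsigned int stack_size = (stack_depth + 15) & ~15u;	/* round_up(, 16) */

	return STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + stack_size;
}

int main(void)
{
	assert(frame_size(40) == 16 + 64 + 48);	/* 128 bytes */
	assert(frame_size(0) == 80);
	return 0;
}
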
+
+/* BPF register usage */
+#define TMP_REG        (MAX_BPF_JIT_REG + 0)
+
+/* BPF to ppc register mappings */
+const int b2p[MAX_BPF_JIT_REG + 1] = {
+       /* function return value */
+       [BPF_REG_0] = 12,
+       /* function arguments */
+       [BPF_REG_1] = 4,
+       [BPF_REG_2] = 6,
+       [BPF_REG_3] = 8,
+       [BPF_REG_4] = 10,
+       [BPF_REG_5] = 22,
+       /* non volatile registers */
+       [BPF_REG_6] = 24,
+       [BPF_REG_7] = 26,
+       [BPF_REG_8] = 28,
+       [BPF_REG_9] = 30,
+       /* frame pointer aka BPF_REG_10 */
+       [BPF_REG_FP] = 18,
+       /* eBPF jit internal registers */
+       [BPF_REG_AX] = 20,
+       [TMP_REG] = 31,         /* 32 bits */
+};
+
+static int bpf_to_ppc(struct codegen_context *ctx, int reg)
+{
+       return ctx->b2p[reg];
+}
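
Since eBPF registers are 64-bit and these GPRs are 32-bit, every BPF register is really a pair: b2p[reg] carries the low word and b2p[reg] - 1 the high word, which is where the dst_reg_h/src_reg_h derivation in bpf_jit_build_body() below comes from. A tiny illustrative helper (bpf_reg_to_gpr_pair is a made-up name, not kernel code):

/* Illustration of the low/high GPR pairing implied by the b2p[] table. */
struct gpr_pair { int lo; int hi; };

static struct gpr_pair bpf_reg_to_gpr_pair(const int *b2p_map, int bpf_reg)
{
	struct gpr_pair p = { .lo = b2p_map[bpf_reg], .hi = b2p_map[bpf_reg] - 1 };

	return p;	/* e.g. BPF_REG_1 -> { r4, r3 }, BPF_REG_2 -> { r6, r5 } */
}
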
+
+/* PPC NVR range -- update this if we ever use NVRs below r17 */
+#define BPF_PPC_NVR_MIN                17
+#define BPF_PPC_TC             16
+
+static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
+{
+       if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC)
+               return BPF_PPC_STACKFRAME(ctx) - 4 * (32 - reg);
+
+       WARN(true, "BPF JIT is asking about unknown registers, will crash the stack");
+       /* Use the hole we have left for alignment */
+       return BPF_PPC_STACKFRAME(ctx) - 4;
+}
+
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+       if (ctx->seen & SEEN_FUNC)
+               return;
+
+       while (ctx->seen & SEEN_NVREG_MASK &&
+             (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
+               int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab));
+               int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa));
+               int i;
+
+               for (i = BPF_REG_0; i <= TMP_REG; i++) {
+                       if (ctx->b2p[i] != old)
+                               continue;
+                       ctx->b2p[i] = new;
+                       bpf_set_seen_register(ctx, new);
+                       bpf_clear_seen_register(ctx, old);
+                       if (i != TMP_REG) {
+                               bpf_set_seen_register(ctx, new - 1);
+                               bpf_clear_seen_register(ctx, old - 1);
+                       }
+                       break;
+               }
+       }
+}
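
This pass retires non-volatile registers when the program makes no helper calls, so the prologue and epilogue never have to save or restore them. The fls() arithmetic recovers a register number from the 'seen' bitmask: assuming the bpf_jit.h convention that register r is tracked as bit 1 << (31 - r), 32 - fls(mask) yields the lowest-numbered register present in mask (lower register numbers sit at higher bit positions). A quick standalone check of that identity, with the bit convention stated as an assumption:

#include <assert.h>

/* Portable stand-in for the kernel's fls(): 1-based index of the highest set bit, 0 for 0. */
static int fls_(unsigned int x)
{
	int i = 0;

	while (x) {
		x >>= 1;
		i++;
	}
	return i;
}

/* Assumed convention: register r is tracked in the 'seen' mask as bit 1 << (31 - r). */
static unsigned int seen_bit(int reg)
{
	return 1u << (31 - reg);
}

int main(void)
{
	unsigned int seen = seen_bit(30) | seen_bit(26);	/* say r26 and r30 are in use */

	assert(32 - fls_(seen) == 26);	/* lowest-numbered register in the mask */
	return 0;
}
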
+
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+{
+       int i;
+
+       /* First arg comes in as a 32-bit pointer. */
+       EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3));
+       EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0));
+       EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx)));
+
+       /*
+        * Initialize tail_call_cnt in stack frame if we do tail calls.
+        * Otherwise, put in NOPs so that it can be skipped when we are
+        * invoked through a tail call.
+        */
+       if (ctx->seen & SEEN_TAILCALL) {
+               EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+       } else {
+               EMIT(PPC_RAW_NOP());
+       }
+
+#define BPF_TAILCALL_PROLOGUE_SIZE     16
+
+       /*
+        * We need a stack frame, but we don't necessarily need to
+        * save/restore LR unless we call other functions
+        */
+       if (ctx->seen & SEEN_FUNC)
+               EMIT(PPC_RAW_MFLR(__REG_R0));
+
+       /*
+        * Back up non-volatile regs -- registers r17-r31
+        */
+       for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+               if (bpf_is_seen_register(ctx, i))
+                       EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+
+       /* If needed, retrieve arguments 9 and 10, i.e. the 5th 64-bit arg. */
+       if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) {
+               EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx) + 8));
+               EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx) + 12));
+       }
+
+       /* Setup frame pointer to point to the bpf stack area */
+       if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) {
+               EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0));
+               EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1,
+                                 STACK_FRAME_MIN_SIZE + ctx->stack_size));
+       }
+
+       if (ctx->seen & SEEN_FUNC)
+               EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+}
+
+static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
+{
+       int i;
+
+       /* Restore NVRs */
+       for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+               if (bpf_is_seen_register(ctx, i))
+                       EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+}
+
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+       EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0)));
+
+       bpf_jit_emit_common_epilogue(image, ctx);
+
+       /* Tear down our stack frame */
+
+       if (ctx->seen & SEEN_FUNC)
+               EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+
+       EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx)));
+
+       if (ctx->seen & SEEN_FUNC)
+               EMIT(PPC_RAW_MTLR(__REG_R0));
+
+       EMIT(PPC_RAW_BLR());
+}
+
+void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
+{
+       s32 rel = (s32)func - (s32)(image + ctx->idx);
+
+       if (image && rel < 0x2000000 && rel >= -0x2000000) {
+               PPC_BL_ABS(func);
+       } else {
+               /* Load function address into r0 */
+               EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func)));
+               EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func)));
+               EMIT(PPC_RAW_MTLR(__REG_R0));
+               EMIT(PPC_RAW_BLRL());
+       }
+}
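
The 0x2000000 bounds above are the plus/minus 32 MB reach of a PowerPC relative branch (24-bit LI field, shifted left by two and sign-extended); anything out of range, or any call sized before the image address is known, takes the four-instruction lis/ori/mtlr/blrl form instead. The same range check as a standalone predicate (the name is invented for illustration):

#include <stdbool.h>
#include <stdint.h>

/* Does a signed byte displacement fit in a "bl" relative branch? */
static bool fits_rel_branch(int32_t rel)
{
	return rel >= -0x2000000 && rel < 0x2000000;	/* +/- 32 MB */
}
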
+
+static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+{
+       /*
+        * By now, the eBPF program has already set up parameters in r3-r8
+        * r3-r4/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
+        * r5-r6/BPF_REG_2 - pointer to bpf_array
+        * r7-r8/BPF_REG_3 - index in bpf_array
+        */
+       int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2);
+       int b2p_index = bpf_to_ppc(ctx, BPF_REG_3);
+
+       /*
+        * if (index >= array->map.max_entries)
+        *   goto out;
+        */
+       EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+       EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0));
+       EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+       PPC_BCC(COND_GE, out);
+
+       /*
+        * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+        *   goto out;
+        */
+       EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT));
+       /* tail_call_cnt++; */
+       EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1));
+       PPC_BCC(COND_GT, out);
+
+       /* prog = array->ptrs[index]; */
+       EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29));
+       EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array));
+       EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs)));
+       EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+
+       /*
+        * if (prog == NULL)
+        *   goto out;
+        */
+       EMIT(PPC_RAW_CMPLWI(__REG_R3, 0));
+       PPC_BCC(COND_EQ, out);
+
+       /* goto *(prog->bpf_func + prologue_size); */
+       EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func)));
+
+       if (ctx->seen & SEEN_FUNC)
+               EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+
+       EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE));
+
+       if (ctx->seen & SEEN_FUNC)
+               EMIT(PPC_RAW_MTLR(__REG_R0));
+
+       EMIT(PPC_RAW_MTCTR(__REG_R3));
+
+       EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1)));
+
+       /* Restore NVRs */
+       bpf_jit_emit_common_epilogue(image, ctx);
+
+       EMIT(PPC_RAW_BCTR());
+       /* out: */
+}
+
+/* Assemble the body code between the prologue & epilogue */
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+                      u32 *addrs, bool extra_pass)
+{
+       const struct bpf_insn *insn = fp->insnsi;
+       int flen = fp->len;
+       int i, ret;
+
+       /* Start of epilogue code - will only be valid 2nd pass onwards */
+       u32 exit_addr = addrs[flen];
+
+       for (i = 0; i < flen; i++) {
+               u32 code = insn[i].code;
+               u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg);
+               u32 dst_reg_h = dst_reg - 1;
+               u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg);
+               u32 src_reg_h = src_reg - 1;
+               u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG);
+               s16 off = insn[i].off;
+               s32 imm = insn[i].imm;
+               bool func_addr_fixed;
+               u64 func_addr;
+               u32 true_cond;
+
+               /*
+                * addrs[] maps a BPF bytecode address into a real offset from
+                * the start of the body code.
+                */
+               addrs[i] = ctx->idx * 4;
+
+               /*
+                * As an optimization, we note down which registers
+                * are used so that we can only save/restore those in our
+                * prologue and epilogue. We do this here regardless of whether
+                * the actual BPF instruction uses src/dst registers or not
+                * (for instance, BPF_CALL does not use them). The expectation
+                * is that those instructions will have src_reg/dst_reg set to
+                * 0. Even otherwise, we just lose some prologue/epilogue
+                * optimization but everything else should work without
+                * any issues.
+                */
+               if (dst_reg >= 3 && dst_reg < 32) {
+                       bpf_set_seen_register(ctx, dst_reg);
+                       bpf_set_seen_register(ctx, dst_reg_h);
+               }
+
+               if (src_reg >= 3 && src_reg < 32) {
+                       bpf_set_seen_register(ctx, src_reg);
+                       bpf_set_seen_register(ctx, src_reg_h);
+               }
+
+               switch (code) {
+               /*
+                * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
+                */
+               case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
+                       EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg));
+                       break;
+               case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
+                       EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, src_reg));
+                       EMIT(PPC_RAW_ADDE(dst_reg_h, dst_reg_h, src_reg_h));
+                       break;
+               case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
+                       EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg));
+                       break;
+               case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
+                       EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, dst_reg));
+                       EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, dst_reg_h));
+                       break;
+               case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
+                       imm = -imm;
+                       fallthrough;
+               case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
+                       if (IMM_HA(imm) & 0xffff)
+                               EMIT(PPC_RAW_ADDIS(dst_reg, dst_reg, IMM_HA(imm)));
+                       if (IMM_L(imm))
+                               EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
+                       break;
+               case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
+                       imm = -imm;
+                       fallthrough;
+               case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
+                       if (!imm)
+                               break;
+
+                       if (imm >= -32768 && imm < 32768) {
+                               EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm));
+                       } else {
+                               PPC_LI32(__REG_R0, imm);
+                               EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0));
+                       }
+                       if (imm >= 0)
+                               EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h));
+                       else
+                               EMIT(PPC_RAW_ADDME(dst_reg_h, dst_reg_h));
+                       break;
+               case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
+                       bpf_set_seen_register(ctx, tmp_reg);
+                       EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h));
+                       EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg));
+                       EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg));
+                       EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
+                       EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0));
+                       EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg));
+                       break;
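
That five-instruction sequence is the standard 64 x 64 -> 64 multiply built from 32-bit halves: mullw produces the low product, mulhwu supplies the carry into the high word, and the two cross products only ever affect the high word (the a_hi * b_hi term falls off the top entirely). The same decomposition in portable C, as a sanity check rather than kernel code:

#include <assert.h>
#include <stdint.h>

/* 64x64->64 multiply from 32-bit halves, mirroring the emitted mullw/mulhwu mix. */
static uint64_t mul64_from_halves(uint32_t a_lo, uint32_t a_hi,
				  uint32_t b_lo, uint32_t b_hi)
{
	uint32_t lo = a_lo * b_lo;					/* mullw  dst_lo */
	uint32_t carry = (uint32_t)(((uint64_t)a_lo * b_lo) >> 32);	/* mulhwu tmp    */
	uint32_t hi = a_hi * b_lo + a_lo * b_hi + carry;		/* mullw + adds  */

	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	uint64_t a = 0x1234567890abcdefULL, b = 0x0fedcba098765432ULL;

	assert(mul64_from_halves((uint32_t)a, (uint32_t)(a >> 32),
				 (uint32_t)b, (uint32_t)(b >> 32)) == a * b);
	return 0;
}
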
+               case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
+                       EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
+                       break;
+               case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
+                       if (imm >= -32768 && imm < 32768) {
+                               EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm));
+                       } else {
+                               PPC_LI32(__REG_R0, imm);
+                               EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0));
+                       }
+                       break;
+               case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
+                       if (!imm) {
+                               PPC_LI32(dst_reg, 0);
+                               PPC_LI32(dst_reg_h, 0);
+                               break;
+                       }
+                       if (imm == 1)
+                               break;
+                       if (imm == -1) {
+                               EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0));
+                               EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h));
+                               break;
+                       }
+                       bpf_set_seen_register(ctx, tmp_reg);
+                       PPC_LI32(tmp_reg, imm);
+                       EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg));
+                       if (imm < 0)
+                               EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg));
+                       EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg));
+                       EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg));
+                       EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0));
+                       break;
+               case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
+                       EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
+                       break;
+               case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
+                       EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg));
+                       EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0));
+                       EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0));
+                       break;
+               case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
+                       return -EOPNOTSUPP;
+               case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
+                       return -EOPNOTSUPP;
+               case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
+                       if (!imm)
+                               return -EINVAL;
+                       if (imm == 1)
+                               break;
+
+                       PPC_LI32(__REG_R0, imm);
+                       EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0));
+                       break;
+               case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
+                       if (!imm)
+                               return -EINVAL;
+
+                       if (!is_power_of_2((u32)imm)) {
+                               bpf_set_seen_register(ctx, tmp_reg);
+                               PPC_LI32(tmp_reg, imm);
+                               EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg));
+                               EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0));
+                               EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0));
+                               break;
+                       }
+                       if (imm == 1)
+                               EMIT(PPC_RAW_LI(dst_reg, 0));
+                       else
+                               EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2((u32)imm), 31));
+
+                       break;
+               case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
+                       if (!imm)
+                               return -EINVAL;
+                       if (imm < 0)
+                               imm = -imm;
+                       if (!is_power_of_2(imm))
+                               return -EOPNOTSUPP;
+                       if (imm == 1)
+                               EMIT(PPC_RAW_LI(dst_reg, 0));
+                       else
+                               EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31));
+                       EMIT(PPC_RAW_LI(dst_reg_h, 0));
+                       break;
+               case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
+                       if (!imm)
+                               return -EINVAL;
+                       if (!is_power_of_2(abs(imm)))
+                               return -EOPNOTSUPP;
+
+                       if (imm < 0) {
+                               EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0));
+                               EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h));
+                               imm = -imm;
+                       }
+                       if (imm == 1)
+                               break;
+                       imm = ilog2(imm);
+                       EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31));
+                       EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1));
+                       EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm));
+                       break;
+               case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
+                       EMIT(PPC_RAW_NEG(dst_reg, dst_reg));
+                       break;
+               case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+                       EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0));
+                       EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h));
+                       break;
+
+               /*
+                * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
+                */
+               case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+                       EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
+                       EMIT(PPC_RAW_AND(dst_reg_h, dst_reg_h, src_reg_h));
+                       break;
+               case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
+                       EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
+                       break;
+               case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+                       if (imm >= 0)
+                               EMIT(PPC_RAW_LI(dst_reg_h, 0));
+                       fallthrough;
+               case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
+                       if (!IMM_H(imm)) {
+                               EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
+                       } else if (!IMM_L(imm)) {
+                               EMIT(PPC_RAW_ANDIS(dst_reg, dst_reg, IMM_H(imm)));
+                       } else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(imm) - 1)) - 1))) {
+                               EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0,
+                                                   32 - fls(imm), 32 - ffs(imm)));
+                       } else {
+                               PPC_LI32(__REG_R0, imm);
+                               EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0));
+                       }
+                       break;
+               case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+                       EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
+                       EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, src_reg_h));
+                       break;
+               case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+                       EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
+                       break;
+               case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
+                       /* Sign-extended */
+                       if (imm < 0)
+                               EMIT(PPC_RAW_LI(dst_reg_h, -1));
+                       fallthrough;
+               case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
+                       if (IMM_L(imm))
+                               EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
+                       if (IMM_H(imm))
+                               EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm)));
+                       break;
+               case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
+                       if (dst_reg == src_reg) {
+                               EMIT(PPC_RAW_LI(dst_reg, 0));
+                               EMIT(PPC_RAW_LI(dst_reg_h, 0));
+                       } else {
+                               EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
+                               EMIT(PPC_RAW_XOR(dst_reg_h, dst_reg_h, src_reg_h));
+                       }
+                       break;
+               case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
+                       if (dst_reg == src_reg)
+                               EMIT(PPC_RAW_LI(dst_reg, 0));
+                       else
+                               EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
+                       break;
+               case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
+                       if (imm < 0)
+                               EMIT(PPC_RAW_NOR(dst_reg_h, dst_reg_h, dst_reg_h));
+                       fallthrough;
+               case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
+                       if (IMM_L(imm))
+                               EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
+                       if (IMM_H(imm))
+                               EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm)));
+                       break;
+               case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
+                       EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
+                       break;
+               case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
+                       bpf_set_seen_register(ctx, tmp_reg);
+                       EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+                       EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg));
+                       EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+                       EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0));
+                       EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg));
+                       EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0));
+                       EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
+                       EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg));
+                       break;
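
The straight-line sequence above covers every shift amount from 0 to 63 without a branch: it leans on slw/srw treating their amount as six bits and producing zero when that amount is 32..63, so the out-of-range term simply vanishes for small shifts and the in-range term vanishes for large ones. Written out as the equivalent three-way split in portable C (illustrative only):

#include <assert.h>
#include <stdint.h>

/* The three-way split the branch-free slw/srw sequence above is equivalent to. */
static uint64_t shl64_from_halves(uint32_t lo, uint32_t hi, unsigned int n)
{
	uint32_t new_lo, new_hi;

	n &= 63;
	if (n == 0) {
		new_hi = hi;
		new_lo = lo;
	} else if (n < 32) {
		new_hi = (hi << n) | (lo >> (32 - n));
		new_lo = lo << n;
	} else {
		new_hi = lo << (n - 32);
		new_lo = 0;
	}
	return ((uint64_t)new_hi << 32) | new_lo;
}

int main(void)
{
	uint64_t v = 0x0123456789abcdefULL;
	unsigned int n;

	for (n = 0; n < 64; n++)
		assert(shl64_from_halves((uint32_t)v, (uint32_t)(v >> 32), n) == v << n);
	return 0;
}
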
+               case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
+                       if (!imm)
+                               break;
+                       EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm));
+                       break;
+               case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
+                       if (imm < 0)
+                               return -EINVAL;
+                       if (!imm)
+                               break;
+                       if (imm < 32) {
+                               EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, imm, 0, 31 - imm));
+                               EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31));
+                               EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, imm, 0, 31 - imm));
+                               break;
+                       }
+                       if (imm < 64)
+                               EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg, imm, 0, 31 - imm));
+                       else
+                               EMIT(PPC_RAW_LI(dst_reg_h, 0));
+                       EMIT(PPC_RAW_LI(dst_reg, 0));
+                       break;
+               case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
+                       EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+                       break;
+               case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
+                       bpf_set_seen_register(ctx, tmp_reg);
+                       EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+                       EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+                       EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+                       EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
+                       EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg));
+                       EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
+                       EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg));
+                       EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+                       break;
+               case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
+                       if (!imm)
+                               break;
+                       EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm));
+                       break;
+               case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
+                       if (imm < 0)
+                               return -EINVAL;
+                       if (!imm)
+                               break;
+                       if (imm < 32) {
+                               EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31));
+                               EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1));
+                               EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, 32 - imm, imm, 31));
+                               break;
+                       }
+                       if (imm < 64)
+                               EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg_h, 64 - imm, imm - 32, 31));
+                       else
+                               EMIT(PPC_RAW_LI(dst_reg, 0));
+                       EMIT(PPC_RAW_LI(dst_reg_h, 0));
+                       break;
+               case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
+                       EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg));
+                       break;
+               case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
+                       bpf_set_seen_register(ctx, tmp_reg);
+                       EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+                       EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+                       EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
+                       EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+                       EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
+                       EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26));
+                       EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg));
+                       EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg));
+                       EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0));
+                       EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+                       break;
+               case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
+                       if (!imm)
+                               break;
+                       EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm));
+                       break;
+               case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
+                       if (imm < 0)
+                               return -EINVAL;
+                       if (!imm)
+                               break;
+                       if (imm < 32) {
+                               EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31));
+                               EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1));
+                               EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm));
+                               break;
+                       }
+                       if (imm < 64)
+                               EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, imm - 32));
+                       else
+                               EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, 31));
+                       EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, 31));
+                       break;
+
+               /*
+                * MOV
+                */
+               case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+                       if (dst_reg == src_reg)
+                               break;
+                       EMIT(PPC_RAW_MR(dst_reg, src_reg));
+                       EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h));
+                       break;
+               case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
+                       /* special mov32 for zext */
+                       if (imm == 1)
+                               EMIT(PPC_RAW_LI(dst_reg_h, 0));
+                       else if (dst_reg != src_reg)
+                               EMIT(PPC_RAW_MR(dst_reg, src_reg));
+                       break;
+               case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
+                       PPC_LI32(dst_reg, imm);
+                       PPC_EX32(dst_reg_h, imm);
+                       break;
+               case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
+                       PPC_LI32(dst_reg, imm);
+                       break;
+
+               /*
+                * BPF_FROM_BE/LE
+                */
+               case BPF_ALU | BPF_END | BPF_FROM_LE:
+                       switch (imm) {
+                       case 16:
+                               /* Copy 16 bits to upper part */
+                               EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg, 16, 0, 15));
+                               /* Rotate 8 bits right & mask */
+                               EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31));
+                               break;
+                       case 32:
+                               /*
+                                * Rotate word left by 8 bits:
+                                * 2 bytes are already in their final position
+                                * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
+                                */
+                               EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31));
+                               /* Rotate 24 bits and insert byte 1 */
+                               EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7));
+                               /* Rotate 24 bits and insert byte 3 */
+                               EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23));
+                               EMIT(PPC_RAW_MR(dst_reg, __REG_R0));
+                               break;
+                       case 64:
+                               bpf_set_seen_register(ctx, tmp_reg);
+                               EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31));
+                               EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31));
+                               /* Rotate 24 bits and insert byte 1 */
+                               EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7));
+                               EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7));
+                               /* Rotate 24 bits and insert byte 3 */
+                               EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23));
+                               EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23));
+                               EMIT(PPC_RAW_MR(dst_reg, __REG_R0));
+                               EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg));
+                               break;
+                       }
+                       break;
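
The 32-bit case above is the classic three-rotate byte swap: rotating left by 8 already lands bytes 2 and 4 in their final spots, and two rotate-and-insert (rlwimi) steps drop bytes 1 and 3 into place. The same permutation expressed in portable C (swab32_by_rotates is an illustrative name, not a kernel helper):

#include <assert.h>
#include <stdint.h>

/* Valid for 0 < n < 32, which is all this sketch needs. */
static uint32_t rotl32(uint32_t x, unsigned int n)
{
	return (x << n) | (x >> (32 - n));
}

/* Byte swap built the way the JIT does it: one rotate plus two insert-under-mask steps. */
static uint32_t swab32_by_rotates(uint32_t x)
{
	uint32_t r = rotl32(x, 8);				/* bytes 2 and 4 now in place */

	r = (r & ~0xff000000u) | (rotl32(x, 24) & 0xff000000u);	/* insert byte 1 */
	r = (r & ~0x0000ff00u) | (rotl32(x, 24) & 0x0000ff00u);	/* insert byte 3 */
	return r;
}

int main(void)
{
	assert(swab32_by_rotates(0x11223344) == 0x44332211);
	return 0;
}
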
+               case BPF_ALU | BPF_END | BPF_FROM_BE:
+                       switch (imm) {
+                       case 16:
+                               /* zero-extend 16 bits into 32 bits */
+                               EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 16, 31));
+                               break;
+                       case 32:
+                       case 64:
+                               /* nop */
+                               break;
+                       }
+                       break;
+
+               /*
+                * BPF_ST(X)
+                */
+               case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
+                       EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
+                       break;
+               case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+                       PPC_LI32(__REG_R0, imm);
+                       EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off));
+                       break;
+               case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
+                       EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
+                       break;
+               case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
+                       PPC_LI32(__REG_R0, imm);
+                       EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off));
+                       break;
+               case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+                       EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
+                       break;
+               case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+                       PPC_LI32(__REG_R0, imm);
+                       EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off));
+                       break;
+               case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
+                       EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off));
+                       EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4));
+                       break;
+               case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+                       PPC_LI32(__REG_R0, imm);
+                       EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4));
+                       PPC_EX32(__REG_R0, imm);
+                       EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off));
+                       break;
+
+               /*
+                * BPF_STX XADD (atomic_add)
+                */
+               case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */
+                       bpf_set_seen_register(ctx, tmp_reg);
+                       /* Get offset into TMP_REG */
+                       EMIT(PPC_RAW_LI(tmp_reg, off));
+                       /* load value from memory into r0 */
+                       EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0));
+                       /* add value from src_reg into this */
+                       EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg));
+                       /* store result back */
+                       EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg));
+                       /* we're done if this succeeded */
+                       PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4);
+                       break;
+
+               case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */
+                       return -EOPNOTSUPP;
+
+               /*
+                * BPF_LDX
+                */
+               case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+                       EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+                       if (!fp->aux->verifier_zext)
+                               EMIT(PPC_RAW_LI(dst_reg_h, 0));
+                       break;
+               case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+                       EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+                       if (!fp->aux->verifier_zext)
+                               EMIT(PPC_RAW_LI(dst_reg_h, 0));
+                       break;
+               case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+                       EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+                       if (!fp->aux->verifier_zext)
+                               EMIT(PPC_RAW_LI(dst_reg_h, 0));
+                       break;
+               case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+                       EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+                       EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+                       break;
+
+               /*
+                * Doubleword load
+                * 16 byte instruction that uses two 'struct bpf_insn'
+                */
+               case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+                       PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
+                       PPC_LI32(dst_reg, (u32)insn[i].imm);
+                       /* Adjust for two bpf instructions */
+                       addrs[++i] = ctx->idx * 4;
+                       break;
+
+               /*
+                * Return/Exit
+                */
+               case BPF_JMP | BPF_EXIT:
+                       /*
+                        * If this isn't the very last instruction, branch to
+                        * the epilogue. If we _are_ the last instruction,
+                        * we'll just fall through to the epilogue.
+                        */
+                       if (i != flen - 1)
+                               PPC_JMP(exit_addr);
+                       /* else fall through to the epilogue */
+                       break;
+
+               /*
+                * Call kernel helper or bpf function
+                */
+               case BPF_JMP | BPF_CALL:
+                       ctx->seen |= SEEN_FUNC;
+
+                       ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+                                                   &func_addr, &func_addr_fixed);
+                       if (ret < 0)
+                               return ret;
+
+                       if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) {
+                               EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8));
+                               EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12));
+                       }
+
+                       bpf_jit_emit_func_call_rel(image, ctx, func_addr);
+
+                       EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3));
+                       EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4));
+                       break;
+
+               /*
+                * Jumps and branches
+                */
+               case BPF_JMP | BPF_JA:
+                       PPC_JMP(addrs[i + 1 + off]);
+                       break;
+
+               case BPF_JMP | BPF_JGT | BPF_K:
+               case BPF_JMP | BPF_JGT | BPF_X:
+               case BPF_JMP | BPF_JSGT | BPF_K:
+               case BPF_JMP | BPF_JSGT | BPF_X:
+               case BPF_JMP32 | BPF_JGT | BPF_K:
+               case BPF_JMP32 | BPF_JGT | BPF_X:
+               case BPF_JMP32 | BPF_JSGT | BPF_K:
+               case BPF_JMP32 | BPF_JSGT | BPF_X:
+                       true_cond = COND_GT;
+                       goto cond_branch;
+               case BPF_JMP | BPF_JLT | BPF_K:
+               case BPF_JMP | BPF_JLT | BPF_X:
+               case BPF_JMP | BPF_JSLT | BPF_K:
+               case BPF_JMP | BPF_JSLT | BPF_X:
+               case BPF_JMP32 | BPF_JLT | BPF_K:
+               case BPF_JMP32 | BPF_JLT | BPF_X:
+               case BPF_JMP32 | BPF_JSLT | BPF_K:
+               case BPF_JMP32 | BPF_JSLT | BPF_X:
+                       true_cond = COND_LT;
+                       goto cond_branch;
+               case BPF_JMP | BPF_JGE | BPF_K:
+               case BPF_JMP | BPF_JGE | BPF_X:
+               case BPF_JMP | BPF_JSGE | BPF_K:
+               case BPF_JMP | BPF_JSGE | BPF_X:
+               case BPF_JMP32 | BPF_JGE | BPF_K:
+               case BPF_JMP32 | BPF_JGE | BPF_X:
+               case BPF_JMP32 | BPF_JSGE | BPF_K:
+               case BPF_JMP32 | BPF_JSGE | BPF_X:
+                       true_cond = COND_GE;
+                       goto cond_branch;
+               case BPF_JMP | BPF_JLE | BPF_K:
+               case BPF_JMP | BPF_JLE | BPF_X:
+               case BPF_JMP | BPF_JSLE | BPF_K:
+               case BPF_JMP | BPF_JSLE | BPF_X:
+               case BPF_JMP32 | BPF_JLE | BPF_K:
+               case BPF_JMP32 | BPF_JLE | BPF_X:
+               case BPF_JMP32 | BPF_JSLE | BPF_K:
+               case BPF_JMP32 | BPF_JSLE | BPF_X:
+                       true_cond = COND_LE;
+                       goto cond_branch;
+               case BPF_JMP | BPF_JEQ | BPF_K:
+               case BPF_JMP | BPF_JEQ | BPF_X:
+               case BPF_JMP32 | BPF_JEQ | BPF_K:
+               case BPF_JMP32 | BPF_JEQ | BPF_X:
+                       true_cond = COND_EQ;
+                       goto cond_branch;
+               case BPF_JMP | BPF_JNE | BPF_K:
+               case BPF_JMP | BPF_JNE | BPF_X:
+               case BPF_JMP32 | BPF_JNE | BPF_K:
+               case BPF_JMP32 | BPF_JNE | BPF_X:
+                       true_cond = COND_NE;
+                       goto cond_branch;
+               case BPF_JMP | BPF_JSET | BPF_K:
+               case BPF_JMP | BPF_JSET | BPF_X:
+               case BPF_JMP32 | BPF_JSET | BPF_K:
+               case BPF_JMP32 | BPF_JSET | BPF_X:
+                       true_cond = COND_NE;
+                       /* fallthrough; */
+
+cond_branch:
+                       switch (code) {
+                       case BPF_JMP | BPF_JGT | BPF_X:
+                       case BPF_JMP | BPF_JLT | BPF_X:
+                       case BPF_JMP | BPF_JGE | BPF_X:
+                       case BPF_JMP | BPF_JLE | BPF_X:
+                       case BPF_JMP | BPF_JEQ | BPF_X:
+                       case BPF_JMP | BPF_JNE | BPF_X:
+                               /* unsigned comparison */
+                               EMIT(PPC_RAW_CMPLW(dst_reg_h, src_reg_h));
+                               PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+                               EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+                               break;
+                       case BPF_JMP32 | BPF_JGT | BPF_X:
+                       case BPF_JMP32 | BPF_JLT | BPF_X:
+                       case BPF_JMP32 | BPF_JGE | BPF_X:
+                       case BPF_JMP32 | BPF_JLE | BPF_X:
+                       case BPF_JMP32 | BPF_JEQ | BPF_X:
+                       case BPF_JMP32 | BPF_JNE | BPF_X:
+                               /* unsigned comparison */
+                               EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+                               break;
+                       case BPF_JMP | BPF_JSGT | BPF_X:
+                       case BPF_JMP | BPF_JSLT | BPF_X:
+                       case BPF_JMP | BPF_JSGE | BPF_X:
+                       case BPF_JMP | BPF_JSLE | BPF_X:
+                               /* signed comparison */
+                               EMIT(PPC_RAW_CMPW(dst_reg_h, src_reg_h));
+                               PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+                               EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+                               break;
+                       case BPF_JMP32 | BPF_JSGT | BPF_X:
+                       case BPF_JMP32 | BPF_JSLT | BPF_X:
+                       case BPF_JMP32 | BPF_JSGE | BPF_X:
+                       case BPF_JMP32 | BPF_JSLE | BPF_X:
+                               /* signed comparison */
+                               EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
+                               break;
+                       case BPF_JMP | BPF_JSET | BPF_X:
+                               EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h));
+                               PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+                               EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg));
+                               break;
+                       case BPF_JMP32 | BPF_JSET | BPF_X: {
+                               EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg));
+                               break;
+                       case BPF_JMP | BPF_JNE | BPF_K:
+                       case BPF_JMP | BPF_JEQ | BPF_K:
+                       case BPF_JMP | BPF_JGT | BPF_K:
+                       case BPF_JMP | BPF_JLT | BPF_K:
+                       case BPF_JMP | BPF_JGE | BPF_K:
+                       case BPF_JMP | BPF_JLE | BPF_K:
+                               /*
+                                * Need sign-extended load, so only positive
+                                * values can be used as imm in cmplwi
+                                */
+                               if (imm >= 0 && imm < 32768) {
+                                       EMIT(PPC_RAW_CMPLWI(dst_reg_h, 0));
+                                       PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+                                       EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+                               } else {
+                                       /* sign-extending load ... but unsigned comparison */
+                                       PPC_EX32(__REG_R0, imm);
+                                       EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0));
+                                       PPC_LI32(__REG_R0, imm);
+                                       PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+                                       EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+                               }
+                               break;
+                       case BPF_JMP32 | BPF_JNE | BPF_K:
+                       case BPF_JMP32 | BPF_JEQ | BPF_K:
+                       case BPF_JMP32 | BPF_JGT | BPF_K:
+                       case BPF_JMP32 | BPF_JLT | BPF_K:
+                       case BPF_JMP32 | BPF_JGE | BPF_K:
+                       case BPF_JMP32 | BPF_JLE | BPF_K:
+                               if (imm >= 0 && imm < 65536) {
+                                       EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+                               } else {
+                                       PPC_LI32(__REG_R0, imm);
+                                       EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+                               }
+                               break;
+                       }
+                       case BPF_JMP | BPF_JSGT | BPF_K:
+                       case BPF_JMP | BPF_JSLT | BPF_K:
+                       case BPF_JMP | BPF_JSGE | BPF_K:
+                       case BPF_JMP | BPF_JSLE | BPF_K:
+                               if (imm >= 0 && imm < 65536) {
+                                       EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+                                       PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+                                       EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+                               } else {
+                                       /* sign-extending load */
+                                       EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+                                       PPC_LI32(__REG_R0, imm);
+                                       PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+                                       EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+                               }
+                               break;
+                       case BPF_JMP32 | BPF_JSGT | BPF_K:
+                       case BPF_JMP32 | BPF_JSLT | BPF_K:
+                       case BPF_JMP32 | BPF_JSGE | BPF_K:
+                       case BPF_JMP32 | BPF_JSLE | BPF_K:
+                               /*
+                                * signed comparison, so any 16-bit value
+                                * can be used in cmpwi
+                                */
+                               if (imm >= -32768 && imm < 32768) {
+                                       EMIT(PPC_RAW_CMPWI(dst_reg, imm));
+                               } else {
+                                       /* sign-extending load */
+                                       PPC_LI32(__REG_R0, imm);
+                                       EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0));
+                               }
+                               break;
+                       case BPF_JMP | BPF_JSET | BPF_K:
+                               /* andi does not sign-extend the immediate */
+                               if (imm >= 0 && imm < 32768) {
+                                       /* PPC_ANDI is _only/always_ dot-form */
+                                       EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm));
+                               } else {
+                                       PPC_LI32(__REG_R0, imm);
+                                       if (imm < 0) {
+                                               EMIT(PPC_RAW_CMPWI(dst_reg_h, 0));
+                                               PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+                                       }
+                                       EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0));
+                               }
+                               break;
+                       case BPF_JMP32 | BPF_JSET | BPF_K:
+                               /* andi does not sign-extend the immediate */
+                               if (imm >= -32768 && imm < 32768) {
+                                       /* PPC_ANDI is _only/always_ dot-form */
+                                       EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm));
+                               } else {
+                                       PPC_LI32(__REG_R0, imm);
+                                       EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0));
+                               }
+                               break;
+                       }
+                       PPC_BCC(true_cond, addrs[i + 1 + off]);
+                       break;
+
+               /*
+                * Tail call
+                */
+               case BPF_JMP | BPF_TAIL_CALL:
+                       ctx->seen |= SEEN_TAILCALL;
+                       bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+                       break;
+
+               default:
+                       /*
+                        * The filter contains something cruel & unusual.
+                        * We don't handle it, but also there shouldn't be
+                        * anything missing from our list.
+                        */
+                       pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", code, i);
+                       return -EOPNOTSUPP;
+               }
+               if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext &&
+                   !insn_is_zext(&insn[i + 1]))
+                       EMIT(PPC_RAW_LI(dst_reg_h, 0));
+       }
+
+       /* Set end-of-body-code address for exit. */
+       addrs[i] = ctx->idx * 4;
+
+       return 0;
+}
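For reference, a minimal C sketch (not part of the patch) of what the emitted rlwinm/rlwimi sequence for the 32-bit byte-swap case computes: rotating the word left by 8 bits places bytes 2 and 4, and two rotate-and-insert steps supply bytes 1 and 3 from a 24-bit rotation.

#include <stdint.h>

/* rotl32() stands in for the rotate step of rlwinm/rlwimi (n in 1..31). */
static uint32_t rotl32(uint32_t x, unsigned int n)
{
        return (x << n) | (x >> (32 - n));
}

/* Equivalent of: rlwinm r0,dst,8,0,31; rlwimi r0,dst,24,0,7;
 * rlwimi r0,dst,24,16,23; mr dst,r0 -- e.g. 0x11223344 -> 0x44332211. */
static uint32_t bswap32_by_rotates(uint32_t x)
{
        uint32_t r = rotl32(x, 8);                              /* bytes 2 and 4 in place */

        r = (r & 0x00ffffffu) | (rotl32(x, 24) & 0xff000000u);  /* insert byte 1 */
        r = (r & 0xffff00ffu) | (rotl32(x, 24) & 0x0000ff00u);  /* insert byte 3 */
        return r;
}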
index aaf1a88..57a8c11 100644 (file)
 
 #include "bpf_jit64.h"
 
-static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
-{
-       memset32(area, BREAKPOINT_INSTRUCTION, size/4);
-}
-
-static inline void bpf_flush_icache(void *start, void *end)
-{
-       smp_wmb();
-       flush_icache_range((unsigned long)start, (unsigned long)end);
-}
-
-static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
-{
-       return (ctx->seen & (1 << (31 - b2p[i])));
-}
-
-static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
-{
-       ctx->seen |= (1 << (31 - b2p[i]));
-}
-
 static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
 {
        /*
@@ -47,7 +26,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
         * - the bpf program uses its stack area
         * The latter condition is deduced from the usage of BPF_REG_FP
         */
-       return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
+       return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]);
 }
 
 /*
@@ -85,7 +64,11 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
        BUG();
 }
 
-static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+}
+
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
        int i;
 
@@ -124,11 +107,11 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
         * in the protected zone below the previous stack frame
         */
        for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-               if (bpf_is_seen_register(ctx, i))
+               if (bpf_is_seen_register(ctx, b2p[i]))
                        PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 
        /* Setup frame pointer to point to the bpf stack area */
-       if (bpf_is_seen_register(ctx, BPF_REG_FP))
+       if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP]))
                EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1,
                                STACK_FRAME_MIN_SIZE + ctx->stack_size));
 }
@@ -139,7 +122,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
 
        /* Restore NVRs */
        for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-               if (bpf_is_seen_register(ctx, i))
+               if (bpf_is_seen_register(ctx, b2p[i]))
                        PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 
        /* Tear down our stack frame */
@@ -152,7 +135,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
        }
 }
 
-static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 {
        bpf_jit_emit_common_epilogue(image, ctx);
 
@@ -187,8 +170,7 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
        EMIT(PPC_RAW_BLRL());
 }
 
-static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
-                                      u64 func)
+void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
 {
        unsigned int i, ctx_idx = ctx->idx;
 
@@ -289,9 +271,8 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 }
 
 /* Assemble the body code between the prologue & epilogue */
-static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
-                             struct codegen_context *ctx,
-                             u32 *addrs, bool extra_pass)
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+                      u32 *addrs, bool extra_pass)
 {
        const struct bpf_insn *insn = fp->insnsi;
        int flen = fp->len;
@@ -330,9 +311,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
                 * any issues.
                 */
                if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
-                       bpf_set_seen_register(ctx, insn[i].dst_reg);
+                       bpf_set_seen_register(ctx, dst_reg);
                if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
-                       bpf_set_seen_register(ctx, insn[i].src_reg);
+                       bpf_set_seen_register(ctx, src_reg);
 
                switch (code) {
                /*
@@ -1026,249 +1007,3 @@ cond_branch:
 
        return 0;
 }
-
-/* Fix the branch target addresses for subprog calls */
-static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
-                                      struct codegen_context *ctx, u32 *addrs)
-{
-       const struct bpf_insn *insn = fp->insnsi;
-       bool func_addr_fixed;
-       u64 func_addr;
-       u32 tmp_idx;
-       int i, ret;
-
-       for (i = 0; i < fp->len; i++) {
-               /*
-                * During the extra pass, only the branch target addresses for
-                * the subprog calls need to be fixed. All other instructions
-                * can be left untouched.
-                *
-                * The JITed image length does not change because we already
-                * ensure that the JITed instruction sequence for these calls
-                * are of fixed length by padding them with NOPs.
-                */
-               if (insn[i].code == (BPF_JMP | BPF_CALL) &&
-                   insn[i].src_reg == BPF_PSEUDO_CALL) {
-                       ret = bpf_jit_get_func_addr(fp, &insn[i], true,
-                                                   &func_addr,
-                                                   &func_addr_fixed);
-                       if (ret < 0)
-                               return ret;
-
-                       /*
-                        * Save ctx->idx as this would currently point to the
-                        * end of the JITed image and set it to the offset of
-                        * the instruction sequence corresponding to the
-                        * subprog call temporarily.
-                        */
-                       tmp_idx = ctx->idx;
-                       ctx->idx = addrs[i] / 4;
-                       bpf_jit_emit_func_call_rel(image, ctx, func_addr);
-
-                       /*
-                        * Restore ctx->idx here. This is safe as the length
-                        * of the JITed sequence remains unchanged.
-                        */
-                       ctx->idx = tmp_idx;
-               }
-       }
-
-       return 0;
-}
-
-struct powerpc64_jit_data {
-       struct bpf_binary_header *header;
-       u32 *addrs;
-       u8 *image;
-       u32 proglen;
-       struct codegen_context ctx;
-};
-
-bool bpf_jit_needs_zext(void)
-{
-       return true;
-}
-
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
-{
-       u32 proglen;
-       u32 alloclen;
-       u8 *image = NULL;
-       u32 *code_base;
-       u32 *addrs;
-       struct powerpc64_jit_data *jit_data;
-       struct codegen_context cgctx;
-       int pass;
-       int flen;
-       struct bpf_binary_header *bpf_hdr;
-       struct bpf_prog *org_fp = fp;
-       struct bpf_prog *tmp_fp;
-       bool bpf_blinded = false;
-       bool extra_pass = false;
-
-       if (!fp->jit_requested)
-               return org_fp;
-
-       tmp_fp = bpf_jit_blind_constants(org_fp);
-       if (IS_ERR(tmp_fp))
-               return org_fp;
-
-       if (tmp_fp != org_fp) {
-               bpf_blinded = true;
-               fp = tmp_fp;
-       }
-
-       jit_data = fp->aux->jit_data;
-       if (!jit_data) {
-               jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
-               if (!jit_data) {
-                       fp = org_fp;
-                       goto out;
-               }
-               fp->aux->jit_data = jit_data;
-       }
-
-       flen = fp->len;
-       addrs = jit_data->addrs;
-       if (addrs) {
-               cgctx = jit_data->ctx;
-               image = jit_data->image;
-               bpf_hdr = jit_data->header;
-               proglen = jit_data->proglen;
-               alloclen = proglen + FUNCTION_DESCR_SIZE;
-               extra_pass = true;
-               goto skip_init_ctx;
-       }
-
-       addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
-       if (addrs == NULL) {
-               fp = org_fp;
-               goto out_addrs;
-       }
-
-       memset(&cgctx, 0, sizeof(struct codegen_context));
-
-       /* Make sure that the stack is quadword aligned. */
-       cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
-
-       /* Scouting faux-generate pass 0 */
-       if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
-               /* We hit something illegal or unsupported. */
-               fp = org_fp;
-               goto out_addrs;
-       }
-
-       /*
-        * If we have seen a tail call, we need a second pass.
-        * This is because bpf_jit_emit_common_epilogue() is called
-        * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
-        */
-       if (cgctx.seen & SEEN_TAILCALL) {
-               cgctx.idx = 0;
-               if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
-                       fp = org_fp;
-                       goto out_addrs;
-               }
-       }
-
-       /*
-        * Pretend to build prologue, given the features we've seen.  This will
-        * update cgctx.idx as it pretends to output instructions, then we can
-        * calculate total size from idx.
-        */
-       bpf_jit_build_prologue(0, &cgctx);
-       bpf_jit_build_epilogue(0, &cgctx);
-
-       proglen = cgctx.idx * 4;
-       alloclen = proglen + FUNCTION_DESCR_SIZE;
-
-       bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
-                       bpf_jit_fill_ill_insns);
-       if (!bpf_hdr) {
-               fp = org_fp;
-               goto out_addrs;
-       }
-
-skip_init_ctx:
-       code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
-
-       if (extra_pass) {
-               /*
-                * Do not touch the prologue and epilogue as they will remain
-                * unchanged. Only fix the branch target address for subprog
-                * calls in the body.
-                *
-                * This does not change the offsets and lengths of the subprog
-                * call instruction sequences and hence, the size of the JITed
-                * image as well.
-                */
-               bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
-
-               /* There is no need to perform the usual passes. */
-               goto skip_codegen_passes;
-       }
-
-       /* Code generation passes 1-2 */
-       for (pass = 1; pass < 3; pass++) {
-               /* Now build the prologue, body code & epilogue for real. */
-               cgctx.idx = 0;
-               bpf_jit_build_prologue(code_base, &cgctx);
-               bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
-               bpf_jit_build_epilogue(code_base, &cgctx);
-
-               if (bpf_jit_enable > 1)
-                       pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
-                               proglen - (cgctx.idx * 4), cgctx.seen);
-       }
-
-skip_codegen_passes:
-       if (bpf_jit_enable > 1)
-               /*
-                * Note that we output the base address of the code_base
-                * rather than image, since opcodes are in code_base.
-                */
-               bpf_jit_dump(flen, proglen, pass, code_base);
-
-#ifdef PPC64_ELF_ABI_v1
-       /* Function descriptor nastiness: Address + TOC */
-       ((u64 *)image)[0] = (u64)code_base;
-       ((u64 *)image)[1] = local_paca->kernel_toc;
-#endif
-
-       fp->bpf_func = (void *)image;
-       fp->jited = 1;
-       fp->jited_len = alloclen;
-
-       bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
-       if (!fp->is_func || extra_pass) {
-               bpf_prog_fill_jited_linfo(fp, addrs);
-out_addrs:
-               kfree(addrs);
-               kfree(jit_data);
-               fp->aux->jit_data = NULL;
-       } else {
-               jit_data->addrs = addrs;
-               jit_data->ctx = cgctx;
-               jit_data->proglen = proglen;
-               jit_data->image = image;
-               jit_data->header = bpf_hdr;
-       }
-
-out:
-       if (bpf_blinded)
-               bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
-
-       return fp;
-}
-
-/* Overriding bpf_jit_free() as we don't set images read-only. */
-void bpf_jit_free(struct bpf_prog *fp)
-{
-       unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
-       struct bpf_binary_header *bpf_hdr = (void *)addr;
-
-       if (fp->jited)
-               bpf_jit_binary_free(bpf_hdr);
-
-       bpf_prog_unlock_free(fp);
-}
index 766f064..16d4d1b 100644 (file)
@@ -17,6 +17,7 @@
 #include <asm/firmware.h>
 #include <asm/ptrace.h>
 #include <asm/code-patching.h>
+#include <asm/interrupt.h>
 
 #ifdef CONFIG_PPC64
 #include "internal.h"
@@ -168,7 +169,7 @@ static bool regs_use_siar(struct pt_regs *regs)
         * they have not been setup using perf_read_regs() and so regs->result
         * is something random.
         */
-       return ((TRAP(regs) == 0xf00) && regs->result);
+       return ((TRAP(regs) == INTERRUPT_PERFMON) && regs->result);
 }
 
 /*
@@ -347,7 +348,7 @@ static inline void perf_read_regs(struct pt_regs *regs)
         * hypervisor samples as well as samples in the kernel with
         * interrupts off hence the userspace check.
         */
-       if (TRAP(regs) != 0xf00)
+       if (TRAP(regs) != INTERRUPT_PERFMON)
                use_siar = 0;
        else if ((ppmu->flags & PPMU_NO_SIAR))
                use_siar = 0;
@@ -1963,6 +1964,17 @@ static int power_pmu_event_init(struct perf_event *event)
                return -ENOENT;
        }
 
+       /*
+        * PMU config registers have fields that are
+        * reserved, and some specific values for bit fields are reserved.
+        * For example, MMCRA[61:62] is Random Sampling Mode (SM),
+        * and the value 0b11 for this field is reserved.
+        * Check for invalid values in attr.config.
+        */
+       if (ppmu->check_attr_config &&
+           ppmu->check_attr_config(event))
+               return -EINVAL;
+
        event->hw.config_base = ev;
        event->hw.idx = 0;
 
@@ -2206,9 +2218,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
                                                ppmu->get_mem_data_src)
                        ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
 
-               if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
+               if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
                                                ppmu->get_mem_weight)
-                       ppmu->get_mem_weight(&data.weight.full);
+                       ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
 
                if (perf_event_overflow(event, &data, regs))
                        power_pmu_stop(event, 0);
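From userspace, the new check_attr_config hook shows up as perf_event_open() failing with EINVAL when attr.config encodes a reserved PMU value. A sketch, assuming a raw hardware event opened with PERF_TYPE_RAW; the event code below is the power10 MEM_LOADS encoding from later in this series, while a config hitting the reserved sampling-mode or threshold checks would now be rejected:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        long fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        attr.config = 0x35340401e0ULL;  /* MEM_LOADS; reserved encodings now get -EINVAL */

        fd = perf_event_open(&attr, 0, -1, -1, 0);
        if (fd < 0)
                perror("perf_event_open");
        else
                close(fd);
        return 0;
}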
index e5eb332..1816f56 100644 (file)
@@ -226,14 +226,14 @@ static struct attribute_group event_long_desc_group = {
 
 static struct kmem_cache *hv_page_cache;
 
-DEFINE_PER_CPU(int, hv_24x7_txn_flags);
-DEFINE_PER_CPU(int, hv_24x7_txn_err);
+static DEFINE_PER_CPU(int, hv_24x7_txn_flags);
+static DEFINE_PER_CPU(int, hv_24x7_txn_err);
 
 struct hv_24x7_hw {
        struct perf_event *events[255];
 };
 
-DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
+static DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
 
 /*
  * request_buffer and result_buffer are not required to be 4k aligned,
@@ -241,8 +241,8 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
  * the simplest way to ensure that.
  */
 #define H24x7_DATA_BUFFER_SIZE 4096
-DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
-DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+static DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+static DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
 
 static unsigned int max_num_requests(int interface_version)
 {
index e4f577d..f92bf5f 100644 (file)
@@ -21,7 +21,7 @@ PMU_FORMAT_ATTR(thresh_stop,  "config:32-35");
 PMU_FORMAT_ATTR(thresh_start,  "config:36-39");
 PMU_FORMAT_ATTR(thresh_cmp,    "config:40-49");
 
-struct attribute *isa207_pmu_format_attr[] = {
+static struct attribute *isa207_pmu_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_pmcxsel.attr,
        &format_attr_mark.attr,
@@ -275,17 +275,47 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
 
        sier = mfspr(SPRN_SIER);
        val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
-       if (val == 1 || val == 2) {
-               idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
-               sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;
+       if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31)))
+               return;
+
+       idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
+       sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;
+
+       dsrc->val = isa207_find_source(idx, sub_idx);
+       if (val == 7) {
+               u64 mmcra;
+               u32 op_type;
 
-               dsrc->val = isa207_find_source(idx, sub_idx);
+               /*
+                * Type 0b111 denotes either larx or stcx instruction. Use the
+                * MMCRA sampling bits [57:59] along with the type value
+                * to determine the exact instruction type. If the sampling
+                * criteria is neither load nor store, set the type as default
+                * to NA.
+                */
+               mmcra = mfspr(SPRN_MMCRA);
+
+               op_type = (mmcra >> MMCRA_SAMP_ELIG_SHIFT) & MMCRA_SAMP_ELIG_MASK;
+               switch (op_type) {
+               case 5:
+                       dsrc->val |= P(OP, LOAD);
+                       break;
+               case 7:
+                       dsrc->val |= P(OP, STORE);
+                       break;
+               default:
+                       dsrc->val |= P(OP, NA);
+                       break;
+               }
+       } else {
                dsrc->val |= (val == 1) ? P(OP, LOAD) : P(OP, STORE);
        }
 }
 
-void isa207_get_mem_weight(u64 *weight)
+void isa207_get_mem_weight(u64 *weight, u64 type)
 {
+       union perf_sample_weight *weight_fields;
+       u64 weight_lat;
        u64 mmcra = mfspr(SPRN_MMCRA);
        u64 exp = MMCRA_THR_CTR_EXP(mmcra);
        u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
@@ -295,10 +325,31 @@ void isa207_get_mem_weight(u64 *weight)
        if (cpu_has_feature(CPU_FTR_ARCH_31))
                mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);
 
-       if (val == 0 || val == 7)
-               *weight = 0;
+       if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31)))
+               weight_lat = 0;
        else
-               *weight = mantissa << (2 * exp);
+               weight_lat = mantissa << (2 * exp);
+
+       /*
+        * Use 64 bit weight field (full) if sample type is
+        * WEIGHT.
+        *
+        * if sample type is WEIGHT_STRUCT:
+        * - store memory latency in the lower 32 bits.
+        * - For ISA v3.1, use remaining two 16 bit fields of
+        *   perf_sample_weight to store cycle counter values
+        *   from sier2.
+        */
+       weight_fields = (union perf_sample_weight *)weight;
+       if (type & PERF_SAMPLE_WEIGHT)
+               weight_fields->full = weight_lat;
+       else {
+               weight_fields->var1_dw = (u32)weight_lat;
+               if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+                       weight_fields->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2));
+                       weight_fields->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2));
+               }
+       }
 }
 
 int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
@@ -447,8 +498,8 @@ ebb_bhrb:
         * EBB events are pinned & exclusive, so this should never actually
         * hit, but we leave it as a fallback in case.
         */
-       mask  |= CNST_EBB_VAL(ebb);
-       value |= CNST_EBB_MASK;
+       mask  |= CNST_EBB_MASK;
+       value |= CNST_EBB_VAL(ebb);
 
        *maskp = mask;
        *valp = value;
@@ -694,3 +745,45 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
 
        return num_alt;
 }
+
+int isa3XX_check_attr_config(struct perf_event *ev)
+{
+       u64 val, sample_mode;
+       u64 event = ev->attr.config;
+
+       val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+       sample_mode = val & 0x3;
+
+       /*
+        * MMCRA[61:62] is Random Sampling Mode (SM).
+        * value of 0b11 is reserved.
+        */
+       if (sample_mode == 0x3)
+               return -EINVAL;
+
+       /*
+        * Check for all reserved values
+        * Source: Performance Monitoring Unit User Guide
+        */
+       switch (val) {
+       case 0x5:
+       case 0x9:
+       case 0xD:
+       case 0x19:
+       case 0x1D:
+       case 0x1A:
+       case 0x1E:
+               return -EINVAL;
+       }
+
+       /*
+        * MMCRA[48:51]/[52:55] Threshold Start/Stop
+        * Events Selection.
+        * 0b11110000/0b00001111 is reserved.
+        */
+       val = (event >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
+       if (((val & 0xF0) == 0xF0) || ((val & 0xF) == 0xF))
+               return -EINVAL;
+
+       return 0;
+}
index 1af0e8c..4a2cbc3 100644 (file)
 /* Bits in MMCRA for PowerISA v2.07 */
 #define MMCRA_SAMP_MODE_SHIFT          1
 #define MMCRA_SAMP_ELIG_SHIFT          4
+#define MMCRA_SAMP_ELIG_MASK           7
 #define MMCRA_THR_CTL_SHIFT            8
 #define MMCRA_THR_SEL_SHIFT            16
 #define MMCRA_THR_CMP_SHIFT            32
 #define ISA207_SIER_DATA_SRC_SHIFT     53
 #define ISA207_SIER_DATA_SRC_MASK      (0x7ull << ISA207_SIER_DATA_SRC_SHIFT)
 
+/* Bits in SIER2/SIER3 for Power10 */
+#define P10_SIER2_FINISH_CYC(sier2)    (((sier2) >> (63 - 37)) & 0x7fful)
+#define P10_SIER2_DISPATCH_CYC(sier2)  (((sier2) >> (63 - 13)) & 0x7fful)
+
 #define P(a, b)                                PERF_MEM_S(a, b)
 #define PH(a, b)                       (P(LVL, HIT) | P(a, b))
 #define PM(a, b)                       (P(LVL, MISS) | P(a, b))
@@ -278,6 +283,8 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
                                        const unsigned int ev_alt[][MAX_ALT]);
 void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
                                                        struct pt_regs *regs);
-void isa207_get_mem_weight(u64 *weight);
+void isa207_get_mem_weight(u64 *weight, u64 type);
+
+int isa3XX_check_attr_config(struct perf_event *ev);
 
 #endif
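The SIER2 macros above follow the ISA convention of numbering bits from the most significant end of the 64-bit register, so a field whose rightmost bit is ISA bit N is reached by shifting right by (63 - N). A small illustration of that convention:

#include <stdint.h>

/* IBM bit numbering: bit 0 is the MSB. Extract a 'width'-bit field whose
 * last (rightmost) bit is ISA bit 'last_bit' of a 64-bit SPR value. */
static inline uint64_t extract_ibm_field(uint64_t reg, unsigned int last_bit,
                                         unsigned int width)
{
        return (reg >> (63 - last_bit)) & ((1ULL << width) - 1);
}

/* P10_SIER2_FINISH_CYC(sier2)   == extract_ibm_field(sier2, 37, 11)
 * P10_SIER2_DISPATCH_CYC(sier2) == extract_ibm_field(sier2, 13, 11)
 * (0x7ff masks an 11-bit field in both macros). */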
index e45dafe..93be719 100644 (file)
@@ -75,5 +75,5 @@ EVENT(PM_RUN_INST_CMPL_ALT,                   0x00002);
  *     thresh end (TE)
  */
 
-EVENT(MEM_LOADS,                               0x34340401e0);
-EVENT(MEM_STORES,                              0x343c0401e0);
+EVENT(MEM_LOADS,                               0x35340401e0);
+EVENT(MEM_STORES,                              0x353c0401e0);
index a901c13..f9d64c6 100644 (file)
@@ -106,6 +106,18 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
        return num_alt;
 }
 
+static int power10_check_attr_config(struct perf_event *ev)
+{
+       u64 val;
+       u64 event = ev->attr.config;
+
+       val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+       if (val == 0x10 || isa3XX_check_attr_config(ev))
+               return -EINVAL;
+
+       return 0;
+}
+
 GENERIC_EVENT_ATTR(cpu-cycles,                 PM_RUN_CYC);
 GENERIC_EVENT_ATTR(instructions,               PM_RUN_INST_CMPL);
 GENERIC_EVENT_ATTR(branch-instructions,                PM_BR_CMPL);
@@ -559,6 +571,7 @@ static struct power_pmu power10_pmu = {
        .attr_groups            = power10_pmu_attr_groups,
        .bhrb_nr                = 32,
        .capabilities           = PERF_PMU_CAP_EXTENDED_REGS,
+       .check_attr_config      = power10_check_attr_config,
 };
 
 int init_power10_pmu(void)
index 2a57e93..ff33821 100644 (file)
@@ -151,6 +151,18 @@ static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
        return num_alt;
 }
 
+static int power9_check_attr_config(struct perf_event *ev)
+{
+       u64 val;
+       u64 event = ev->attr.config;
+
+       val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+       if (val == 0xC || isa3XX_check_attr_config(ev))
+               return -EINVAL;
+
+       return 0;
+}
+
 GENERIC_EVENT_ATTR(cpu-cycles,                 PM_CYC);
 GENERIC_EVENT_ATTR(stalled-cycles-frontend,    PM_ICT_NOSLOT_CYC);
 GENERIC_EVENT_ATTR(stalled-cycles-backend,     PM_CMPLU_STALL);
@@ -437,6 +449,7 @@ static struct power_pmu power9_pmu = {
        .attr_groups            = power9_pmu_attr_groups,
        .bhrb_nr                = 32,
        .capabilities           = PERF_PMU_CAP_EXTENDED_REGS,
+       .check_attr_config      = power9_check_attr_config,
 };
 
 int init_power9_pmu(void)
index 7d41e92..83975ef 100644 (file)
@@ -5,7 +5,7 @@ config PPC_47x
        select MPIC
        help
          This option enables support for the 47x family of processors and is
-         not currently compatible with other 44x or 46x varients
+         not currently compatible with other 44x or 46x variants
 
 config BAMBOO
        bool "Bamboo"
index 11475c5..afee8b1 100644 (file)
@@ -181,7 +181,7 @@ sram_code:
   udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */
        mullw   r12, r12, r11
        mftb    r13     /* start */
-       addi    r12, r13, r12 /* end */
+       add     r12, r13, r12 /* end */
     1:
        mftb    r13     /* current */
        cmp     cr0, r13, r12
index 3ce9075..e4b0566 100644 (file)
@@ -101,6 +101,8 @@ config PPC_BOOK3S_64
        select ARCH_SUPPORTS_NUMA_BALANCING
        select IRQ_WORK
        select PPC_MM_SLICES
+       select PPC_HAVE_KUEP
+       select PPC_HAVE_KUAP
 
 config PPC_BOOK3E_64
        bool "Embedded processors"
@@ -306,6 +308,7 @@ config PHYS_64BIT
 config ALTIVEC
        bool "AltiVec Support"
        depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 || (PPC_E500MC && PPC64)
+       select PPC_FPU
        help
          This option enables kernel support for the Altivec extensions to the
          PowerPC processor. The kernel currently supports saving and restoring
@@ -363,8 +366,6 @@ config PPC_RADIX_MMU
        bool "Radix MMU Support"
        depends on PPC_BOOK3S_64
        select ARCH_HAS_GIGANTIC_PAGE
-       select PPC_HAVE_KUEP
-       select PPC_HAVE_KUAP
        default y
        help
          Enable support for the Power ISA 3.0 Radix style MMU. Currently this
index 2124831..fa08699 100644 (file)
@@ -486,7 +486,8 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
        window->table.it_size = size >> window->table.it_page_shift;
        window->table.it_ops = &cell_iommu_ops;
 
-       iommu_init_table(&window->table, iommu->nid, 0, 0);
+       if (!iommu_init_table(&window->table, iommu->nid, 0, 0))
+               panic("Failed to initialize iommu table");
 
        pr_debug("\tioid      %d\n", window->ioid);
        pr_debug("\tblocksize %ld\n", window->table.it_blocksize);
index abdef9b..fe0d879 100644 (file)
@@ -35,9 +35,9 @@
  */
 
 static void *spu_syscall_table[] = {
+#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry)
 #define __SYSCALL(nr, entry) [nr] = entry,
 #include <asm/syscall_table_spu.h>
-#undef __SYSCALL
 };
 
 long spu_sys_callback(struct spu_syscall_block *s)
index 8c421dc..76e6256 100644 (file)
@@ -131,8 +131,7 @@ static struct pci_ops rtas_pci_ops =
 
 volatile struct Hydra __iomem *Hydra = NULL;
 
-int __init
-hydra_init(void)
+static int __init hydra_init(void)
 {
        struct device_node *np;
        struct resource r;
index c192096..4c6d703 100644 (file)
@@ -71,11 +71,6 @@ config MPC10X_BRIDGE
        bool
        select PPC_INDIRECT_PCI
 
-config MV64X60
-       bool
-       select PPC_INDIRECT_PCI
-       select CHECK_CACHE_COHERENCY
-
 config GAMECUBE_COMMON
        bool
 
index a20b957..37875e4 100644 (file)
@@ -34,7 +34,7 @@ static struct pci_controller *u3_agp, *u3_ht, *u4_pcie;
 
 static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
 {
-       for (; node != 0;node = node->sibling) {
+       for (; node; node = node->sibling) {
                const int *bus_range;
                const unsigned int *class_code;
                int len;
index b500a6e..5be7242 100644 (file)
@@ -146,7 +146,9 @@ static void iommu_table_iobmap_setup(void)
         */
        iommu_table_iobmap.it_blocksize = 4;
        iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops;
-       iommu_init_table(&iommu_table_iobmap, 0, 0, 0);
+       if (!iommu_init_table(&iommu_table_iobmap, 0, 0, 0))
+               panic("Failed to initialize iommu table");
+
        pr_debug(" <- %s\n", __func__);
 }
 
index 019669e..71c1262 100644 (file)
@@ -46,10 +46,26 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
        return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
 }
 
+static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+       struct memtrace_entry *ent = filp->private_data;
+
+       if (ent->size < vma->vm_end - vma->vm_start)
+               return -EINVAL;
+
+       if (vma->vm_pgoff << PAGE_SHIFT >= ent->size)
+               return -EINVAL;
+
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+       return remap_pfn_range(vma, vma->vm_start, PHYS_PFN(ent->start) + vma->vm_pgoff,
+                              vma->vm_end - vma->vm_start, vma->vm_page_prot);
+}
+
 static const struct file_operations memtrace_fops = {
        .llseek = default_llseek,
        .read   = memtrace_read,
        .open   = simple_open,
+       .mmap   = memtrace_mmap,
 };
 
 #define FLUSH_CHUNK_SIZE SZ_1G
@@ -187,7 +203,7 @@ static int memtrace_init_debugfs(void)
                dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
 
                ent->dir = dir;
-               debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
+               debugfs_create_file_unsafe("trace", 0600, dir, ent, &memtrace_fops);
                debugfs_create_x64("start", 0400, dir, &ent->start);
                debugfs_create_x64("size", 0400, dir, &ent->size);
        }
index 0d9ba70..5b9736b 100644 (file)
@@ -71,7 +71,7 @@ static LIST_HEAD(opalcore_list);
 static struct opalcore_config *oc_conf;
 static const struct opal_mpipl_fadump *opalc_metadata;
 static const struct opal_mpipl_fadump *opalc_cpu_metadata;
-struct kobject *mpipl_kobj;
+static struct kobject *mpipl_kobj;
 
 /*
  * Set crashing CPU's signal to SIGUSR1. if the kernel is triggered
index deddaeb..a191f4c 100644 (file)
@@ -105,7 +105,6 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
 {
        size_t addr, size;
        pgprot_t page_prot;
-       int rc;
 
        pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n",
                        vma->vm_start, vma->vm_end, vma->vm_pgoff,
@@ -121,10 +120,8 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
        page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
                                         size, vma->vm_page_prot);
 
-       rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
+       return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
                                page_prot);
-
-       return rc;
 }
 
 static bool opal_msg_queue_empty(void)
index f0f9016..66c3c33 100644 (file)
@@ -1762,7 +1762,8 @@ found:
        tbl->it_ops = &pnv_ioda1_iommu_ops;
        pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift;
        pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
-       iommu_init_table(tbl, phb->hose->node, 0, 0);
+       if (!iommu_init_table(tbl, phb->hose->node, 0, 0))
+               panic("Failed to initialize iommu table");
 
        pe->dma_setup_done = true;
        return;
@@ -1930,16 +1931,16 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
                res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift;
                res_end = min(window_size, SZ_4G) >> tbl->it_page_shift;
        }
-       iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end);
 
-       rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+       if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end))
+               rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+       else
+               rc = -ENOMEM;
        if (rc) {
-               pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
-                               rc);
+               pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", rc);
                iommu_tce_table_put(tbl);
-               return rc;
+               tbl = NULL; /* This clears iommu_table_base below */
        }
-
        if (!pnv_iommu_bypass_disabled)
                pnv_pci_ioda2_set_bypass(pe, true);
 
index aadf932..a8db3f1 100644 (file)
@@ -157,7 +157,7 @@ static void __init pnv_check_guarded_cores(void)
        for_each_node_by_type(dn, "cpu") {
                if (of_property_match_string(dn, "status", "bad") >= 0)
                        bad_count++;
-       };
+       }
 
        if (bad_count) {
                printk("  _     _______________\n");
index 233503f..3ac7079 100644 (file)
@@ -329,6 +329,20 @@ int dlpar_release_drc(u32 drc_index)
        return 0;
 }
 
+int dlpar_unisolate_drc(u32 drc_index)
+{
+       int dr_status, rc;
+
+       rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+                               DR_ENTITY_SENSE, drc_index);
+       if (rc || dr_status != DR_ENTITY_PRESENT)
+               return -1;
+
+       rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+
+       return 0;
+}
+
 int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 {
        int rc;
index 12cbffd..7e970f8 100644 (file)
@@ -47,9 +47,6 @@ static void rtas_stop_self(void)
 
        BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);
 
-       printk("cpu %u (hwid %u) Ready to die...\n",
-              smp_processor_id(), hard_smp_processor_id());
-
        rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);
 
        panic("Alas, I survived.\n");
@@ -271,6 +268,19 @@ static int dlpar_offline_cpu(struct device_node *dn)
                        if (!cpu_online(cpu))
                                break;
 
+                       /*
+                        * device_offline() will return -EBUSY (via cpu_down()) if there
+                        * is only one CPU left. Check it here to fail earlier and with a
+                        * more informative error message, while also retaining the
+                        * cpu_add_remove_lock to be sure that no CPUs are being
+                        * online/offlined during this check.
+                        */
+                       if (num_online_cpus() == 1) {
+                               pr_warn("Unable to remove last online CPU %pOFn\n", dn);
+                               rc = -EBUSY;
+                               goto out_unlock;
+                       }
+
                        cpu_maps_update_done();
                        rc = device_offline(get_cpu_device(cpu));
                        if (rc)
@@ -283,6 +293,7 @@ static int dlpar_offline_cpu(struct device_node *dn)
                                thread);
                }
        }
+out_unlock:
        cpu_maps_update_done();
 
 out:
@@ -802,8 +813,16 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
        case PSERIES_HP_ELOG_ACTION_REMOVE:
                if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
                        rc = dlpar_cpu_remove_by_count(count);
-               else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
+               else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
                        rc = dlpar_cpu_remove_by_index(drc_index);
+                       /*
+                        * Setting the isolation state of an UNISOLATED/CONFIGURED
+                        * device to UNISOLATE is a no-op, but the hypervisor can
+                        * use it as a hint that the CPU removal failed.
+                        */
+                       if (rc)
+                               dlpar_unisolate_drc(drc_index);
+               }
                else
                        rc = -EINVAL;
                break;
index 2c59b49..3a50612 100644 (file)
@@ -26,7 +26,7 @@ struct hcall_stats {
 };
 #define HCALL_STAT_ARRAY_SIZE  ((MAX_HCALL_OPCODE >> 2) + 1)
 
-DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
+static DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
 
 /*
  * Routines for displaying the statistics in debugfs
index 9fc5217..0c55b99 100644 (file)
@@ -638,7 +638,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 
        iommu_table_setparms(pci->phb, dn, tbl);
        tbl->it_ops = &iommu_table_pseries_ops;
-       iommu_init_table(tbl, pci->phb->node, 0, 0);
+       if (!iommu_init_table(tbl, pci->phb->node, 0, 0))
+               panic("Failed to initialize iommu table");
 
        /* Divide the rest (1.75GB) among the children */
        pci->phb->dma_window_size = 0x80000000ul;
@@ -720,7 +721,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
                iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
                                ppci->table_group, dma_window);
                tbl->it_ops = &iommu_table_lpar_multi_ops;
-               iommu_init_table(tbl, ppci->phb->node, 0, 0);
+               if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
+                       panic("Failed to initialize iommu table");
                iommu_register_group(ppci->table_group,
                                pci_domain_nr(bus), 0);
                pr_debug("  created table: %p\n", ppci->table_group);
@@ -749,7 +751,9 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
                tbl = PCI_DN(dn)->table_group->tables[0];
                iommu_table_setparms(phb, dn, tbl);
                tbl->it_ops = &iommu_table_pseries_ops;
-               iommu_init_table(tbl, phb->node, 0, 0);
+               if (!iommu_init_table(tbl, phb->node, 0, 0))
+                       panic("Failed to initialize iommu table");
+
                set_iommu_table_base(&dev->dev, tbl);
                return;
        }
@@ -1099,6 +1103,33 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
                         ret);
 }
 
+/* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */
+static int iommu_get_page_shift(u32 query_page_size)
+{
+       /* Supported IO page-sizes according to LoPAR */
+       const int shift[] = {
+               __builtin_ctzll(SZ_4K),   __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M),
+               __builtin_ctzll(SZ_32M),  __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M),
+               __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G)
+       };
+
+       int i = ARRAY_SIZE(shift) - 1;
+
+       /*
+        * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field:
+        * - bit 31 means 4k pages are supported,
+        * - bit 30 means 64k pages are supported, and so on.
+        * Larger pagesizes map more memory with the same amount of TCEs, so start probing them.
+        */
+       for (; i >= 0 ; i--) {
+               if (query_page_size & (1 << i))
+                       return shift[i];
+       }
+
+       /* No valid page size found. */
+       return 0;
+}
+
 /*
  * If the PE supports dynamic dma windows, and there is space for a table
  * that can map all pages in a linear offset, then setup such a table,
@@ -1206,13 +1237,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
                        goto out_failed;
                }
        }
-       if (query.page_size & 4) {
-               page_shift = 24; /* 16MB */
-       } else if (query.page_size & 2) {
-               page_shift = 16; /* 64kB */
-       } else if (query.page_size & 1) {
-               page_shift = 12; /* 4kB */
-       } else {
+
+       page_shift = iommu_get_page_shift(query.page_size);
+       if (!page_shift) {
                dev_dbg(&dev->dev, "no supported direct page size in mask %x",
                          query.page_size);
                goto out_failed;
@@ -1229,7 +1256,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
        if (pmem_present) {
                if (query.largest_available_block >=
                    (1ULL << (MAX_PHYSMEM_BITS - page_shift)))
-                       len = MAX_PHYSMEM_BITS - page_shift;
+                       len = MAX_PHYSMEM_BITS;
                else
                        dev_info(&dev->dev, "Skipping ibm,pmemory");
        }
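
These hunks replace the open-coded 4K/64K/16M checks in enable_ddw() with iommu_get_page_shift(), which scans the "IO Page Sizes" mask returned by ibm,query-pe-dma-window from the largest supported page size down; the least significant bit of the returned word selects 4K pages, the next 64K, and so on (LoPAR numbers these as bits 31, 30, ...). A small stand-alone sketch of the same scan, with the shift table mirroring that ordering and a made-up mask value:

#include <stdio.h>

/* Page shifts in "IO Page Sizes" bit order: 4K, 64K, 16M, 32M, 64M, 128M, 256M, 16G */
static const int shifts[] = { 12, 16, 24, 25, 26, 27, 28, 34 };

static int largest_page_shift(unsigned int query_page_size)
{
	int i;

	/* Probe from the largest page size down, as enable_ddw() now does */
	for (i = (int)(sizeof(shifts) / sizeof(shifts[0])) - 1; i >= 0; i--)
		if (query_page_size & (1u << i))
			return shifts[i];

	return 0;	/* no supported page size advertised */
}

int main(void)
{
	/* Hypothetical firmware answer: 4K, 64K and 16M supported (bits 0, 1, 2) */
	unsigned int mask = 0x7;

	printf("largest supported page shift: %d\n", largest_page_shift(mask));
	return 0;
}
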
index 3805519..1f3152a 100644 (file)
@@ -977,11 +977,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
        slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
        BUG_ON(slot == -1);
 
-       flags = newpp & 7;
+       flags = newpp & (HPTE_R_PP | HPTE_R_N);
        if (mmu_has_feature(MMU_FTR_KERNEL_RO))
                /* Move pp0 into bit 8 (IBM 55) */
                flags |= (newpp & HPTE_R_PP0) >> 55;
 
+       flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO);
+
        lpar_rc = plpar_pte_protect(flags, slot, 0);
 
        BUG_ON(lpar_rc != H_SUCCESS);
@@ -1630,7 +1632,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
                }
                msleep(delay);
                rc = plpar_resize_hpt_prepare(0, shift);
-       };
+       }
 
        switch (rc) {
        case H_SUCCESS:
index e278390..f71eac7 100644 (file)
@@ -537,6 +537,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
        parse_em_data(m);
        maxmem_data(m);
 
+       seq_printf(m, "security_flavor=%u\n", pseries_security_flavor);
+
        return 0;
 }
 
index 835163f..ef26fe4 100644 (file)
@@ -93,6 +93,7 @@ struct papr_scm_priv {
        uint64_t block_size;
        int metadata_size;
        bool is_volatile;
+       bool hcall_flush_required;
 
        uint64_t bound_addr;
 
@@ -117,6 +118,38 @@ struct papr_scm_priv {
        size_t stat_buffer_len;
 };
 
+static int papr_scm_pmem_flush(struct nd_region *nd_region,
+                              struct bio *bio __maybe_unused)
+{
+       struct papr_scm_priv *p = nd_region_provider_data(nd_region);
+       unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0;
+       long rc;
+
+       dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index);
+
+       do {
+               rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token);
+               token = ret_buf[0];
+
+               /* Check if we are stalled for some time */
+               if (H_IS_LONG_BUSY(rc)) {
+                       msleep(get_longbusy_msecs(rc));
+                       rc = H_BUSY;
+               } else if (rc == H_BUSY) {
+                       cond_resched();
+               }
+       } while (rc == H_BUSY);
+
+       if (rc) {
+               dev_err(&p->pdev->dev, "flush error: %ld", rc);
+               rc = -EIO;
+       } else {
+               dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index);
+       }
+
+       return rc;
+}
+
 static LIST_HEAD(papr_nd_regions);
 static DEFINE_MUTEX(papr_ndr_lock);
 
@@ -914,6 +947,15 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
        dimm_flags = 0;
        set_bit(NDD_LABELING, &dimm_flags);
 
+       /*
+        * Check if the nvdimm is unarmed. No locking needed as we are still
+        * initializing. Ignore error encountered if any.
+        */
+       __drc_pmem_query_health(p);
+
+       if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK)
+               set_bit(NDD_UNARMED, &dimm_flags);
+
        p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups,
                                  dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
        if (!p->nvdimm) {
@@ -943,6 +985,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
        ndr_desc.num_mappings = 1;
        ndr_desc.nd_set = &p->nd_set;
 
+       if (p->hcall_flush_required) {
+               set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+               ndr_desc.flush = papr_scm_pmem_flush;
+       }
+
        if (p->is_volatile)
                p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
        else {
@@ -1088,6 +1135,7 @@ static int papr_scm_probe(struct platform_device *pdev)
        p->block_size = block_size;
        p->blocks = blocks;
        p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
+       p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
 
        /* We just need to ensure that set cookies are unique across */
        uuid_parse(uuid_str, (uuid_t *) uuid);
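
papr_scm_pmem_flush() follows the usual hcall retry convention: a plain H_BUSY means try again after yielding, while the long-busy return codes encode a suggested delay that get_longbusy_msecs() converts to milliseconds; only when the loop exits is the result mapped to 0 or -EIO. A rough user-space analogue of that loop, where do_flush_once() and busy_delay_ms() are hypothetical stand-ins for the hypervisor call and get_longbusy_msecs():

#include <stdio.h>
#include <unistd.h>

#define H_SUCCESS   0
#define H_BUSY      1
#define H_LONG_BUSY 2	/* stand-in for the long-busy range of return codes */

/* Hypothetical operation: pretend the first two attempts report long-busy. */
static int do_flush_once(int attempt)
{
	return attempt < 2 ? H_LONG_BUSY : H_SUCCESS;
}

static int busy_delay_ms(int rc)
{
	return 10;	/* stand-in for get_longbusy_msecs(rc) */
}

int main(void)
{
	int rc, attempt = 0;

	do {
		rc = do_flush_once(attempt++);
		if (rc == H_LONG_BUSY) {
			usleep(busy_delay_ms(rc) * 1000);	/* msleep() in the kernel */
			rc = H_BUSY;	/* fold long-busy into the retry condition */
		}
		/* a plain H_BUSY simply loops again (cond_resched() in the kernel) */
	} while (rc == H_BUSY);

	if (rc == H_SUCCESS)
		printf("flush complete\n");
	else
		printf("flush error: %d\n", rc);
	return rc == H_SUCCESS ? 0 : 1;
}
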
index f9ae17e..a8f9140 100644 (file)
@@ -50,6 +50,7 @@ EXPORT_SYMBOL_GPL(init_phb_dynamic);
 int remove_phb_dynamic(struct pci_controller *phb)
 {
        struct pci_bus *b = phb->bus;
+       struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge);
        struct resource *res;
        int rc, i;
 
@@ -76,7 +77,8 @@ int remove_phb_dynamic(struct pci_controller *phb)
        /* Remove the PCI bus and unregister the bridge device from sysfs */
        phb->bus = NULL;
        pci_remove_bus(b);
-       device_unregister(b->bridge);
+       host_bridge->bus = NULL;
+       device_unregister(&host_bridge->dev);
 
        /* Now release the IO resource */
        if (res->flags & IORESOURCE_IO)
index e1dc5d3..439ac72 100644 (file)
@@ -139,7 +139,7 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
        return rc;
 }
 
-const struct of_device_id drc_pmem_match[] = {
+static const struct of_device_id drc_pmem_match[] = {
        { .type = "ibm,persistent-memory", },
        {}
 };
index 4fe48c0..1f051a7 100644 (file)
@@ -43,9 +43,6 @@ extern void pSeries_final_fixup(void);
 /* Poweron flag used for enabling auto ups restart */
 extern unsigned long rtas_poweron_auto;
 
-/* Provided by HVC VIO */
-extern void hvc_vio_init_early(void);
-
 /* Dynamic logical Partitioning/Mobility */
 extern void dlpar_free_cc_nodes(struct device_node *);
 extern void dlpar_free_cc_property(struct property *);
@@ -55,6 +52,7 @@ extern int dlpar_attach_node(struct device_node *, struct device_node *);
 extern int dlpar_detach_node(struct device_node *);
 extern int dlpar_acquire_drc(u32 drc_index);
 extern int dlpar_release_drc(u32 drc_index);
+extern int dlpar_unisolate_drc(u32 drc_index);
 
 void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
 int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
@@ -111,6 +109,7 @@ static inline unsigned long cmo_get_page_size(void)
 
 int dlpar_workqueue_init(void);
 
+extern u32 pseries_security_flavor;
 void pseries_setup_security_mitigations(void);
 void pseries_lpar_read_hblkrm_characteristics(void);
 
index f8b390a..9d4ef65 100644 (file)
@@ -699,7 +699,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
                mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
                break;
        case MC_ERROR_TYPE_I_CACHE:
-               mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
+               mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
                break;
        case MC_ERROR_TYPE_UNKNOWN:
        default:
index 8134390..f8f73b4 100644 (file)
@@ -247,7 +247,7 @@ static inline int rtas_fadump_gpr_index(u64 id)
        return i;
 }
 
-void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
 {
        int i;
 
index 46e1540..754e493 100644 (file)
@@ -71,6 +71,7 @@
 #include <asm/swiotlb.h>
 #include <asm/svm.h>
 #include <asm/dtl.h>
+#include <asm/hvconsole.h>
 
 #include "pseries.h"
 #include "../../../../drivers/pci/pci.h"
@@ -85,6 +86,7 @@ EXPORT_SYMBOL(CMO_PageSize);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 int ibm_nmi_interlock_token;
+u32 pseries_security_flavor;
 
 static void pSeries_show_cpuinfo(struct seq_file *m)
 {
@@ -534,9 +536,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
        /*
         * The features below are enabled by default, so we instead look to see
         * if firmware has *disabled* them, and clear them if so.
+        * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if
+        * H_CPU_BEHAV_FAVOUR_SECURITY is.
         */
        if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
                security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+       else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H)
+               pseries_security_flavor = 1;
+       else
+               pseries_security_flavor = 2;
 
        if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
                security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
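
init_cpu_char_feature_flags() now also records which flavour of mitigation behaviour firmware asked for: 1 when H_CPU_BEHAV_FAVOUR_SECURITY_H accompanies H_CPU_BEHAV_FAVOUR_SECURITY, 2 when only the latter is set, and 0 when security is not favoured at all; lparcfg then exposes the value as security_flavor. A small stand-alone sketch of that decision (the bit values below are illustrative, not the real H_CPU_BEHAV_* definitions from hvcall.h):

#include <stdio.h>

/* Illustrative bit positions only */
#define FAVOUR_SECURITY   (1u << 0)
#define FAVOUR_SECURITY_H (1u << 1)

static unsigned int security_flavor(unsigned int behaviour)
{
	if (!(behaviour & FAVOUR_SECURITY))
		return 0;	/* firmware does not favour security at all */
	if (behaviour & FAVOUR_SECURITY_H)
		return 1;	/* security favoured, "H" variant */
	return 2;		/* security favoured, default variant */
}

int main(void)
{
	printf("security_flavor=%u\n", security_flavor(FAVOUR_SECURITY | FAVOUR_SECURITY_H));
	printf("security_flavor=%u\n", security_flavor(FAVOUR_SECURITY));
	printf("security_flavor=%u\n", security_flavor(0));
	return 0;
}
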
index 9cb4fc8..e00f372 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/mm.h>
 #include <linux/dma-map-ops.h>
 #include <linux/kobject.h>
+#include <linux/kexec.h>
 
 #include <asm/iommu.h>
 #include <asm/dma.h>
@@ -1278,6 +1279,20 @@ static int vio_bus_remove(struct device *dev)
        return 0;
 }
 
+static void vio_bus_shutdown(struct device *dev)
+{
+       struct vio_dev *viodev = to_vio_dev(dev);
+       struct vio_driver *viodrv;
+
+       if (dev->driver) {
+               viodrv = to_vio_driver(dev->driver);
+               if (viodrv->shutdown)
+                       viodrv->shutdown(viodev);
+               else if (kexec_in_progress)
+                       vio_bus_remove(dev);
+       }
+}
+
 /**
  * vio_register_driver: - Register a new vio driver
  * @viodrv:    The vio_driver structure to be registered.
@@ -1285,6 +1300,10 @@ static int vio_bus_remove(struct device *dev)
 int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
                          const char *mod_name)
 {
+       // vio_bus_type is only initialised for pseries
+       if (!machine_is(pseries))
+               return -ENODEV;
+
        pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
 
        /* fill in 'struct driver' fields */
@@ -1613,6 +1632,7 @@ struct bus_type vio_bus_type = {
        .match = vio_bus_match,
        .probe = vio_bus_probe,
        .remove = vio_bus_remove,
+       .shutdown = vio_bus_shutdown,
 };
 
 /**
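
The new vio_bus_shutdown() callback prefers a driver-provided ->shutdown() hook and only falls back to a full remove when a kexec is in progress, so devices get a chance to quiesce before the next kernel takes over. The shape of that dispatch, sketched with placeholder types (kexec_in_progress and the vio structures are the kernel's; everything else here is illustrative):

#include <stdbool.h>
#include <stdio.h>

struct dev;

struct drv {
	void (*shutdown)(struct dev *);		/* optional hook, may be NULL */
};

struct dev {
	struct drv *driver;			/* NULL if no driver is bound */
};

static bool kexec_in_progress;

static void bus_remove(struct dev *d)
{
	puts("bus-level remove (kexec fallback)");
}

/* Mirror of the vio_bus_shutdown() decision: use the driver hook when it
 * exists, otherwise only tear the device down if we are about to kexec. */
static void bus_shutdown(struct dev *d)
{
	if (!d->driver)
		return;
	if (d->driver->shutdown)
		d->driver->shutdown(d);
	else if (kexec_in_progress)
		bus_remove(d);
}

int main(void)
{
	struct drv drv = { .shutdown = NULL };
	struct dev dev = { .driver = &drv };

	kexec_in_progress = true;
	bus_shutdown(&dev);
	return 0;
}
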
index d956b8a..b35837c 100644 (file)
@@ -12,7 +12,6 @@
 #include <asm/asm-compat.h>
 #include <asm/crashdump-ppc64.h>
 
-       .machine ppc64
        .balign 256
        .globl purgatory_start
 purgatory_start:
index 6b4a34b..1d33b7a 100644 (file)
@@ -344,7 +344,8 @@ static void iommu_table_dart_setup(void)
        iommu_table_dart.it_index = 0;
        iommu_table_dart.it_blocksize = 1;
        iommu_table_dart.it_ops = &iommu_dart_ops;
-       iommu_init_table(&iommu_table_dart, -1, 0, 0);
+       if (!iommu_init_table(&iommu_table_dart, -1, 0, 0))
+               panic("Failed to initialize iommu table");
 
        /* Reserve the last page of the DART to avoid possible prefetch
         * past the DART mapped area
index 040b9d0..69af737 100644 (file)
@@ -455,7 +455,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
        }
 }
 
-static void __init setup_pci_cmd(struct pci_controller *hose)
+static void setup_pci_cmd(struct pci_controller *hose)
 {
        u16 cmd;
        int cap_x;
index 595310e..5046970 100644 (file)
@@ -63,8 +63,19 @@ static const struct xive_ops *xive_ops;
 static struct irq_domain *xive_irq_domain;
 
 #ifdef CONFIG_SMP
-/* The IPIs all use the same logical irq number */
-static u32 xive_ipi_irq;
+/* The IPIs use the same logical irq number when on the same chip */
+static struct xive_ipi_desc {
+       unsigned int irq;
+       char name[16];
+} *xive_ipis;
+
+/*
+ * Use early_cpu_to_node() for hot-plugged CPUs
+ */
+static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu)
+{
+       return xive_ipis[early_cpu_to_node(cpu)].irq;
+}
 #endif
 
 /* Xive state for each CPU */
@@ -253,17 +264,20 @@ notrace void xmon_xive_do_dump(int cpu)
        xmon_printf("\n");
 }
 
+static struct irq_data *xive_get_irq_data(u32 hw_irq)
+{
+       unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq);
+
+       return irq ? irq_get_irq_data(irq) : NULL;
+}
+
 int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
 {
-       struct irq_chip *chip = irq_data_get_irq_chip(d);
        int rc;
        u32 target;
        u8 prio;
        u32 lirq;
 
-       if (!is_xive_irq(chip))
-               return -EINVAL;
-
        rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
        if (rc) {
                xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
@@ -273,6 +287,9 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
        xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
                    hw_irq, target, prio, lirq);
 
+       if (!d)
+               d = xive_get_irq_data(hw_irq);
+
        if (d) {
                struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
                u64 val = xive_esb_read(xd, XIVE_ESB_GET);
@@ -289,6 +306,20 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
        return 0;
 }
 
+void xmon_xive_get_irq_all(void)
+{
+       unsigned int i;
+       struct irq_desc *desc;
+
+       for_each_irq_desc(i, desc) {
+               struct irq_data *d = irq_desc_get_irq_data(desc);
+               unsigned int hwirq = (unsigned int)irqd_to_hwirq(d);
+
+               if (d->domain == xive_irq_domain)
+                       xmon_xive_get_irq_config(hwirq, d);
+       }
+}
+
 #endif /* CONFIG_XMON */
 
 static unsigned int xive_get_irq(void)
@@ -1067,28 +1098,94 @@ static struct irq_chip xive_ipi_chip = {
        .irq_unmask = xive_ipi_do_nothing,
 };
 
-static void __init xive_request_ipi(void)
+/*
+ * IPIs are marked per-cpu. We use separate HW interrupts under the
+ * hood but associated with the same "linux" interrupt
+ */
+struct xive_ipi_alloc_info {
+       irq_hw_number_t hwirq;
+};
+
+static int xive_ipi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+                                    unsigned int nr_irqs, void *arg)
 {
-       unsigned int virq;
+       struct xive_ipi_alloc_info *info = arg;
+       int i;
 
-       /*
-        * Initialization failed, move on, we might manage to
-        * reach the point where we display our errors before
-        * the system falls appart
-        */
-       if (!xive_irq_domain)
-               return;
+       for (i = 0; i < nr_irqs; i++) {
+               irq_domain_set_info(domain, virq + i, info->hwirq + i, &xive_ipi_chip,
+                                   domain->host_data, handle_percpu_irq,
+                                   NULL, NULL);
+       }
+       return 0;
+}
 
-       /* Initialize it */
-       virq = irq_create_mapping(xive_irq_domain, XIVE_IPI_HW_IRQ);
-       xive_ipi_irq = virq;
+static const struct irq_domain_ops xive_ipi_irq_domain_ops = {
+       .alloc  = xive_ipi_irq_domain_alloc,
+};
 
-       WARN_ON(request_irq(virq, xive_muxed_ipi_action,
-                           IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
+static int __init xive_request_ipi(void)
+{
+       struct fwnode_handle *fwnode;
+       struct irq_domain *ipi_domain;
+       unsigned int node;
+       int ret = -ENOMEM;
+
+       fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI");
+       if (!fwnode)
+               goto out;
+
+       ipi_domain = irq_domain_create_linear(fwnode, nr_node_ids,
+                                             &xive_ipi_irq_domain_ops, NULL);
+       if (!ipi_domain)
+               goto out_free_fwnode;
+
+       xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL);
+       if (!xive_ipis)
+               goto out_free_domain;
+
+       for_each_node(node) {
+               struct xive_ipi_desc *xid = &xive_ipis[node];
+               struct xive_ipi_alloc_info info = { node };
+
+               /* Skip nodes without CPUs */
+               if (cpumask_empty(cpumask_of_node(node)))
+                       continue;
+
+               /*
+                * Map one IPI interrupt per node for all cpus of that node.
+                * Since the HW interrupt number doesn't have any meaning,
+                * simply use the node number.
+                */
+               xid->irq = irq_domain_alloc_irqs(ipi_domain, 1, node, &info);
+               if (xid->irq < 0) {
+                       ret = xid->irq;
+                       goto out_free_xive_ipis;
+               }
+
+               snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
+
+               ret = request_irq(xid->irq, xive_muxed_ipi_action,
+                                 IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL);
+
+               WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
+       }
+
+       return ret;
+
+out_free_xive_ipis:
+       kfree(xive_ipis);
+out_free_domain:
+       irq_domain_remove(ipi_domain);
+out_free_fwnode:
+       irq_domain_free_fwnode(fwnode);
+out:
+       return ret;
 }
 
 static int xive_setup_cpu_ipi(unsigned int cpu)
 {
+       unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
        struct xive_cpu *xc;
        int rc;
 
@@ -1131,6 +1228,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
 
 static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
 {
+       unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
+
        /* Disable the IPI and free the IRQ data */
 
        /* Already cleaned up ? */
@@ -1178,19 +1277,6 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
         */
        irq_clear_status_flags(virq, IRQ_LEVEL);
 
-#ifdef CONFIG_SMP
-       /* IPIs are special and come up with HW number 0 */
-       if (hw == XIVE_IPI_HW_IRQ) {
-               /*
-                * IPIs are marked per-cpu. We use separate HW interrupts under
-                * the hood but associated with the same "linux" interrupt
-                */
-               irq_set_chip_and_handler(virq, &xive_ipi_chip,
-                                        handle_percpu_irq);
-               return 0;
-       }
-#endif
-
        rc = xive_irq_alloc_data(virq, hw);
        if (rc)
                return rc;
@@ -1202,15 +1288,7 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
 
 static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
 {
-       struct irq_data *data = irq_get_irq_data(virq);
-       unsigned int hw_irq;
-
-       /* XXX Assign BAD number */
-       if (!data)
-               return;
-       hw_irq = (unsigned int)irqd_to_hwirq(data);
-       if (hw_irq != XIVE_IPI_HW_IRQ)
-               xive_irq_free_data(virq);
+       xive_irq_free_data(virq);
 }
 
 static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
@@ -1335,17 +1413,14 @@ static int xive_prepare_cpu(unsigned int cpu)
 
        xc = per_cpu(xive_cpu, cpu);
        if (!xc) {
-               struct device_node *np;
-
                xc = kzalloc_node(sizeof(struct xive_cpu),
                                  GFP_KERNEL, cpu_to_node(cpu));
                if (!xc)
                        return -ENOMEM;
-               np = of_get_cpu_node(cpu, NULL);
-               if (np)
-                       xc->chip_id = of_get_ibm_chip_id(np);
-               of_node_put(np);
                xc->hw_ipi = XIVE_BAD_IRQ;
+               xc->chip_id = XIVE_INVALID_CHIP_ID;
+               if (xive_ops->prepare_cpu)
+                       xive_ops->prepare_cpu(cpu, xc);
 
                per_cpu(xive_cpu, cpu) = xc;
        }
@@ -1408,13 +1483,12 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
                struct irq_desc *desc = irq_to_desc(irq);
                struct irq_data *d = irq_desc_get_irq_data(desc);
                struct xive_irq_data *xd;
-               unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 
                /*
                 * Ignore anything that isn't a XIVE irq and ignore
                 * IPIs, so can just be dropped.
                 */
-               if (d->domain != xive_irq_domain || hw_irq == XIVE_IPI_HW_IRQ)
+               if (d->domain != xive_irq_domain)
                        continue;
 
                /*
@@ -1592,16 +1666,15 @@ static void xive_debug_show_cpu(struct seq_file *m, int cpu)
        seq_puts(m, "\n");
 }
 
-static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d)
+static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
 {
-       struct irq_chip *chip = irq_data_get_irq_chip(d);
+       unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
        int rc;
        u32 target;
        u8 prio;
        u32 lirq;
-
-       if (!is_xive_irq(chip))
-               return;
+       struct xive_irq_data *xd;
+       u64 val;
 
        rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
        if (rc) {
@@ -1612,17 +1685,14 @@ static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data
        seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
                   hw_irq, target, prio, lirq);
 
-       if (d) {
-               struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
-               u64 val = xive_esb_read(xd, XIVE_ESB_GET);
-
-               seq_printf(m, "flags=%c%c%c PQ=%c%c",
-                          xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
-                          xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
-                          xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
-                          val & XIVE_ESB_VAL_P ? 'P' : '-',
-                          val & XIVE_ESB_VAL_Q ? 'Q' : '-');
-       }
+       xd = irq_data_get_irq_handler_data(d);
+       val = xive_esb_read(xd, XIVE_ESB_GET);
+       seq_printf(m, "flags=%c%c%c PQ=%c%c",
+                  xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
+                  xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
+                  xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
+                  val & XIVE_ESB_VAL_P ? 'P' : '-',
+                  val & XIVE_ESB_VAL_Q ? 'Q' : '-');
        seq_puts(m, "\n");
 }
 
@@ -1640,16 +1710,9 @@ static int xive_core_debug_show(struct seq_file *m, void *private)
 
        for_each_irq_desc(i, desc) {
                struct irq_data *d = irq_desc_get_irq_data(desc);
-               unsigned int hw_irq;
-
-               if (!d)
-                       continue;
-
-               hw_irq = (unsigned int)irqd_to_hwirq(d);
 
-               /* IPIs are special (HW number 0) */
-               if (hw_irq != XIVE_IPI_HW_IRQ)
-                       xive_debug_show_irq(m, hw_irq, d);
+               if (d->domain == xive_irq_domain)
+                       xive_debug_show_irq(m, d);
        }
        return 0;
 }
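
The XIVE rework drops the single global xive_ipi_irq and instead allocates one Linux interrupt per NUMA node inside a dedicated "XIVE-IPI" irq domain; a CPU then finds its IPI by indexing the per-node table with its node number (early_cpu_to_node(), so it also works for hot-plugged CPUs). A condensed user-space picture of that lookup, with a hypothetical cpu_to_node() mapping standing in for the topology code and made-up irq numbers:

#include <stdio.h>

#define NR_NODES 2
#define NR_CPUS  4

struct ipi_desc {
	unsigned int irq;
	char name[16];
};

static struct ipi_desc ipis[NR_NODES];

/* Hypothetical topology: CPUs 0-1 on node 0, CPUs 2-3 on node 1 */
static int cpu_to_node(int cpu) { return cpu < 2 ? 0 : 1; }

/* Analogue of xive_ipi_cpu_to_irq(): every CPU of a node shares one IPI irq */
static unsigned int ipi_irq_for_cpu(int cpu)
{
	return ipis[cpu_to_node(cpu)].irq;
}

int main(void)
{
	int node, cpu;

	/* One "irq" per node, as xive_request_ipi() now allocates */
	for (node = 0; node < NR_NODES; node++) {
		ipis[node].irq = 100 + node;
		snprintf(ipis[node].name, sizeof(ipis[node].name), "IPI-%d", node);
	}

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %d -> %s (irq %u)\n", cpu,
		       ipis[cpu_to_node(cpu)].name, ipi_irq_for_cpu(cpu));
	return 0;
}
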
index 05a800a..57e3f15 100644 (file)
@@ -380,6 +380,11 @@ static void xive_native_update_pending(struct xive_cpu *xc)
        }
 }
 
+static void xive_native_prepare_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+       xc->chip_id = cpu_to_chip_id(cpu);
+}
+
 static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
 {
        s64 rc;
@@ -462,6 +467,7 @@ static const struct xive_ops xive_native_ops = {
        .match                  = xive_native_match,
        .shutdown               = xive_native_shutdown,
        .update_pending         = xive_native_update_pending,
+       .prepare_cpu            = xive_native_prepare_cpu,
        .setup_cpu              = xive_native_setup_cpu,
        .teardown_cpu           = xive_native_teardown_cpu,
        .sync_source            = xive_native_sync_source,
index 01ccc07..f143b6f 100644 (file)
@@ -549,7 +549,7 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
 static bool xive_spapr_match(struct device_node *node)
 {
        /* Ignore cascaded controllers for the moment */
-       return 1;
+       return true;
 }
 
 #ifdef CONFIG_SMP
index 9cf57c7..504e7ed 100644 (file)
@@ -5,8 +5,6 @@
 #ifndef __XIVE_INTERNAL_H
 #define __XIVE_INTERNAL_H
 
-#define XIVE_IPI_HW_IRQ                0 /* interrupt source # for IPIs */
-
 /*
  * A "disabled" interrupt should never fire, to catch problems
  * we set its logical number to this
@@ -46,6 +44,7 @@ struct xive_ops {
                                  u32 *sw_irq);
        int     (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
        void    (*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
+       void    (*prepare_cpu)(unsigned int cpu, struct xive_cpu *xc);
        void    (*setup_cpu)(unsigned int cpu, struct xive_cpu *xc);
        void    (*teardown_cpu)(unsigned int cpu, struct xive_cpu *xc);
        bool    (*match)(struct device_node *np);
index bf7d696..c8173e9 100644 (file)
@@ -54,6 +54,7 @@
 #include <asm/code-patching.h>
 #include <asm/sections.h>
 #include <asm/inst.h>
+#include <asm/interrupt.h>
 
 #ifdef CONFIG_PPC64
 #include <asm/hvcall.h>
@@ -605,7 +606,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
                         * debugger break (IPI). This is similar to
                         * crash_kexec_secondary().
                         */
-                       if (TRAP(regs) != 0x100 || !wait_for_other_cpus(ncpus))
+                       if (TRAP(regs) != INTERRUPT_SYSTEM_RESET || !wait_for_other_cpus(ncpus))
                                smp_send_debugger_break();
 
                        wait_for_other_cpus(ncpus);
@@ -615,7 +616,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 
                if (!locked_down) {
                        /* for breakpoint or single step, print curr insn */
-                       if (bp || TRAP(regs) == 0xd00)
+                       if (bp || TRAP(regs) == INTERRUPT_TRACE)
                                ppc_inst_dump(regs->nip, 1, 0);
                        printf("enter ? for help\n");
                }
@@ -684,7 +685,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
                disable_surveillance();
                if (!locked_down) {
                        /* for breakpoint or single step, print current insn */
-                       if (bp || TRAP(regs) == 0xd00)
+                       if (bp || TRAP(regs) == INTERRUPT_TRACE)
                                ppc_inst_dump(regs->nip, 1, 0);
                        printf("enter ? for help\n");
                }
@@ -1769,9 +1770,12 @@ static void excprint(struct pt_regs *fp)
        printf("    sp: %lx\n", fp->gpr[1]);
        printf("   msr: %lx\n", fp->msr);
 
-       if (trap == 0x300 || trap == 0x380 || trap == 0x600 || trap == 0x200) {
+       if (trap == INTERRUPT_DATA_STORAGE ||
+           trap == INTERRUPT_DATA_SEGMENT ||
+           trap == INTERRUPT_ALIGNMENT ||
+           trap == INTERRUPT_MACHINE_CHECK) {
                printf("   dar: %lx\n", fp->dar);
-               if (trap != 0x380)
+               if (trap != INTERRUPT_DATA_SEGMENT)
                        printf(" dsisr: %lx\n", fp->dsisr);
        }
 
@@ -1785,7 +1789,7 @@ static void excprint(struct pt_regs *fp)
                       current->pid, current->comm);
        }
 
-       if (trap == 0x700)
+       if (trap == INTERRUPT_PROGRAM)
                print_bug_trap(fp);
 
        printf(linux_banner);
@@ -1815,25 +1819,16 @@ static void prregs(struct pt_regs *fp)
        }
 
 #ifdef CONFIG_PPC64
-       if (FULL_REGS(fp)) {
-               for (n = 0; n < 16; ++n)
-                       printf("R%.2d = "REG"   R%.2d = "REG"\n",
-                              n, fp->gpr[n], n+16, fp->gpr[n+16]);
-       } else {
-               for (n = 0; n < 7; ++n)
-                       printf("R%.2d = "REG"   R%.2d = "REG"\n",
-                              n, fp->gpr[n], n+7, fp->gpr[n+7]);
-       }
+#define R_PER_LINE 2
 #else
+#define R_PER_LINE 4
+#endif
+
        for (n = 0; n < 32; ++n) {
-               printf("R%.2d = %.8lx%s", n, fp->gpr[n],
-                      (n & 3) == 3? "\n": "   ");
-               if (n == 12 && !FULL_REGS(fp)) {
-                       printf("\n");
-                       break;
-               }
+               printf("R%.2d = "REG"%s", n, fp->gpr[n],
+                       (n % R_PER_LINE) == R_PER_LINE - 1 ? "\n" : "   ");
        }
-#endif
+
        printf("pc  = ");
        xmon_print_symbol(fp->nip, " ", "\n");
        if (!trap_is_syscall(fp) && cpu_has_feature(CPU_FTR_CFAR)) {
@@ -1846,7 +1841,9 @@ static void prregs(struct pt_regs *fp)
        printf("ctr = "REG"   xer = "REG"   trap = %4lx\n",
               fp->ctr, fp->xer, fp->trap);
        trap = TRAP(fp);
-       if (trap == 0x300 || trap == 0x380 || trap == 0x600)
+       if (trap == INTERRUPT_DATA_STORAGE ||
+           trap == INTERRUPT_DATA_SEGMENT ||
+           trap == INTERRUPT_ALIGNMENT)
                printf("dar = "REG"   dsisr = %.8lx\n", fp->dar, fp->dsisr);
 }
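
prregs() now prints all 32 GPRs with a single loop, two per line on 64-bit and four per line on 32-bit, instead of the old FULL_REGS()-dependent paths. The line-breaking idiom is just a modulo test; a tiny stand-alone version with a zeroed stand-in register file:

#include <stdio.h>

#define R_PER_LINE 2	/* 4 on 32-bit, as in the xmon change */

int main(void)
{
	unsigned long gpr[32] = { 0 };	/* stand-in register contents */
	int n;

	for (n = 0; n < 32; n++)
		printf("R%.2d = %016lx%s", n, gpr[n],
		       (n % R_PER_LINE) == R_PER_LINE - 1 ? "\n" : "   ");
	return 0;
}
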
 
@@ -2727,30 +2724,6 @@ static void dump_all_xives(void)
                dump_one_xive(cpu);
 }
 
-static void dump_one_xive_irq(u32 num, struct irq_data *d)
-{
-       xmon_xive_get_irq_config(num, d);
-}
-
-static void dump_all_xive_irq(void)
-{
-       unsigned int i;
-       struct irq_desc *desc;
-
-       for_each_irq_desc(i, desc) {
-               struct irq_data *d = irq_desc_get_irq_data(desc);
-               unsigned int hwirq;
-
-               if (!d)
-                       continue;
-
-               hwirq = (unsigned int)irqd_to_hwirq(d);
-               /* IPIs are special (HW number 0) */
-               if (hwirq)
-                       dump_one_xive_irq(hwirq, d);
-       }
-}
-
 static void dump_xives(void)
 {
        unsigned long num;
@@ -2767,9 +2740,9 @@ static void dump_xives(void)
                return;
        } else if (c == 'i') {
                if (scanhex(&num))
-                       dump_one_xive_irq(num, NULL);
+                       xmon_xive_get_irq_config(num, NULL);
                else
-                       dump_all_xive_irq();
+                       xmon_xive_get_irq_all();
                return;
        }
 
@@ -2980,7 +2953,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr,
                if (!ppc_inst_prefixed(inst))
                        dump_func(ppc_inst_val(inst), adr);
                else
-                       dump_func(ppc_inst_as_u64(inst), adr);
+                       dump_func(ppc_inst_as_ulong(inst), adr);
                printf("\n");
        }
        return adr - first_adr;
@@ -4212,8 +4185,7 @@ static void dump_spu_fields(struct spu *spu)
        DUMP_FIELD(spu, "0x%p", pdata);
 }
 
-int
-spu_inst_dump(unsigned long adr, long count, int praddr)
+static int spu_inst_dump(unsigned long adr, long count, int praddr)
 {
        return generic_inst_dump(adr, count, praddr, print_insn_spu);
 }
index ed89ef7..383c53c 100644 (file)
@@ -68,7 +68,8 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
 }
 
 #ifdef CONFIG_TIME_NS
-static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
        return _timens_data;
 }
index df01d73..1936f21 100644 (file)
@@ -58,7 +58,8 @@ extern struct ms_hyperv_tsc_page hvclock_page
 #endif
 
 #ifdef CONFIG_TIME_NS
-static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
        return __timens_vdso_data;
 }
index 3eec59f..543a05e 100644 (file)
@@ -776,7 +776,7 @@ config I2C_MT7621
 
 config I2C_MV64XXX
        tristate "Marvell mv64xxx I2C Controller"
-       depends on MV64X60 || PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU || COMPILE_TEST
+       depends on PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU || COMPILE_TEST
        help
          If you say yes to this option, support will be included for the
          built-in I2C interface on the Marvell 64xxx line of host bridges.
index 73e6ae8..4bdd4c4 100644 (file)
@@ -180,14 +180,13 @@ static struct proc_dir_entry *proc_pmu_options;
 static int option_server_mode;
 
 int pmu_battery_count;
-int pmu_cur_battery;
+static int pmu_cur_battery;
 unsigned int pmu_power_flags = PMU_PWR_AC_PRESENT;
 struct pmu_battery_info pmu_batteries[PMU_MAX_BATTERIES];
 static int query_batt_timer = BATTERY_POLLING_COUNT;
 static struct adb_request batt_req;
 static struct proc_dir_entry *proc_pmu_batt[PMU_MAX_BATTERIES];
 
-int __fake_sleep;
 int asleep;
 
 #ifdef CONFIG_ADB
@@ -1833,6 +1832,7 @@ pmu_present(void)
  */
  
 static u32 save_via[8];
+static int __fake_sleep;
 
 static void
 save_via_state(void)
index 7761230..07f91ec 100644 (file)
@@ -56,7 +56,7 @@ static BLOCKING_NOTIFIER_HEAD(wf_client_list);
 static int wf_client_count;
 static unsigned int wf_overtemp;
 static unsigned int wf_overtemp_counter;
-struct task_struct *wf_thread;
+static struct task_struct *wf_thread;
 
 static struct platform_device wf_platform_device = {
        .name   = "windfarm",
index ab467b9..ba1ec6f 100644 (file)
@@ -433,7 +433,7 @@ struct pm121_sys_state {
        struct wf_pid_state     pid;
 };
 
-struct pm121_sys_state *pm121_sys_state[N_LOOPS] = {};
+static struct pm121_sys_state *pm121_sys_state[N_LOOPS] = {};
 
 /*
  * ****** CPU Fans Control Loop ******
index 79cb1ad..7596605 100644 (file)
@@ -94,7 +94,7 @@ static int smu_set_fan(int pwm, u8 id, u16 value)
                return rc;
        wait_for_completion(&comp);
 
-       /* Handle fallback (see coment above) */
+       /* Handle fallback (see comment above) */
        if (cmd.status != 0 && smu_supports_new_fans_ops) {
                printk(KERN_WARNING "windfarm: SMU failed new fan command "
                       "falling back to old method\n");
index f0d2dd3..acac0b5 100644 (file)
@@ -467,6 +467,34 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
                unsafe_put_user(__s->sig[0], &__c->sig[0], label);      \
        }                                                               \
 } while (0)
+
+#define unsafe_get_compat_sigset(set, compat, label) do {              \
+       const compat_sigset_t __user *__c = compat;                     \
+       compat_sigset_word hi, lo;                                      \
+       sigset_t *__s = set;                                            \
+                                                                       \
+       switch (_NSIG_WORDS) {                                          \
+       case 4:                                                         \
+               unsafe_get_user(lo, &__c->sig[7], label);               \
+               unsafe_get_user(hi, &__c->sig[6], label);               \
+               __s->sig[3] = hi | (((long)lo) << 32);                  \
+               fallthrough;                                            \
+       case 3:                                                         \
+               unsafe_get_user(lo, &__c->sig[5], label);               \
+               unsafe_get_user(hi, &__c->sig[4], label);               \
+               __s->sig[2] = hi | (((long)lo) << 32);                  \
+               fallthrough;                                            \
+       case 2:                                                         \
+               unsafe_get_user(lo, &__c->sig[3], label);               \
+               unsafe_get_user(hi, &__c->sig[2], label);               \
+               __s->sig[1] = hi | (((long)lo) << 32);                  \
+               fallthrough;                                            \
+       case 1:                                                         \
+               unsafe_get_user(lo, &__c->sig[1], label);               \
+               unsafe_get_user(hi, &__c->sig[0], label);               \
+               __s->sig[0] = hi | (((long)lo) << 32);                  \
+       }                                                               \
+} while (0)
 #else
 #define unsafe_put_compat_sigset(compat, set, label) do {              \
        compat_sigset_t __user *__c = compat;                           \
@@ -474,6 +502,13 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
                                                                        \
        unsafe_copy_to_user(__c, __s, sizeof(*__c), label);             \
 } while (0)
+
+#define unsafe_get_compat_sigset(set, compat, label) do {              \
+       const compat_sigset_t __user *__c = compat;                     \
+       sigset_t *__s = set;                                            \
+                                                                       \
+       unsafe_copy_from_user(__s, __c, sizeof(*__c), label);           \
+} while (0)
 #endif
 
 extern int compat_ptrace_request(struct task_struct *child,
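
unsafe_get_compat_sigset() is the read-side mirror of unsafe_put_compat_sigset(): on big-endian 64-bit kernels each native sigset word has to be rebuilt from two consecutive 32-bit compat words, joined with an OR and a 32-bit shift (the little-endian case is a plain unsafe_copy_from_user()). A small stand-alone demonstration of the word merging, stripped of the user-access and fallthrough-switch machinery; the two-word sigset size is illustrative:

#include <stdio.h>
#include <stdint.h>

#define NSIG_WORDS 2	/* _NSIG_WORDS depends on the architecture */

/* Reassemble 64-bit sigset words from pairs of 32-bit compat words, the way
 * the big-endian branch of unsafe_get_compat_sigset() does. */
static void get_compat_sigset(uint64_t *set, const uint32_t *compat)
{
	int i;

	for (i = 0; i < NSIG_WORDS; i++) {
		uint32_t hi = compat[2 * i];		/* low half of the native word */
		uint32_t lo = compat[2 * i + 1];	/* high half of the native word */

		set[i] = hi | ((uint64_t)lo << 32);
	}
}

int main(void)
{
	const uint32_t compat[4] = { 0x00000001, 0x80000000, 0xdeadbeef, 0x00000002 };
	uint64_t set[NSIG_WORDS];

	get_compat_sigset(set, compat);
	printf("sig[0]=%#018llx sig[1]=%#018llx\n",
	       (unsigned long long)set[0], (unsigned long long)set[1]);
	return 0;
}
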
index c7c6e8b..c05e903 100644 (file)
@@ -397,6 +397,7 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count);
 #define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e)
 #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e)
 #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e)
+#define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e)
 static inline unsigned long user_access_save(void) { return 0UL; }
 static inline void user_access_restore(unsigned long flags) { }
 #endif
index 2919f16..ce2f695 100644 (file)
@@ -46,16 +46,18 @@ static inline bool vdso_cycles_ok(u64 cycles)
 #endif
 
 #ifdef CONFIG_TIME_NS
-static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
-                         struct __kernel_timespec *ts)
+static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
+                                         struct __kernel_timespec *ts)
 {
-       const struct vdso_data *vd = __arch_get_timens_vdso_data();
+       const struct vdso_data *vd;
        const struct timens_offset *offs = &vdns->offset[clk];
        const struct vdso_timestamp *vdso_ts;
        u64 cycles, last, ns;
        u32 seq;
        s64 sec;
 
+       vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? CS_RAW : CS_HRES_COARSE);
+       vd = __arch_get_timens_vdso_data(vd);
        if (clk != CLOCK_MONOTONIC_RAW)
                vd = &vd[CS_HRES_COARSE];
        else
@@ -92,13 +94,14 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
        return 0;
 }
 #else
-static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
        return NULL;
 }
 
-static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
-                         struct __kernel_timespec *ts)
+static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
+                                         struct __kernel_timespec *ts)
 {
        return -EINVAL;
 }
@@ -159,10 +162,10 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
 }
 
 #ifdef CONFIG_TIME_NS
-static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
-                           struct __kernel_timespec *ts)
+static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
+                                           struct __kernel_timespec *ts)
 {
-       const struct vdso_data *vd = __arch_get_timens_vdso_data();
+       const struct vdso_data *vd = __arch_get_timens_vdso_data(vdns);
        const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
        const struct timens_offset *offs = &vdns->offset[clk];
        u64 nsec;
@@ -188,8 +191,8 @@ static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
        return 0;
 }
 #else
-static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
-                           struct __kernel_timespec *ts)
+static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
+                                           struct __kernel_timespec *ts)
 {
        return -1;
 }
@@ -310,7 +313,7 @@ __cvdso_gettimeofday_data(const struct vdso_data *vd,
        if (unlikely(tz != NULL)) {
                if (IS_ENABLED(CONFIG_TIME_NS) &&
                    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
-                       vd = __arch_get_timens_vdso_data();
+                       vd = __arch_get_timens_vdso_data(vd);
 
                tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
                tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime;
@@ -333,7 +336,7 @@ __cvdso_time_data(const struct vdso_data *vd, __kernel_old_time_t *time)
 
        if (IS_ENABLED(CONFIG_TIME_NS) &&
            vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
-               vd = __arch_get_timens_vdso_data();
+               vd = __arch_get_timens_vdso_data(vd);
 
        t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
 
@@ -363,7 +366,7 @@ int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock,
 
        if (IS_ENABLED(CONFIG_TIME_NS) &&
            vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
-               vd = __arch_get_timens_vdso_data();
+               vd = __arch_get_timens_vdso_data(vd);
 
        /*
         * Convert the clockid to a bitmask and use it to check which
index c25cf7c..33ee34f 100644 (file)
  *
  * We create two sets of source and destination buffers, one in regular memory,
  * the other cache-inhibited (by default we use /dev/fb0 for this, but an
- * alterative path for cache-inhibited memory may be provided).
- *
- * One way to get cache-inhibited memory is to use the "mem" kernel parameter
- * to limit the kernel to less memory than actually exists.  Addresses above
- * the limit may still be accessed but will be treated as cache-inhibited. For
- * example, if there is actually 4GB of memory and the parameter "mem=3GB" is
- * used, memory from address 0xC0000000 onwards is treated as cache-inhibited.
- * To access this region /dev/mem is used. The kernel should be configured
- * without CONFIG_STRICT_DEVMEM. In this case use:
- *         ./alignment_handler /dev/mem 0xc0000000
+ * alternative path for cache-inhibited memory may be provided, e.g. memtrace).
  *
  * We initialise the source buffers, then use whichever set of load/store
  * instructions is under test to copy bytes from the source buffers to the
index defe488..40253ab 100644 (file)
@@ -5,6 +5,7 @@ noarg:
 TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
                  large_vm_fork_separation bad_accesses pkey_exec_prot \
                  pkey_siginfo stack_expansion_signal stack_expansion_ldst
+TEST_PROGS := stress_code_patching.sh
 
 TEST_GEN_PROGS_EXTENDED := tlbie_test
 TEST_GEN_FILES := tempfile
diff --git a/tools/testing/selftests/powerpc/mm/stress_code_patching.sh b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh
new file mode 100755 (executable)
index 0000000..e454509
--- /dev/null
@@ -0,0 +1,49 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+TIMEOUT=30
+
+DEBUFS_DIR=`cat /proc/mounts | grep debugfs | awk '{print $2}'`
+if [ ! -e "$DEBUFS_DIR" ]
+then
+       echo "debugfs not found, skipping" 1>&2
+       exit 4
+fi
+
+if [ ! -e "$DEBUFS_DIR/tracing/current_tracer" ]
+then
+       echo "Tracing files not found, skipping" 1>&2
+       exit 4
+fi
+
+
+echo "Testing for spurious faults when mapping kernel memory..."
+
+if grep -q "FUNCTION TRACING IS CORRUPTED" "$DEBUFS_DIR/tracing/trace"
+then
+       echo "FAILED: Ftrace already dead. Probably due to a spurious fault" 1>&2
+       exit 1
+fi
+
+dmesg -C
+START_TIME=`date +%s`
+END_TIME=`expr $START_TIME + $TIMEOUT`
+while [ `date +%s` -lt $END_TIME ]
+do
+       echo function > $DEBUFS_DIR/tracing/current_tracer
+       echo nop > $DEBUFS_DIR/tracing/current_tracer
+       if dmesg | grep -q 'ftrace bug'
+       then
+               break
+       fi
+done
+
+echo nop > $DEBUFS_DIR/tracing/current_tracer
+if dmesg | grep -q 'ftrace bug'
+then
+       echo "FAILED: Mapping kernel memory causes spurious faults" 1>&2
+       exit 1
+else
+       echo "OK: Mapping kernel memory does not cause spurious faults"
+       exit 0
+fi
index 02dffb6..b099753 100644 (file)
@@ -324,7 +324,7 @@ int compress_file(int argc, char **argv, void *handle)
                                fprintf(stderr, "error: cannot progress; ");
                                fprintf(stderr, "too many faults\n");
                                exit(-1);
-                       };
+                       }
                }
 
                fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
index 8d3f006..a500639 100644 (file)
@@ -2,7 +2,7 @@
 TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
               ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
               ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
-              perf-hwbreak ptrace-syscall
+              perf-hwbreak ptrace-syscall ptrace-perf-hwbreak
 
 top_srcdir = ../../../../..
 include ../../lib.mk
index c1f324a..ecde2c1 100644 (file)
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <signal.h>
 #include <string.h>
 #include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/ptrace.h>
+#include <sys/sysinfo.h>
+#include <asm/ptrace.h>
 #include <elf.h>
 #include <pthread.h>
 #include <sys/syscall.h>
 #include <linux/hw_breakpoint.h>
 #include "utils.h"
 
+#ifndef PPC_DEBUG_FEATURE_DATA_BP_ARCH_31
+#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31      0x20
+#endif
+
 #define MAX_LOOPS 10000
 
 #define DAWR_LENGTH_MAX ((0x3f + 1) * 8)
 
-static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
-                                     int cpu, int group_fd,
-                                     unsigned long flags)
+int nprocs;
+
+static volatile int a = 10;
+static volatile int b = 10;
+static volatile char c[512 + 8] __attribute__((aligned(512)));
+
+static void perf_event_attr_set(struct perf_event_attr *attr,
+                               __u32 type, __u64 addr, __u64 len,
+                               bool exclude_user)
 {
-       attr->size = sizeof(*attr);
-       return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+       memset(attr, 0, sizeof(struct perf_event_attr));
+       attr->type           = PERF_TYPE_BREAKPOINT;
+       attr->size           = sizeof(struct perf_event_attr);
+       attr->bp_type        = type;
+       attr->bp_addr        = addr;
+       attr->bp_len         = len;
+       attr->exclude_kernel = 1;
+       attr->exclude_hv     = 1;
+       attr->exclude_guest  = 1;
+       attr->exclude_user   = exclude_user;
+       attr->disabled       = 1;
 }
 
-static inline bool breakpoint_test(int len)
+static int
+perf_process_event_open_exclude_user(__u32 type, __u64 addr, __u64 len, bool exclude_user)
+{
+       struct perf_event_attr attr;
+
+       perf_event_attr_set(&attr, type, addr, len, exclude_user);
+       return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+}
+
+static int perf_process_event_open(__u32 type, __u64 addr, __u64 len)
+{
+       struct perf_event_attr attr;
+
+       perf_event_attr_set(&attr, type, addr, len, 0);
+       return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+}
+
+static int perf_cpu_event_open(long cpu, __u32 type, __u64 addr, __u64 len)
 {
        struct perf_event_attr attr;
+
+       perf_event_attr_set(&attr, type, addr, len, 0);
+       return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+}
+
+static void close_fds(int *fd, int n)
+{
+       int i;
+
+       for (i = 0; i < n; i++)
+               close(fd[i]);
+}
+
+static unsigned long read_fds(int *fd, int n)
+{
+       int i;
+       unsigned long c = 0;
+       unsigned long count = 0;
+       size_t res;
+
+       for (i = 0; i < n; i++) {
+               res = read(fd[i], &c, sizeof(c));
+               assert(res == sizeof(unsigned long long));
+               count += c;
+       }
+       return count;
+}
+
+static void reset_fds(int *fd, int n)
+{
+       int i;
+
+       for (i = 0; i < n; i++)
+               ioctl(fd[i], PERF_EVENT_IOC_RESET);
+}
+
+static void enable_fds(int *fd, int n)
+{
+       int i;
+
+       for (i = 0; i < n; i++)
+               ioctl(fd[i], PERF_EVENT_IOC_ENABLE);
+}
+
+static void disable_fds(int *fd, int n)
+{
+       int i;
+
+       for (i = 0; i < n; i++)
+               ioctl(fd[i], PERF_EVENT_IOC_DISABLE);
+}
+
+static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len)
+{
+       int i = 0;
+
+       /* Assume online processors are 0 to nprocs for simplisity */
+       for (i = 0; i < nprocs; i++) {
+               fd[i] = perf_cpu_event_open(i, type, addr, len);
+               if (fd[i] < 0) {
+                       close_fds(fd, i);
+                       return fd[i];
+               }
+       }
+       return 0;
+}
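
The rewritten selftest funnels every perf_event_open() call through small helpers that fill a PERF_TYPE_BREAKPOINT attribute (type, address, length, exclusion bits) and open the event either per-process or per-CPU. Outside the harness, the same API can be exercised with a minimal stand-alone program that counts writes to a variable; this is a generic usage example rather than part of the selftest, is Linux-only, and keeps error handling to a minimum:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static volatile int watched;

int main(void)
{
	struct perf_event_attr attr;
	unsigned long long count = 0;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type           = PERF_TYPE_BREAKPOINT;
	attr.size           = sizeof(attr);
	attr.bp_type        = HW_BREAKPOINT_W;			/* count writes only */
	attr.bp_addr        = (__u64)(unsigned long)&watched;
	attr.bp_len         = HW_BREAKPOINT_LEN_4;
	attr.exclude_kernel = 1;
	attr.exclude_hv     = 1;
	attr.disabled       = 1;

	/* pid = 0 (this process), cpu = -1 (any CPU), no group, no flags */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	watched = 1;
	watched = 2;
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) != sizeof(count))
		return 1;
	printf("write breakpoint fired %llu times\n", count);	/* expect 2 */
	close(fd);
	return 0;
}
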
+
+static inline bool breakpoint_test(int len)
+{
        int fd;
 
-       /* setup counters */
-       memset(&attr, 0, sizeof(attr));
-       attr.disabled = 1;
-       attr.type = PERF_TYPE_BREAKPOINT;
-       attr.bp_type = HW_BREAKPOINT_R;
        /* bp_addr can point anywhere but needs to be aligned */
-       attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800;
-       attr.bp_len = len;
-       fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+       fd = perf_process_event_open(HW_BREAKPOINT_R, (__u64)(&fd) & 0xfffffffffffff800, len);
        if (fd < 0)
                return false;
        close(fd);
@@ -75,7 +178,6 @@ static inline bool dawr_supported(void)
 static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
 {
        int i,j;
-       struct perf_event_attr attr;
        size_t res;
        unsigned long long breaks, needed;
        int readint;
@@ -85,6 +187,7 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
        int break_fd;
        int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */
        volatile int *k;
+       __u64 len;
 
        /* align to 0x400 boundary as required by DAWR */
        readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) &
@@ -94,19 +197,11 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
        if (arraytest)
                ptr = &readintalign[0];
 
-       /* setup counters */
-       memset(&attr, 0, sizeof(attr));
-       attr.disabled = 1;
-       attr.type = PERF_TYPE_BREAKPOINT;
-       attr.bp_type = readwriteflag;
-       attr.bp_addr = (__u64)ptr;
-       attr.bp_len = sizeof(int);
-       if (arraytest)
-               attr.bp_len = DAWR_LENGTH_MAX;
-       attr.exclude_user = exclude_user;
-       break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+       len = arraytest ? DAWR_LENGTH_MAX : sizeof(int);
+       break_fd = perf_process_event_open_exclude_user(readwriteflag, (__u64)ptr,
+                                                       len, exclude_user);
        if (break_fd < 0) {
-               perror("sys_perf_event_open");
+               perror("perf_process_event_open_exclude_user");
                exit(1);
        }
 
@@ -153,7 +248,6 @@ static int runtest_dar_outside(void)
        void *target;
        volatile __u16 temp16;
        volatile __u64 temp64;
-       struct perf_event_attr attr;
        int break_fd;
        unsigned long long breaks;
        int fail = 0;
@@ -165,21 +259,11 @@ static int runtest_dar_outside(void)
                exit(EXIT_FAILURE);
        }
 
-       /* setup counters */
-       memset(&attr, 0, sizeof(attr));
-       attr.disabled = 1;
-       attr.type = PERF_TYPE_BREAKPOINT;
-       attr.exclude_kernel = 1;
-       attr.exclude_hv = 1;
-       attr.exclude_guest = 1;
-       attr.bp_type = HW_BREAKPOINT_RW;
        /* watch middle half of target array */
-       attr.bp_addr = (__u64)(target + 2);
-       attr.bp_len = 4;
-       break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+       break_fd = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)(target + 2), 4);
        if (break_fd < 0) {
                free(target);
-               perror("sys_perf_event_open");
+               perror("perf_process_event_open");
                exit(EXIT_FAILURE);
        }
 
@@ -263,11 +347,467 @@ static int runtest_dar_outside(void)
        return fail;
 }
 
+static void multi_dawr_workload(void)
+{
+       a += 10;
+       b += 10;
+       c[512 + 1] += 'a';
+}
+
+static int test_process_multi_diff_addr(void)
+{
+       unsigned long long breaks1 = 0, breaks2 = 0;
+       int fd1, fd2;
+       char *desc = "Process specific, Two events, diff addr";
+       size_t res;
+
+       fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+       if (fd1 < 0) {
+               perror("perf_process_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b));
+       if (fd2 < 0) {
+               close(fd1);
+               perror("perf_process_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       ioctl(fd1, PERF_EVENT_IOC_RESET);
+       ioctl(fd2, PERF_EVENT_IOC_RESET);
+       ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+       ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+       multi_dawr_workload();
+       ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+       ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+       res = read(fd1, &breaks1, sizeof(breaks1));
+       assert(res == sizeof(unsigned long long));
+       res = read(fd2, &breaks2, sizeof(breaks2));
+       assert(res == sizeof(unsigned long long));
+
+       close(fd1);
+       close(fd2);
+
+       if (breaks1 != 2 || breaks2 != 2) {
+               printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+               return 1;
+       }
+
+       printf("TESTED: %s\n", desc);
+       return 0;
+}
+
+static int test_process_multi_same_addr(void)
+{
+       unsigned long long breaks1 = 0, breaks2 = 0;
+       int fd1, fd2;
+       char *desc = "Process specific, Two events, same addr";
+       size_t res;
+
+       fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+       if (fd1 < 0) {
+               perror("perf_process_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+       if (fd2 < 0) {
+               close(fd1);
+               perror("perf_process_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       ioctl(fd1, PERF_EVENT_IOC_RESET);
+       ioctl(fd2, PERF_EVENT_IOC_RESET);
+       ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+       ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+       multi_dawr_workload();
+       ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+       ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+       res = read(fd1, &breaks1, sizeof(breaks1));
+       assert(res == sizeof(unsigned long long));
+       res = read(fd2, &breaks2, sizeof(breaks2));
+       assert(res == sizeof(unsigned long long));
+
+       close(fd1);
+       close(fd2);
+
+       if (breaks1 != 2 || breaks2 != 2) {
+               printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+               return 1;
+       }
+
+       printf("TESTED: %s\n", desc);
+       return 0;
+}
+
+static int test_process_multi_diff_addr_ro_wo(void)
+{
+       unsigned long long breaks1 = 0, breaks2 = 0;
+       int fd1, fd2;
+       char *desc = "Process specific, Two events, diff addr, one is RO, other is WO";
+       size_t res;
+
+       fd1 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+       if (fd1 < 0) {
+               perror("perf_process_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       fd2 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b));
+       if (fd2 < 0) {
+               close(fd1);
+               perror("perf_process_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       ioctl(fd1, PERF_EVENT_IOC_RESET);
+       ioctl(fd2, PERF_EVENT_IOC_RESET);
+       ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+       ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+       multi_dawr_workload();
+       ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+       ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+       res = read(fd1, &breaks1, sizeof(breaks1));
+       assert(res == sizeof(unsigned long long));
+       res = read(fd2, &breaks2, sizeof(breaks2));
+       assert(res == sizeof(unsigned long long));
+
+       close(fd1);
+       close(fd2);
+
+       if (breaks1 != 1 || breaks2 != 1) {
+               printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+               return 1;
+       }
+
+       printf("TESTED: %s\n", desc);
+       return 0;
+}
+
+static int test_process_multi_same_addr_ro_wo(void)
+{
+       unsigned long long breaks1 = 0, breaks2 = 0;
+       int fd1, fd2;
+       char *desc = "Process specific, Two events, same addr, one is RO, other is WO";
+       size_t res;
+
+       fd1 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a));
+       if (fd1 < 0) {
+               perror("perf_process_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       fd2 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+       if (fd2 < 0) {
+               close(fd1);
+               perror("perf_process_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       ioctl(fd1, PERF_EVENT_IOC_RESET);
+       ioctl(fd2, PERF_EVENT_IOC_RESET);
+       ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+       ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+       multi_dawr_workload();
+       ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+       ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+       res = read(fd1, &breaks1, sizeof(breaks1));
+       assert(res == sizeof(unsigned long long));
+       res = read(fd2, &breaks2, sizeof(breaks2));
+       assert(res == sizeof(unsigned long long));
+
+       close(fd1);
+       close(fd2);
+
+       if (breaks1 != 1 || breaks2 != 1) {
+               printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+               return 1;
+       }
+
+       printf("TESTED: %s\n", desc);
+       return 0;
+}
+
+static int test_syswide_multi_diff_addr(void)
+{
+       unsigned long long breaks1 = 0, breaks2 = 0;
+       int *fd1 = malloc(nprocs * sizeof(int));
+       int *fd2 = malloc(nprocs * sizeof(int));
+       char *desc = "Systemwide, Two events, diff addr";
+       int ret;
+
+       ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+       if (ret) {
+               perror("perf_systemwide_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b));
+       if (ret) {
+               close_fds(fd1, nprocs);
+               perror("perf_systemwide_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       reset_fds(fd1, nprocs);
+       reset_fds(fd2, nprocs);
+       enable_fds(fd1, nprocs);
+       enable_fds(fd2, nprocs);
+       multi_dawr_workload();
+       disable_fds(fd1, nprocs);
+       disable_fds(fd2, nprocs);
+
+       breaks1 = read_fds(fd1, nprocs);
+       breaks2 = read_fds(fd2, nprocs);
+
+       close_fds(fd1, nprocs);
+       close_fds(fd2, nprocs);
+
+       free(fd1);
+       free(fd2);
+
+       if (breaks1 != 2 || breaks2 != 2) {
+               printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+               return 1;
+       }
+
+       printf("TESTED: %s\n", desc);
+       return 0;
+}
+
+static int test_syswide_multi_same_addr(void)
+{
+       unsigned long long breaks1 = 0, breaks2 = 0;
+       int *fd1 = malloc(nprocs * sizeof(int));
+       int *fd2 = malloc(nprocs * sizeof(int));
+       char *desc = "Systemwide, Two events, same addr";
+       int ret;
+
+       ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+       if (ret) {
+               perror("perf_systemwide_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+       if (ret) {
+               close_fds(fd1, nprocs);
+               perror("perf_systemwide_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       reset_fds(fd1, nprocs);
+       reset_fds(fd2, nprocs);
+       enable_fds(fd1, nprocs);
+       enable_fds(fd2, nprocs);
+       multi_dawr_workload();
+       disable_fds(fd1, nprocs);
+       disable_fds(fd2, nprocs);
+
+       breaks1 = read_fds(fd1, nprocs);
+       breaks2 = read_fds(fd2, nprocs);
+
+       close_fds(fd1, nprocs);
+       close_fds(fd2, nprocs);
+
+       free(fd1);
+       free(fd2);
+
+       if (breaks1 != 2 || breaks2 != 2) {
+               printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+               return 1;
+       }
+
+       printf("TESTED: %s\n", desc);
+       return 0;
+}
+
+static int test_syswide_multi_diff_addr_ro_wo(void)
+{
+       unsigned long long breaks1 = 0, breaks2 = 0;
+       int *fd1 = malloc(nprocs * sizeof(int));
+       int *fd2 = malloc(nprocs * sizeof(int));
+       char *desc = "Systemwide, Two events, diff addr, one is RO, other is WO";
+       int ret;
+
+       ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+       if (ret) {
+               perror("perf_systemwide_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b));
+       if (ret) {
+               close_fds(fd1, nprocs);
+               perror("perf_systemwide_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       reset_fds(fd1, nprocs);
+       reset_fds(fd2, nprocs);
+       enable_fds(fd1, nprocs);
+       enable_fds(fd2, nprocs);
+       multi_dawr_workload();
+       disable_fds(fd1, nprocs);
+       disable_fds(fd2, nprocs);
+
+       breaks1 = read_fds(fd1, nprocs);
+       breaks2 = read_fds(fd2, nprocs);
+
+       close_fds(fd1, nprocs);
+       close_fds(fd2, nprocs);
+
+       free(fd1);
+       free(fd2);
+
+       if (breaks1 != 1 || breaks2 != 1) {
+               printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+               return 1;
+       }
+
+       printf("TESTED: %s\n", desc);
+       return 0;
+}
+
+static int test_syswide_multi_same_addr_ro_wo(void)
+{
+       unsigned long long breaks1 = 0, breaks2 = 0;
+       int *fd1 = malloc(nprocs * sizeof(int));
+       int *fd2 = malloc(nprocs * sizeof(int));
+       char *desc = "Systemwide, Two events, same addr, one is RO, other is WO";
+       int ret;
+
+       ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+       if (ret) {
+               perror("perf_systemwide_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a));
+       if (ret) {
+               close_fds(fd1, nprocs);
+               perror("perf_systemwide_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       reset_fds(fd1, nprocs);
+       reset_fds(fd2, nprocs);
+       enable_fds(fd1, nprocs);
+       enable_fds(fd2, nprocs);
+       multi_dawr_workload();
+       disable_fds(fd1, nprocs);
+       disable_fds(fd2, nprocs);
+
+       breaks1 = read_fds(fd1, nprocs);
+       breaks2 = read_fds(fd2, nprocs);
+
+       close_fds(fd1, nprocs);
+       close_fds(fd2, nprocs);
+
+       free(fd1);
+       free(fd2);
+
+       if (breaks1 != 1 || breaks2 != 1) {
+               printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+               return 1;
+       }
+
+       printf("TESTED: %s\n", desc);
+       return 0;
+}
+
+static int runtest_multi_dawr(void)
+{
+       int ret = 0;
+
+       ret |= test_process_multi_diff_addr();
+       ret |= test_process_multi_same_addr();
+       ret |= test_process_multi_diff_addr_ro_wo();
+       ret |= test_process_multi_same_addr_ro_wo();
+       ret |= test_syswide_multi_diff_addr();
+       ret |= test_syswide_multi_same_addr();
+       ret |= test_syswide_multi_diff_addr_ro_wo();
+       ret |= test_syswide_multi_same_addr_ro_wo();
+
+       return ret;
+}
+
+static int runtest_unaligned_512bytes(void)
+{
+       unsigned long long breaks = 0;
+       int fd;
+       char *desc = "Process specific, 512 bytes, unaligned";
+       __u64 addr = (__u64)&c + 8;
+       size_t res;
+
+       fd = perf_process_event_open(HW_BREAKPOINT_RW, addr, 512);
+       if (fd < 0) {
+               perror("perf_process_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       ioctl(fd, PERF_EVENT_IOC_RESET);
+       ioctl(fd, PERF_EVENT_IOC_ENABLE);
+       multi_dawr_workload();
+       ioctl(fd, PERF_EVENT_IOC_DISABLE);
+
+       res = read(fd, &breaks, sizeof(breaks));
+       assert(res == sizeof(unsigned long long));
+
+       close(fd);
+
+       if (breaks != 2) {
+               printf("FAILED: %s: %lld != 2\n", desc, breaks);
+               return 1;
+       }
+
+       printf("TESTED: %s\n", desc);
+       return 0;
+}
+
+/* There is no perf api to find number of available watchpoints. Use ptrace. */
+static int get_nr_wps(bool *arch_31)
+{
+       struct ppc_debug_info dbginfo;
+       int child_pid;
+
+       child_pid = fork();
+       if (!child_pid) {
+               int ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+               if (ret) {
+                       perror("PTRACE_TRACEME failed");
+                       exit(EXIT_FAILURE);
+               }
+               kill(getpid(), SIGUSR1);
+
+               sleep(1);
+               exit(EXIT_SUCCESS);
+       }
+
+       wait(NULL);
+       if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo)) {
+               perror("Can't get breakpoint info");
+               exit(EXIT_FAILURE);
+       }
+
+       *arch_31 = !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_ARCH_31);
+       return dbginfo.num_data_bps;
+}
+
 static int runtest(void)
 {
        int rwflag;
        int exclude_user;
        int ret;
+       bool dawr = dawr_supported();
+       bool arch_31 = false;
+       int nr_wps = get_nr_wps(&arch_31);
 
        /*
         * perf defines rwflag as two bits read and write and at least
@@ -280,7 +820,7 @@ static int runtest(void)
                                return ret;
 
                        /* if we have the dawr, we can do an array test */
-                       if (!dawr_supported())
+                       if (!dawr)
                                continue;
                        ret = runtestsingle(rwflag, exclude_user, 1);
                        if (ret)
@@ -289,6 +829,19 @@ static int runtest(void)
        }
 
        ret = runtest_dar_outside();
+       if (ret)
+               return ret;
+
+       if (dawr && nr_wps > 1) {
+               nprocs = get_nprocs();
+               ret = runtest_multi_dawr();
+               if (ret)
+                       return ret;
+       }
+
+       if (dawr && arch_31)
+               ret = runtest_unaligned_512bytes();
+
        return ret;
 }
 
index 2e0d86e..a0635a3 100644 (file)
@@ -194,6 +194,18 @@ static void test_workload(void)
                big_var[rand() % DAWR_MAX_LEN] = 'a';
        else
                cvar = big_var[rand() % DAWR_MAX_LEN];
+
+       /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */
+       gstruct.a[rand() % A_LEN] = 'a';
+
+       /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */
+       cvar = gstruct.b[rand() % B_LEN];
+
+       /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */
+       gstruct.a[rand() % A_LEN] = 'a';
+
+       /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */
+       cvar = gstruct.a[rand() % A_LEN];
 }
 
 static void check_success(pid_t child_pid, const char *name, const char *type,
@@ -417,6 +429,69 @@ static void test_sethwdebug_range_aligned(pid_t child_pid)
        ptrace_delhwdebug(child_pid, wh);
 }
 
+static void test_multi_sethwdebug_range(pid_t child_pid)
+{
+       struct ppc_hw_breakpoint info1, info2;
+       unsigned long wp_addr1, wp_addr2;
+       char *name1 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED";
+       char *name2 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED";
+       int len1, len2;
+       int wh1, wh2;
+
+       wp_addr1 = (unsigned long)&gstruct.a;
+       wp_addr2 = (unsigned long)&gstruct.b;
+       len1 = A_LEN;
+       len2 = B_LEN;
+       get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1);
+       get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2);
+
+       /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */
+       wh1 = ptrace_sethwdebug(child_pid, &info1);
+
+       /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */
+       wh2 = ptrace_sethwdebug(child_pid, &info2);
+
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name1, "WO", wp_addr1, len1);
+
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name2, "RO", wp_addr2, len2);
+
+       ptrace_delhwdebug(child_pid, wh1);
+       ptrace_delhwdebug(child_pid, wh2);
+}
+
+static void test_multi_sethwdebug_range_dawr_overlap(pid_t child_pid)
+{
+       struct ppc_hw_breakpoint info1, info2;
+       unsigned long wp_addr1, wp_addr2;
+       char *name = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap";
+       int len1, len2;
+       int wh1, wh2;
+
+       wp_addr1 = (unsigned long)&gstruct.a;
+       wp_addr2 = (unsigned long)&gstruct.a;
+       len1 = A_LEN;
+       len2 = A_LEN;
+       get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1);
+       get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2);
+
+       /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */
+       wh1 = ptrace_sethwdebug(child_pid, &info1);
+
+       /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */
+       wh2 = ptrace_sethwdebug(child_pid, &info2);
+
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "WO", wp_addr1, len1);
+
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "RO", wp_addr2, len2);
+
+       ptrace_delhwdebug(child_pid, wh1);
+       ptrace_delhwdebug(child_pid, wh2);
+}
+
 static void test_sethwdebug_range_unaligned(pid_t child_pid)
 {
        struct ppc_hw_breakpoint info;
@@ -504,6 +579,10 @@ run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr)
                        test_sethwdebug_range_unaligned(child_pid);
                        test_sethwdebug_range_unaligned_dar(child_pid);
                        test_sethwdebug_dawr_max_range(child_pid);
+                       if (dbginfo->num_data_bps > 1) {
+                               test_multi_sethwdebug_range(child_pid);
+                               test_multi_sethwdebug_range_dawr_overlap(child_pid);
+                       }
                }
        }
 }
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
new file mode 100644 (file)
index 0000000..3344e74
--- /dev/null
@@ -0,0 +1,659 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include "ptrace.h"
+
+char data[16];
+
+/* Overlapping address range */
+volatile __u64 *ptrace_data1 = (__u64 *)&data[0];
+volatile __u64 *perf_data1 = (__u64 *)&data[4];
+
+/* Non-overlapping address range */
+volatile __u64 *ptrace_data2 = (__u64 *)&data[0];
+volatile __u64 *perf_data2 = (__u64 *)&data[8];
+
+static unsigned long pid_max_addr(void)
+{
+       FILE *fp;
+       char *line = NULL, *c;
+       char addr[100];
+       size_t len = 0;
+
+       fp = fopen("/proc/kallsyms", "r");
+       if (!fp) {
+               printf("Failed to read /proc/kallsyms. Exiting..\n");
+               exit(EXIT_FAILURE);
+       }
+
+       while (getline(&line, &len, fp) != -1) {
+               if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") ||
+                   strstr(line, "pid_max_min"))
+                       continue;
+
+               strncpy(addr, line, len < 100 ? len : 100);
+               c = strchr(addr, ' ');
+               *c = '\0';
+               return strtoul(addr, &c, 16);
+       }
+       fclose(fp);
+       printf("Could not find pid_max. Exiting..\n");
+       exit(EXIT_FAILURE);
+       return -1;
+}
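
pid_max_addr() scans /proc/kallsyms for the address of the kernel's pid_max variable so that perf_kernel_event_attr_set() below can arm a kernel-space read watchpoint. Each kallsyms line has the form "<hex address> <type> <symbol>", so cutting the line at the first space leaves just the address for strtoul(); an illustrative (made-up) matching line:

    c000000001e2f4a0 D pid_max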
+
+static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len)
+{
+       memset(attr, 0, sizeof(struct perf_event_attr));
+       attr->type           = PERF_TYPE_BREAKPOINT;
+       attr->size           = sizeof(struct perf_event_attr);
+       attr->bp_type        = HW_BREAKPOINT_R;
+       attr->bp_addr        = addr;
+       attr->bp_len         = len;
+       attr->exclude_kernel = 1;
+       attr->exclude_hv     = 1;
+}
+
+static void perf_kernel_event_attr_set(struct perf_event_attr *attr)
+{
+       memset(attr, 0, sizeof(struct perf_event_attr));
+       attr->type           = PERF_TYPE_BREAKPOINT;
+       attr->size           = sizeof(struct perf_event_attr);
+       attr->bp_type        = HW_BREAKPOINT_R;
+       attr->bp_addr        = pid_max_addr();
+       attr->bp_len         = sizeof(unsigned long);
+       attr->exclude_user   = 1;
+       attr->exclude_hv     = 1;
+}
+
+static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len)
+{
+       struct perf_event_attr attr;
+
+       perf_user_event_attr_set(&attr, addr, len);
+       return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+}
+
+static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len)
+{
+       struct perf_event_attr attr;
+
+       perf_user_event_attr_set(&attr, addr, len);
+       return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0);
+}
+
+static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len)
+{
+       struct perf_event_attr attr;
+
+       perf_user_event_attr_set(&attr, addr, len);
+       return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0);
+}
+
+static int perf_thread_kernel_event_open(pid_t child_pid)
+{
+       struct perf_event_attr attr;
+
+       perf_kernel_event_attr_set(&attr);
+       return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0);
+}
+
+static int perf_cpu_kernel_event_open(int cpu)
+{
+       struct perf_event_attr attr;
+
+       perf_kernel_event_attr_set(&attr);
+       return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+}
+
+static int child(void)
+{
+       int ret;
+
+       ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+       if (ret) {
+               printf("Error: PTRACE_TRACEME failed\n");
+               return 0;
+       }
+       kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */
+
+       return 0;
+}
+
+static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
+                                    __u64 addr, int len)
+{
+       info->version = 1;
+       info->trigger_type = type;
+       info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+       info->addr = addr;
+       info->addr2 = addr + len;
+       info->condition_value = 0;
+       if (!len)
+               info->addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+       else
+               info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+}
+
+static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len)
+{
+       struct ppc_hw_breakpoint info;
+
+       ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+       return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
+}
+
+static int test1(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread event by ptrace)
+        *      if (existing cpu event by perf)
+        *              if (addr range overlaps)
+        *                      fail;
+        */
+
+       perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
+       if (perf_fd < 0)
+               return -1;
+
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+       if (ptrace_fd > 0 || errno != ENOSPC)
+               ret = -1;
+
+       close(perf_fd);
+       return ret;
+}
+
+static int test2(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread event by ptrace)
+        *      if (existing cpu event by perf)
+        *              if (addr range does not overlap)
+        *                      allow;
+        */
+
+       perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2));
+       if (perf_fd < 0)
+               return -1;
+
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
+       if (ptrace_fd < 0) {
+               ret = -1;
+               goto perf_close;
+       }
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+
+perf_close:
+       close(perf_fd);
+       return ret;
+}
+
+static int test3(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread event by ptrace)
+        *      if (existing thread event by perf on the same thread)
+        *              if (addr range overlaps)
+        *                      fail;
+        */
+       perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1,
+                                        sizeof(*perf_data1));
+       if (perf_fd < 0)
+               return -1;
+
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+       if (ptrace_fd > 0 || errno != ENOSPC)
+               ret = -1;
+
+       close(perf_fd);
+       return ret;
+}
+
+static int test4(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread event by ptrace)
+        *      if (existing thread event by perf on the same thread)
+        *              if (addr range does not overlap)
+        *                      allow;
+        */
+       perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2,
+                                        sizeof(*perf_data2));
+       if (perf_fd < 0)
+               return -1;
+
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
+       if (ptrace_fd < 0) {
+               ret = -1;
+               goto perf_close;
+       }
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+
+perf_close:
+       close(perf_fd);
+       return ret;
+}
+
+static int test5(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int cpid;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread event by ptrace)
+        *      if (existing thread event by perf on the different thread)
+        *              allow;
+        */
+       cpid = fork();
+       if (!cpid) {
+               /* Temporary Child */
+               pause();
+               exit(EXIT_SUCCESS);
+       }
+
+       perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1));
+       if (perf_fd < 0) {
+               ret = -1;
+               goto kill_child;
+       }
+
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+       if (ptrace_fd < 0) {
+               ret = -1;
+               goto perf_close;
+       }
+
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+perf_close:
+       close(perf_fd);
+kill_child:
+       kill(cpid, SIGINT);
+       return ret;
+}
+
+static int test6(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread kernel event by perf)
+        *      if (existing thread event by ptrace on the same thread)
+        *              allow;
+        * -- OR --
+        * if (new per cpu kernel event by perf)
+        *      if (existing thread event by ptrace)
+        *              allow;
+        */
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+       if (ptrace_fd < 0)
+               return -1;
+
+       perf_fd = perf_thread_kernel_event_open(child_pid);
+       if (perf_fd < 0) {
+               ret = -1;
+               goto ptrace_close;
+       }
+       close(perf_fd);
+
+       perf_fd = perf_cpu_kernel_event_open(0);
+       if (perf_fd < 0) {
+               ret = -1;
+               goto ptrace_close;
+       }
+       close(perf_fd);
+
+ptrace_close:
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+       return ret;
+}
+
+static int test7(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread event by perf)
+        *      if (existing thread event by ptrace on the same thread)
+        *              if (addr range overlaps)
+        *                      fail;
+        */
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+       if (ptrace_fd < 0)
+               return -1;
+
+       perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1,
+                                        sizeof(*perf_data1));
+       if (perf_fd > 0 || errno != ENOSPC)
+               ret = -1;
+
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+       return ret;
+}
+
+static int test8(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread event by perf)
+        *      if (existing thread event by ptrace on the same thread)
+        *              if (addr range does not overlap)
+        *                      allow;
+        */
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
+       if (ptrace_fd < 0)
+               return -1;
+
+       perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2,
+                                        sizeof(*perf_data2));
+       if (perf_fd < 0) {
+               ret = -1;
+               goto ptrace_close;
+       }
+       close(perf_fd);
+
+ptrace_close:
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+       return ret;
+}
+
+static int test9(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int cpid;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread event by perf)
+        *      if (existing thread event by ptrace on the other thread)
+        *              allow;
+        */
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+       if (ptrace_fd < 0)
+               return -1;
+
+       cpid = fork();
+       if (!cpid) {
+               /* Temporary Child */
+               pause();
+               exit(EXIT_SUCCESS);
+       }
+
+       perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1));
+       if (perf_fd < 0) {
+               ret = -1;
+               goto kill_child;
+       }
+       close(perf_fd);
+
+kill_child:
+       kill(cpid, SIGINT);
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+       return ret;
+}
+
+static int test10(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int ret = 0;
+
+       /* Test:
+        * if (new per cpu event by perf)
+        *      if (existing thread event by ptrace on the same thread)
+        *              if (addr range overlaps)
+        *                      fail;
+        */
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+       if (ptrace_fd < 0)
+               return -1;
+
+       perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
+       if (perf_fd > 0 || errno != ENOSPC)
+               ret = -1;
+
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+       return ret;
+}
+
+static int test11(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int ret = 0;
+
+       /* Test:
+        * if (new per cpu event by perf)
+        *      if (existing thread event by ptrace on the same thread)
+        *              if (addr range does not overlap)
+        *                      allow;
+        */
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
+       if (ptrace_fd < 0)
+               return -1;
+
+       perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2));
+       if (perf_fd < 0) {
+               ret = -1;
+               goto ptrace_close;
+       }
+       close(perf_fd);
+
+ptrace_close:
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+       return ret;
+}
+
+static int test12(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread and per cpu event by perf)
+        *      if (existing thread event by ptrace on the same thread)
+        *              if (addr range overlaps)
+        *                      fail;
+        */
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+       if (ptrace_fd < 0)
+               return -1;
+
+       perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1));
+       if (perf_fd > 0 || errno != ENOSPC)
+               ret = -1;
+
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+       return ret;
+}
+
+static int test13(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread and per cpu event by perf)
+        *      if (existing thread event by ptrace on the same thread)
+        *              if (addr range does not overlap)
+        *                      allow;
+        */
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
+       if (ptrace_fd < 0)
+               return -1;
+
+       perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2));
+       if (perf_fd < 0) {
+               ret = -1;
+               goto ptrace_close;
+       }
+       close(perf_fd);
+
+ptrace_close:
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+       return ret;
+}
+
+static int test14(pid_t child_pid)
+{
+       int perf_fd;
+       int ptrace_fd;
+       int cpid;
+       int ret = 0;
+
+       /* Test:
+        * if (new per thread and per cpu event by perf)
+        *      if (existing thread event by ptrace on the other thread)
+        *              allow;
+        */
+       ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+       if (ptrace_fd < 0)
+               return -1;
+
+       cpid = fork();
+       if (!cpid) {
+               /* Temporary Child */
+               pause();
+               exit(EXIT_SUCCESS);
+       }
+
+       perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1,
+                                            sizeof(*perf_data1));
+       if (perf_fd < 0) {
+               ret = -1;
+               goto kill_child;
+       }
+       close(perf_fd);
+
+kill_child:
+       kill(cpid, SIGINT);
+       ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+       return ret;
+}
+
+static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg)
+{
+       int ret;
+
+       ret = fun(arg);
+       if (ret)
+               printf("%s: Error\n", msg);
+       else
+               printf("%s: Ok\n", msg);
+       return ret;
+}
+
+char *desc[14] = {
+       "perf cpu event -> ptrace thread event (Overlapping)",
+       "perf cpu event -> ptrace thread event (Non-overlapping)",
+       "perf thread event -> ptrace same thread event (Overlapping)",
+       "perf thread event -> ptrace same thread event (Non-overlapping)",
+       "perf thread event -> ptrace other thread event",
+       "ptrace thread event -> perf kernel event",
+       "ptrace thread event -> perf same thread event (Overlapping)",
+       "ptrace thread event -> perf same thread event (Non-overlapping)",
+       "ptrace thread event -> perf other thread event",
+       "ptrace thread event -> perf cpu event (Overlapping)",
+       "ptrace thread event -> perf cpu event (Non-overlapping)",
+       "ptrace thread event -> perf same thread & cpu event (Overlapping)",
+       "ptrace thread event -> perf same thread & cpu event (Non-overlapping)",
+       "ptrace thread event -> perf other thread & cpu event",
+};
+
+static int test(pid_t child_pid)
+{
+       int ret = TEST_PASS;
+
+       ret |= do_test(desc[0], test1, child_pid);
+       ret |= do_test(desc[1], test2, child_pid);
+       ret |= do_test(desc[2], test3, child_pid);
+       ret |= do_test(desc[3], test4, child_pid);
+       ret |= do_test(desc[4], test5, child_pid);
+       ret |= do_test(desc[5], test6, child_pid);
+       ret |= do_test(desc[6], test7, child_pid);
+       ret |= do_test(desc[7], test8, child_pid);
+       ret |= do_test(desc[8], test9, child_pid);
+       ret |= do_test(desc[9], test10, child_pid);
+       ret |= do_test(desc[10], test11, child_pid);
+       ret |= do_test(desc[11], test12, child_pid);
+       ret |= do_test(desc[12], test13, child_pid);
+       ret |= do_test(desc[13], test14, child_pid);
+
+       return ret;
+}
+
+static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
+{
+       if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) {
+               perror("Can't get breakpoint info");
+               exit(-1);
+       }
+}
+
+static int ptrace_perf_hwbreak(void)
+{
+       int ret;
+       pid_t child_pid;
+       struct ppc_debug_info dbginfo;
+
+       child_pid = fork();
+       if (!child_pid)
+               return child();
+
+       /* parent */
+       wait(NULL); /* <-- child (SIGUSR1) */
+
+       get_dbginfo(child_pid, &dbginfo);
+       SKIP_IF(dbginfo.num_data_bps <= 1);
+
+       ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
+       SKIP_IF(ret < 0);
+       close(ret);
+
+       ret = test(child_pid);
+
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       return ret;
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak");
+}
index f25e854..844d18c 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0+
 
-TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2
+TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2
 top_srcdir = ../../../../..
 
 CFLAGS += -I../../../../../usr/include
@@ -13,3 +13,4 @@ $(OUTPUT)/spectre_v2: CFLAGS += -m64
 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S
 $(OUTPUT)/rfi_flush: flush_utils.c
 $(OUTPUT)/entry_flush: flush_utils.c
+$(OUTPUT)/uaccess_flush: flush_utils.c
index 78cf914..68ce377 100644 (file)
@@ -53,7 +53,7 @@ int entry_flush_test(void)
 
        entry_flush = entry_flush_orig;
 
-       fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+       fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
        FAIL_IF(fd < 0);
 
        p = (char *)memalign(zero_size, CACHELINE_SIZE);
index 0c3c4c4..4d95965 100644 (file)
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
+#include <sys/utsname.h>
 #include "utils.h"
 #include "flush_utils.h"
 
@@ -35,6 +36,18 @@ void syscall_loop(char *p, unsigned long iterations,
        }
 }
 
+void syscall_loop_uaccess(char *p, unsigned long iterations,
+                         unsigned long zero_size)
+{
+       struct utsname utsname;
+
+       for (unsigned long i = 0; i < iterations; i++) {
+               for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+                       load(p + j);
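+               /*
+                * uname() copies struct utsname back to user space, so each
+                * iteration also exercises the kernel-to-user copy path that
+                * the uaccess flush is meant to cover.
+                */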
+               uname(&utsname);
+       }
+}
+
 static void sigill_handler(int signr, siginfo_t *info, void *unused)
 {
        static int warned;
index 07a5eb3..e1e6828 100644 (file)
@@ -9,9 +9,16 @@
 
 #define CACHELINE_SIZE 128
 
+#define PERF_L1D_READ_MISS_CONFIG      ((PERF_COUNT_HW_CACHE_L1D) |            \
+                                       (PERF_COUNT_HW_CACHE_OP_READ << 8) |    \
+                                       (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
+
 void syscall_loop(char *p, unsigned long iterations,
                  unsigned long zero_size);
 
+void syscall_loop_uaccess(char *p, unsigned long iterations,
+                         unsigned long zero_size);
+
 void set_dscr(unsigned long val);
 
 #endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */
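
For reference, PERF_L1D_READ_MISS_CONFIG above follows the generic PERF_TYPE_HW_CACHE encoding documented in perf_event_open(2): the cache id occupies bits 0-7, the operation bits 8-15 and the result bits 16-23. A minimal standalone sketch of opening such a counter with the raw syscall (the function name and setup here are illustrative; the selftests use their own perf_event_open_counter() helper):

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int open_l1d_read_miss_counter(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.type = PERF_TYPE_HW_CACHE;
            attr.size = sizeof(attr);
            /* cache id | (op << 8) | (result << 16) */
            attr.config = PERF_COUNT_HW_CACHE_L1D |
                          (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                          (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
            attr.disabled = 1;

            /* pid = 0: calling thread, cpu = -1: any CPU */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }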
index 7565fd7..f73484a 100644 (file)
@@ -54,7 +54,7 @@ int rfi_flush_test(void)
 
        rfi_flush = rfi_flush_orig;
 
-       fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+       fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
        FAIL_IF(fd < 0);
 
        p = (char *)memalign(zero_size, CACHELINE_SIZE);
diff --git a/tools/testing/selftests/powerpc/security/uaccess_flush.c b/tools/testing/selftests/powerpc/security/uaccess_flush.c
new file mode 100644 (file)
index 0000000..cf80f96
--- /dev/null
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ * Copyright 2020 Canonical Ltd.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+#include "flush_utils.h"
+
+int uaccess_flush_test(void)
+{
+       char *p;
+       int repetitions = 10;
+       int fd, passes = 0, iter, rc = 0;
+       struct perf_event_read v;
+       __u64 l1d_misses_total = 0;
+       unsigned long iterations = 100000, zero_size = 24 * 1024;
+       unsigned long l1d_misses_expected;
+       int rfi_flush_orig;
+       int entry_flush_orig;
+       int uaccess_flush, uaccess_flush_orig;
+
+       SKIP_IF(geteuid() != 0);
+
+       // The PMU event we use only works on Power7 or later
+       SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+       if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
+               perror("Unable to read powerpc/rfi_flush debugfs file");
+               SKIP_IF(1);
+       }
+
+       if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
+               perror("Unable to read powerpc/entry_flush debugfs file");
+               SKIP_IF(1);
+       }
+
+       if (read_debugfs_file("powerpc/uaccess_flush", &uaccess_flush_orig) < 0) {
+               perror("Unable to read powerpc/uaccess_flush debugfs file");
+               SKIP_IF(1);
+       }
+
+       if (rfi_flush_orig != 0) {
+               if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) {
+                       perror("error writing to powerpc/rfi_flush debugfs file");
+                       FAIL_IF(1);
+               }
+       }
+
+       if (entry_flush_orig != 0) {
+               if (write_debugfs_file("powerpc/entry_flush", 0) < 0) {
+                       perror("error writing to powerpc/entry_flush debugfs file");
+                       FAIL_IF(1);
+               }
+       }
+
+       uaccess_flush = uaccess_flush_orig;
+
+       fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
+       FAIL_IF(fd < 0);
+
+       p = (char *)memalign(zero_size, CACHELINE_SIZE);
+
+       FAIL_IF(perf_event_enable(fd));
+
+       // disable L1 prefetching
+       set_dscr(1);
+
+       iter = repetitions;
+
+       /*
+        * We expect to see an L1D miss for each cacheline access when
+        * uaccess_flush is set. Allow a small variation on this.
+        */
+       l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
+again:
+       FAIL_IF(perf_event_reset(fd));
+
+       syscall_loop_uaccess(p, iterations, zero_size);
+
+       FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+       if (uaccess_flush && v.l1d_misses >= l1d_misses_expected)
+               passes++;
+       else if (!uaccess_flush && v.l1d_misses < (l1d_misses_expected / 2))
+               passes++;
+
+       l1d_misses_total += v.l1d_misses;
+
+       while (--iter)
+               goto again;
+
+       if (passes < repetitions) {
+               printf("FAIL (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+                      uaccess_flush, l1d_misses_total, uaccess_flush ? '<' : '>',
+                      uaccess_flush ? repetitions * l1d_misses_expected :
+                      repetitions * l1d_misses_expected / 2,
+                      repetitions - passes, repetitions);
+               rc = 1;
+       } else {
+               printf("PASS (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+                      uaccess_flush, l1d_misses_total, uaccess_flush ? '>' : '<',
+                      uaccess_flush ? repetitions * l1d_misses_expected :
+                      repetitions * l1d_misses_expected / 2,
+                      passes, repetitions);
+       }
+
+       if (uaccess_flush == uaccess_flush_orig) {
+               uaccess_flush = !uaccess_flush_orig;
+               if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush) < 0) {
+                       perror("error writing to powerpc/uaccess_flush debugfs file");
+                       return 1;
+               }
+               iter = repetitions;
+               l1d_misses_total = 0;
+               passes = 0;
+               goto again;
+       }
+
+       perf_event_disable(fd);
+       close(fd);
+
+       set_dscr(0);
+
+       if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
+               perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+               return 1;
+       }
+
+       if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
+               perror("unable to restore original value of powerpc/entry_flush debugfs file");
+               return 1;
+       }
+
+       if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush_orig) < 0) {
+               perror("unable to restore original value of powerpc/uaccess_flush debugfs file");
+               return 1;
+       }
+
+       return rc;
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness(uaccess_flush_test, "uaccess_flush_test");
+}
index c75960a..1152107 100644 (file)
@@ -66,7 +66,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc)
        /* Get thread endianness: extract bit LE from MSR */
        thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
 
-       /***
+       /*
         * Little-Endian Machine
         */
 
@@ -126,7 +126,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc)
                }
        }
 
-       /***
+       /*
         * Big-Endian Machine
         */
 
index 7bf841a..6b13dc2 100644 (file)
@@ -25,12 +25,20 @@ static void fill_function_pointers(void)
        if (!vdso)
                vdso = dlopen("linux-gate.so.1",
                              RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+       if (!vdso)
+               vdso = dlopen("linux-vdso32.so.1",
+                             RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+       if (!vdso)
+               vdso = dlopen("linux-vdso64.so.1",
+                             RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
        if (!vdso) {
                pr_err("[WARN]\tfailed to find vDSO\n");
                return;
        }
 
        vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+       if (!vdso_clock_gettime)
+               vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__kernel_clock_gettime");
        if (!vdso_clock_gettime)
                pr_err("Warning: failed to find clock_gettime in vDSO\n");