s390: convert to generic entry
author Sven Schnelle <svens@linux.ibm.com>
Sat, 21 Nov 2020 10:14:56 +0000 (11:14 +0100)
committer Vasily Gorbik <gor@linux.ibm.com>
Tue, 19 Jan 2021 11:29:26 +0000 (12:29 +0100)
This patch converts s390 to use the generic entry infrastructure from
kernel/entry/*.

There are a few special things on s390:

- PIF_PER_TRAP is moved to TIF_PER_TRAP as the generic code doesn't
  know about our PIF flags in exit_to_user_mode_loop().

- The old code had several ways to restart syscalls:

  a) PIF_SYSCALL_RESTART, which was only set during execve to force a
     restart after upgrading a process (usually qemu-kvm) to pgste page
     table extensions.

  b) PIF_SYSCALL, which is set by do_signal() to indicate that the
     current syscall should be restarted. This is changed so that
     do_signal() now also uses PIF_SYSCALL_RESTART. Continuing to use
     PIF_SYSCALL doesn't work with the generic code, and changing it
     to PIF_SYSCALL_RESTART makes the roles of PIF_SYSCALL and
     PIF_SYSCALL_RESTART clearly distinct.

- On s390 calling sys_sigreturn or sys_rt_sigreturn is implemented by
  executing a svc instruction on the process stack, which causes a fault.
  While handling that fault, the fault code sets PIF_SYSCALL to hand over
  processing to the syscall code on exit to usermode.

The patch introduces PIF_SYSCALL_RET_SET, which is set if ptrace sets
a return value for a syscall. The s390x ptrace ABI uses r2 both for the
syscall number and return value, so ptrace cannot set the syscall number +
return value at the same time. The flag makes handling that a bit easier.
do_syscall() will just skip executing the syscall if PIF_SYSCALL_RET_SET
is set.

CONFIG_DEBUG_USER_ASCE was removed in favour of the generic CONFIG_DEBUG_ENTRY.
With it enabled, CR1/7/13 are checked on both kernel entry and exit to verify
that they contain the correct ASCEs.

Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
40 files changed:
arch/s390/Kconfig
arch/s390/Kconfig.debug
arch/s390/configs/debug_defconfig
arch/s390/configs/defconfig
arch/s390/include/asm/cputime.h
arch/s390/include/asm/elf.h
arch/s390/include/asm/entry-common.h [new file with mode: 0644]
arch/s390/include/asm/fpu/api.h
arch/s390/include/asm/idle.h
arch/s390/include/asm/lowcore.h
arch/s390/include/asm/nmi.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/ptrace.h
arch/s390/include/asm/syscall.h
arch/s390/include/asm/thread_info.h
arch/s390/include/asm/uaccess.h
arch/s390/include/asm/vtime.h
arch/s390/include/uapi/asm/ptrace.h
arch/s390/kernel/Makefile
arch/s390/kernel/asm-offsets.c
arch/s390/kernel/compat_signal.c
arch/s390/kernel/entry.S
arch/s390/kernel/entry.h
arch/s390/kernel/fpu.c
arch/s390/kernel/idle.c
arch/s390/kernel/irq.c
arch/s390/kernel/nmi.c
arch/s390/kernel/process.c
arch/s390/kernel/ptrace.c
arch/s390/kernel/setup.c
arch/s390/kernel/signal.c
arch/s390/kernel/smp.c
arch/s390/kernel/sys_s390.c [deleted file]
arch/s390/kernel/syscall.c [new file with mode: 0644]
arch/s390/kernel/traps.c
arch/s390/kernel/uprobes.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/vsie.c
arch/s390/lib/uaccess.c
arch/s390/mm/fault.c

index c72874f..41a2c58 100644 (file)
@@ -123,6 +123,7 @@ config S390
        select GENERIC_ALLOCATOR
        select GENERIC_CPU_AUTOPROBE
        select GENERIC_CPU_VULNERABILITIES
+       select GENERIC_ENTRY
        select GENERIC_FIND_FIRST_BIT
        select GENERIC_GETTIMEOFDAY
        select GENERIC_PTDUMP
index 6bfacee..ef96c25 100644 (file)
@@ -6,10 +6,12 @@ config TRACE_IRQFLAGS_SUPPORT
 config EARLY_PRINTK
        def_bool y
 
-config DEBUG_USER_ASCE
-       bool "Debug User ASCE"
+config DEBUG_ENTRY
+       bool "Debug low-level entry code"
+       depends on DEBUG_KERNEL
        help
-         Check on exit to user space that address space control
-         elements are setup correctly.
+         This option enables sanity checks in s390 low-level entry code.
+         Some of these sanity checks may slow down kernel entries and
+         exits or otherwise impact performance.
 
          If unsure, say N.
index c1c4f97..2d8dcce 100644 (file)
@@ -833,7 +833,6 @@ CONFIG_BPF_KPROBE_OVERRIDE=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_FTRACE_STARTUP_TEST=y
 # CONFIG_EVENT_TRACE_STARTUP_TEST is not set
-CONFIG_DEBUG_USER_ASCE=y
 CONFIG_NOTIFIER_ERROR_INJECTION=m
 CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
 CONFIG_FAULT_INJECTION=y
@@ -857,3 +856,4 @@ CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_BPF=m
+CONFIG_DEBUG_ENTRY=y
index 467a06d..3eadcda 100644 (file)
@@ -781,7 +781,6 @@ CONFIG_FTRACE_SYSCALLS=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_BPF_KPROBE_OVERRIDE=y
 CONFIG_HIST_TRIGGERS=y
-CONFIG_DEBUG_USER_ASCE=y
 CONFIG_LKDTM=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
index cb729d1..1d38984 100644 (file)
@@ -35,4 +35,6 @@ u64 arch_cpu_idle_time(int cpu);
 
 #define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
 
+void account_idle_time_irq(void);
+
 #endif /* _S390_CPUTIME_H */
index 5775fc2..66d51ad 100644 (file)
@@ -233,8 +233,7 @@ extern char elf_platform[];
 do {                                                           \
        set_personality(PER_LINUX |                             \
                (current->personality & (~PER_MASK)));          \
-       current->thread.sys_call_table =                        \
-               (unsigned long) &sys_call_table;                \
+       current->thread.sys_call_table = sys_call_table;        \
 } while (0)
 #else /* CONFIG_COMPAT */
 #define SET_PERSONALITY(ex)                                    \
@@ -245,11 +244,11 @@ do {                                                              \
        if ((ex).e_ident[EI_CLASS] == ELFCLASS32) {             \
                set_thread_flag(TIF_31BIT);                     \
                current->thread.sys_call_table =                \
-                       (unsigned long) &sys_call_table_emu;    \
+                       sys_call_table_emu;                     \
        } else {                                                \
                clear_thread_flag(TIF_31BIT);                   \
                current->thread.sys_call_table =                \
-                       (unsigned long) &sys_call_table;        \
+                       sys_call_table;                         \
        }                                                       \
 } while (0)
 #endif /* CONFIG_COMPAT */
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
new file mode 100644 (file)
index 0000000..75cebc8
--- /dev/null
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_S390_ENTRY_COMMON_H
+#define ARCH_S390_ENTRY_COMMON_H
+
+#include <linux/sched.h>
+#include <linux/audit.h>
+#include <linux/tracehook.h>
+#include <linux/processor.h>
+#include <linux/uaccess.h>
+#include <asm/fpu/api.h>
+
+#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)
+
+void do_per_trap(struct pt_regs *regs);
+void do_syscall(struct pt_regs *regs);
+
+typedef void (*pgm_check_func)(struct pt_regs *regs);
+
+extern pgm_check_func pgm_check_table[128];
+
+#ifdef CONFIG_DEBUG_ENTRY
+static __always_inline void arch_check_user_regs(struct pt_regs *regs)
+{
+       debug_user_asce(0);
+}
+
+#define arch_check_user_regs arch_check_user_regs
+#endif /* CONFIG_DEBUG_ENTRY */
+
+static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
+                                                       unsigned long ti_work)
+{
+       if (ti_work & _TIF_PER_TRAP) {
+               clear_thread_flag(TIF_PER_TRAP);
+               do_per_trap(regs);
+       }
+
+       if (ti_work & _TIF_GUARDED_STORAGE)
+               gs_load_bc_cb(regs);
+}
+
+#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work
+
+static __always_inline void arch_exit_to_user_mode(void)
+{
+       if (test_cpu_flag(CIF_FPU))
+               __load_fpu_regs();
+
+       if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+               debug_user_asce(1);
+}
+
+#define arch_exit_to_user_mode arch_exit_to_user_mode
+
+static inline bool on_thread_stack(void)
+{
+       return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1));
+}
+
+#endif
index 34a7ae6..a959b81 100644 (file)
@@ -47,6 +47,8 @@
 #include <linux/preempt.h>
 
 void save_fpu_regs(void);
+void load_fpu_regs(void);
+void __load_fpu_regs(void);
 
 static inline int test_fp_ctl(u32 fpc)
 {
index 6d4226d..b04f6a7 100644 (file)
@@ -20,11 +20,13 @@ struct s390_idle_data {
        unsigned long long clock_idle_exit;
        unsigned long long timer_idle_enter;
        unsigned long long timer_idle_exit;
+       unsigned long mt_cycles_enter[8];
 };
 
 extern struct device_attribute dev_attr_idle_count;
 extern struct device_attribute dev_attr_idle_time_us;
 
-void psw_idle(struct s390_idle_data *, unsigned long);
+void psw_idle(struct s390_idle_data *data, unsigned long psw_mask);
+void psw_idle_exit(void);
 
 #endif /* _S390_IDLE_H */
index 69ce919..4d65c8e 100644 (file)
@@ -81,8 +81,8 @@ struct lowcore {
        psw_t   return_mcck_psw;                /* 0x02a0 */
 
        /* CPU accounting and timing values. */
-       __u64   sync_enter_timer;               /* 0x02b0 */
-       __u64   async_enter_timer;              /* 0x02b8 */
+       __u64   sys_enter_timer;                /* 0x02b0 */
+       __u8    pad_0x02b8[0x02c0-0x02b8];      /* 0x02b8 */
        __u64   mcck_enter_timer;               /* 0x02c0 */
        __u64   exit_timer;                     /* 0x02c8 */
        __u64   user_timer;                     /* 0x02d0 */
index 5afee80..20e51c9 100644 (file)
@@ -99,6 +99,7 @@ int nmi_alloc_per_cpu(struct lowcore *lc);
 void nmi_free_per_cpu(struct lowcore *lc);
 
 void s390_handle_mcck(void);
+void __s390_handle_mcck(void);
 int s390_do_machine_check(struct pt_regs *regs);
 
 #endif /* __ASSEMBLY__ */
index 2058a43..fa67b66 100644 (file)
 #include <asm/runtime_instr.h>
 #include <asm/fpu/types.h>
 #include <asm/fpu/internal.h>
+#include <asm/irqflags.h>
+
+typedef long (*sys_call_ptr_t)(unsigned long, unsigned long,
+                              unsigned long, unsigned long,
+                              unsigned long, unsigned long);
 
 static inline void set_cpu_flag(int flag)
 {
@@ -101,31 +106,32 @@ extern void __bpon(void);
  */
 struct thread_struct {
        unsigned int  acrs[NUM_ACRS];
-        unsigned long ksp;              /* kernel stack pointer             */
-       unsigned long user_timer;       /* task cputime in user space */
-       unsigned long guest_timer;      /* task cputime in kvm guest */
-       unsigned long system_timer;     /* task cputime in kernel space */
-       unsigned long hardirq_timer;    /* task cputime in hardirq context */
-       unsigned long softirq_timer;    /* task cputime in softirq context */
-       unsigned long sys_call_table;   /* system call table address */
-       unsigned long gmap_addr;        /* address of last gmap fault. */
-       unsigned int gmap_write_flag;   /* gmap fault write indication */
-       unsigned int gmap_int_code;     /* int code of last gmap fault */
-       unsigned int gmap_pfault;       /* signal of a pending guest pfault */
+       unsigned long ksp;                      /* kernel stack pointer */
+       unsigned long user_timer;               /* task cputime in user space */
+       unsigned long guest_timer;              /* task cputime in kvm guest */
+       unsigned long system_timer;             /* task cputime in kernel space */
+       unsigned long hardirq_timer;            /* task cputime in hardirq context */
+       unsigned long softirq_timer;            /* task cputime in softirq context */
+       const sys_call_ptr_t *sys_call_table;   /* system call table address */
+       unsigned long gmap_addr;                /* address of last gmap fault. */
+       unsigned int gmap_write_flag;           /* gmap fault write indication */
+       unsigned int gmap_int_code;             /* int code of last gmap fault */
+       unsigned int gmap_pfault;               /* signal of a pending guest pfault */
+
        /* Per-thread information related to debugging */
-       struct per_regs per_user;       /* User specified PER registers */
-       struct per_event per_event;     /* Cause of the last PER trap */
-       unsigned long per_flags;        /* Flags to control debug behavior */
-       unsigned int system_call;       /* system call number in signal */
-       unsigned long last_break;       /* last breaking-event-address. */
-        /* pfault_wait is used to block the process on a pfault event */
+       struct per_regs per_user;               /* User specified PER registers */
+       struct per_event per_event;             /* Cause of the last PER trap */
+       unsigned long per_flags;                /* Flags to control debug behavior */
+       unsigned int system_call;               /* system call number in signal */
+       unsigned long last_break;               /* last breaking-event-address. */
+       /* pfault_wait is used to block the process on a pfault event */
        unsigned long pfault_wait;
        struct list_head list;
        /* cpu runtime instrumentation */
        struct runtime_instr_cb *ri_cb;
-       struct gs_cb *gs_cb;            /* Current guarded storage cb */
-       struct gs_cb *gs_bc_cb;         /* Broadcast guarded storage cb */
-       unsigned char trap_tdb[256];    /* Transaction abort diagnose block */
+       struct gs_cb *gs_cb;                    /* Current guarded storage cb */
+       struct gs_cb *gs_bc_cb;                 /* Broadcast guarded storage cb */
+       unsigned char trap_tdb[256];            /* Transaction abort diagnose block */
        /*
         * Warning: 'fpu' is dynamically-sized. It *MUST* be at
         * the end.
@@ -184,6 +190,7 @@ static inline void release_thread(struct task_struct *tsk) { }
 
 /* Free guarded storage control block */
 void guarded_storage_release(struct task_struct *tsk);
+void gs_load_bc_cb(struct pt_regs *regs);
 
 unsigned long get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk) ((struct pt_regs *) \
@@ -324,6 +331,11 @@ extern void memcpy_absolute(void *, void *, size_t);
 extern int s390_isolate_bp(void);
 extern int s390_isolate_bp_guest(void);
 
+static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
+{
+       return arch_irqs_disabled_flags(regs->psw.mask);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __ASM_S390_PROCESSOR_H */
index 73ca7f7..f828be7 100644 (file)
 #include <uapi/asm/ptrace.h>
 
 #define PIF_SYSCALL            0       /* inside a system call */
-#define PIF_PER_TRAP           1       /* deliver sigtrap on return to user */
-#define PIF_SYSCALL_RESTART    2       /* restart the current system call */
+#define PIF_SYSCALL_RESTART    1       /* restart the current system call */
+#define PIF_SYSCALL_RET_SET    2       /* return value was set via ptrace */
 #define PIF_GUEST_FAULT                3       /* indicates program check in sie64a */
 
 #define _PIF_SYSCALL           BIT(PIF_SYSCALL)
-#define _PIF_PER_TRAP          BIT(PIF_PER_TRAP)
 #define _PIF_SYSCALL_RESTART   BIT(PIF_SYSCALL_RESTART)
+#define _PIF_SYSCALL_RET_SET   BIT(PIF_SYSCALL_RET_SET)
 #define _PIF_GUEST_FAULT       BIT(PIF_GUEST_FAULT)
 
 #ifndef __ASSEMBLY__
@@ -68,6 +68,9 @@ enum {
        &(*(struct psw_bits *)(&(__psw)));      \
 }))
 
+#define PGM_INT_CODE_MASK      0x7f
+#define PGM_INT_CODE_PER       0x80
+
 /*
  * The pt_regs struct defines the way the registers are stored on
  * the stack during a system call.
index d9d5de0..9107e3d 100644 (file)
@@ -14,8 +14,8 @@
 #include <linux/err.h>
 #include <asm/ptrace.h>
 
-extern const unsigned long sys_call_table[];
-extern const unsigned long sys_call_table_emu[];
+extern const sys_call_ptr_t sys_call_table[];
+extern const sys_call_ptr_t sys_call_table_emu[];
 
 static inline long syscall_get_nr(struct task_struct *task,
                                  struct pt_regs *regs)
@@ -56,6 +56,7 @@ static inline void syscall_set_return_value(struct task_struct *task,
                                            struct pt_regs *regs,
                                            int error, long val)
 {
+       set_pt_regs_flag(regs, PIF_SYSCALL_RET_SET);
        regs->gprs[2] = error ? error : val;
 }
 
@@ -97,4 +98,10 @@ static inline int syscall_get_arch(struct task_struct *task)
 #endif
        return AUDIT_ARCH_S390X;
 }
+
+static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
+{
+       return false;
+}
+
 #endif /* _ASM_SYSCALL_H */
index 3c5b1f9..28696ca 100644 (file)
@@ -36,6 +36,7 @@
  */
 struct thread_info {
        unsigned long           flags;          /* low level flags */
+       unsigned long           syscall_work;   /* SYSCALL_WORK_ flags */
 };
 
 /*
@@ -68,6 +69,7 @@ void arch_setup_new_exec(void);
 #define TIF_NOTIFY_SIGNAL      7       /* signal notifications exist */
 #define TIF_ISOLATE_BP         8       /* Run process with isolated BP */
 #define TIF_ISOLATE_BP_GUEST   9       /* Run KVM guests with isolated BP */
+#define TIF_PER_TRAP           10      /* Need to handle PER trap on exit to usermode */
 
 #define TIF_31BIT              16      /* 32bit process */
 #define TIF_MEMDIE             17      /* is terminating due to OOM killer */
@@ -91,6 +93,7 @@ void arch_setup_new_exec(void);
 #define _TIF_PATCH_PENDING     BIT(TIF_PATCH_PENDING)
 #define _TIF_ISOLATE_BP                BIT(TIF_ISOLATE_BP)
 #define _TIF_ISOLATE_BP_GUEST  BIT(TIF_ISOLATE_BP_GUEST)
+#define _TIF_PER_TRAP          BIT(TIF_PER_TRAP)
 
 #define _TIF_31BIT             BIT(TIF_31BIT)
 #define _TIF_SINGLE_STEP       BIT(TIF_SINGLE_STEP)
index c670788..4756d29 100644 (file)
@@ -18,7 +18,7 @@
 #include <asm/extable.h>
 #include <asm/facility.h>
 
-void debug_user_asce(void);
+void debug_user_asce(int exit);
 
 static inline int __range_ok(unsigned long addr, unsigned long size)
 {
index fac6a67..fe17e44 100644 (file)
@@ -4,4 +4,18 @@
 
 #define __ARCH_HAS_VTIME_TASK_SWITCH
 
+static inline void update_timer_sys(void)
+{
+       S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
+       S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.sys_enter_timer;
+       S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
+}
+
+static inline void update_timer_mcck(void)
+{
+       S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
+       S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.mcck_enter_timer;
+       S390_lowcore.last_update_timer = S390_lowcore.mcck_enter_timer;
+}
+
 #endif /* _S390_VTIME_H */
index 543dd70..ad64d67 100644 (file)
 #define ACR_SIZE       4
 
 
-#define PTRACE_OLDSETOPTIONS        21
-
+#define PTRACE_OLDSETOPTIONS           21
+#define PTRACE_SYSEMU                  31
+#define PTRACE_SYSEMU_SINGLESTEP       32
 #ifndef __ASSEMBLY__
 #include <linux/stddef.h>
 #include <linux/types.h>
index dd73b7f..c97818a 100644 (file)
@@ -34,7 +34,7 @@ CFLAGS_dumpstack.o    += -fno-optimize-sibling-calls
 CFLAGS_unwind_bc.o     += -fno-optimize-sibling-calls
 
 obj-y  := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
-obj-y  += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
+obj-y  += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y  += debug.o irq.o ipl.o dis.o diag.o vdso.o
 obj-y  += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o
 obj-y  += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
index 79724d8..d22bb28 100644 (file)
@@ -26,26 +26,14 @@ int main(void)
        BLANK();
        /* thread struct offsets */
        OFFSET(__THREAD_ksp, thread_struct, ksp);
-       OFFSET(__THREAD_sysc_table,  thread_struct, sys_call_table);
-       OFFSET(__THREAD_last_break, thread_struct, last_break);
-       OFFSET(__THREAD_FPU_fpc, thread_struct, fpu.fpc);
-       OFFSET(__THREAD_FPU_regs, thread_struct, fpu.regs);
-       OFFSET(__THREAD_per_cause, thread_struct, per_event.cause);
-       OFFSET(__THREAD_per_address, thread_struct, per_event.address);
-       OFFSET(__THREAD_per_paid, thread_struct, per_event.paid);
-       OFFSET(__THREAD_trap_tdb, thread_struct, trap_tdb);
        BLANK();
        /* thread info offsets */
        OFFSET(__TI_flags, task_struct, thread_info.flags);
        BLANK();
        /* pt_regs offsets */
-       OFFSET(__PT_ARGS, pt_regs, args);
        OFFSET(__PT_PSW, pt_regs, psw);
        OFFSET(__PT_GPRS, pt_regs, gprs);
        OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
-       OFFSET(__PT_INT_CODE, pt_regs, int_code);
-       OFFSET(__PT_INT_PARM, pt_regs, int_parm);
-       OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long);
        OFFSET(__PT_FLAGS, pt_regs, flags);
        OFFSET(__PT_CR1, pt_regs, cr1);
        DEFINE(__PT_SIZE, sizeof(struct pt_regs));
@@ -64,6 +52,7 @@ int main(void)
        OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit);
        OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
        OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit);
+       OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter);
        BLANK();
        /* hardware defined lowcore locations 0x000 - 0x1ff */
        OFFSET(__LC_EXT_PARAMS, lowcore, ext_params);
@@ -115,13 +104,9 @@ int main(void)
        OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
        OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
        OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
-       OFFSET(__LC_SYNC_ENTER_TIMER, lowcore, sync_enter_timer);
-       OFFSET(__LC_ASYNC_ENTER_TIMER, lowcore, async_enter_timer);
+       OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer);
        OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer);
        OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer);
-       OFFSET(__LC_USER_TIMER, lowcore, user_timer);
-       OFFSET(__LC_SYSTEM_TIMER, lowcore, system_timer);
-       OFFSET(__LC_STEAL_TIMER, lowcore, steal_timer);
        OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer);
        OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
        OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
index 38d4bdb..1d0e17e 100644 (file)
@@ -118,6 +118,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
        fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
 
        clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+       clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
        return 0;
 }
 
index f1ba197..785425b 100644 (file)
@@ -51,38 +51,8 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_SIZE_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
 STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
 
-_TIF_WORK      = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-                  _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING | \
-                  _TIF_NOTIFY_SIGNAL)
-_TIF_TRACE     = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
-                  _TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK      = (_CIF_FPU)
-_PIF_WORK      = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
-
 _LPP_OFFSET    = __LC_LPP
 
-       .macro  TRACE_IRQS_ON
-#ifdef CONFIG_TRACE_IRQFLAGS
-       basr    %r2,%r0
-       brasl   %r14,trace_hardirqs_on_caller
-#endif
-       .endm
-
-       .macro  TRACE_IRQS_OFF
-#ifdef CONFIG_TRACE_IRQFLAGS
-       basr    %r2,%r0
-       brasl   %r14,trace_hardirqs_off_caller
-#endif
-       .endm
-
-       .macro  LOCKDEP_SYS_EXIT
-#ifdef CONFIG_LOCKDEP
-       tm      __PT_PSW+1(%r11),0x01   # returning to user ?
-       jz      .+10
-       brasl   %r14,lockdep_sys_exit
-#endif
-       .endm
-
        .macro  CHECK_STACK savearea
 #ifdef CONFIG_CHECK_STACK
        tml     %r15,STACK_SIZE - CONFIG_STACK_GUARD
@@ -91,12 +61,6 @@ _LPP_OFFSET  = __LC_LPP
 #endif
        .endm
 
-       .macro  DEBUG_USER_ASCE
-#ifdef CONFIG_DEBUG_USER_ASCE
-       brasl   %r14,debug_user_asce
-#endif
-       .endm
-
        .macro  CHECK_VMAP_STACK savearea,oklabel
 #ifdef CONFIG_VMAP_STACK
        lgr     %r14,%r15
@@ -117,9 +81,9 @@ _LPP_OFFSET  = __LC_LPP
 #endif
        .endm
 
-       .macro  SWITCH_ASYNC savearea,timer,clock
+       .macro  SWITCH_KERNEL savearea
        tmhh    %r8,0x0001              # interrupting from user ?
-       jnz     4f
+       jnz     1f
 #if IS_ENABLED(CONFIG_KVM)
        lgr     %r14,%r9
        larl    %r13,.Lsie_gmap
@@ -130,92 +94,16 @@ _LPP_OFFSET        = __LC_LPP
        lghi    %r11,\savearea          # inside critical section, do cleanup
        brasl   %r14,.Lcleanup_sie
 #endif
-0:     larl    %r13,.Lpsw_idle_exit
-       cgr     %r13,%r9
-       jne     3f
-
-       larl    %r1,smp_cpu_mtid
-       llgf    %r1,0(%r1)
-       ltgr    %r1,%r1
-       jz      2f                      # no SMT, skip mt_cycles calculation
-       .insn   rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15)
-       larl    %r3,mt_cycles
-       ag      %r3,__LC_PERCPU_OFFSET
-       la      %r4,__SF_EMPTY+16(%r15)
-1:     lg      %r0,0(%r3)
-       slg     %r0,0(%r4)
-       alg     %r0,64(%r4)
-       stg     %r0,0(%r3)
-       la      %r3,8(%r3)
-       la      %r4,8(%r4)
-       brct    %r1,1b
-
-2:     mvc     __CLOCK_IDLE_EXIT(8,%r2), \clock
-       mvc     __TIMER_IDLE_EXIT(8,%r2), \timer
-       # account system time going idle
-       ni      __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT
-
-       lg      %r13,__LC_STEAL_TIMER
-       alg     %r13,__CLOCK_IDLE_ENTER(%r2)
-       slg     %r13,__LC_LAST_UPDATE_CLOCK
-       stg     %r13,__LC_STEAL_TIMER
-
-       mvc     __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
-
-       lg      %r13,__LC_SYSTEM_TIMER
-       alg     %r13,__LC_LAST_UPDATE_TIMER
-       slg     %r13,__TIMER_IDLE_ENTER(%r2)
-       stg     %r13,__LC_SYSTEM_TIMER
-       mvc     __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
-
-       nihh    %r8,0xfcfd              # clear wait state and irq bits
-3:     lg      %r14,__LC_ASYNC_STACK   # are we already on the target stack?
-       slgr    %r14,%r15
-       srag    %r14,%r14,STACK_SHIFT
-       jnz     5f
-       CHECK_STACK \savearea
+0:     CHECK_STACK \savearea
+       lgr     %r11,%r15
        aghi    %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-       j       6f
-4:     UPDATE_VTIME %r14,%r15,\timer
-       BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-5:     lg      %r15,__LC_ASYNC_STACK   # load async stack
-6:     la      %r11,STACK_FRAME_OVERHEAD(%r15)
-       .endm
-
-       .macro UPDATE_VTIME w1,w2,enter_timer
-       lg      \w1,__LC_EXIT_TIMER
-       lg      \w2,__LC_LAST_UPDATE_TIMER
-       slg     \w1,\enter_timer
-       slg     \w2,__LC_EXIT_TIMER
-       alg     \w1,__LC_USER_TIMER
-       alg     \w2,__LC_SYSTEM_TIMER
-       stg     \w1,__LC_USER_TIMER
-       stg     \w2,__LC_SYSTEM_TIMER
-       mvc     __LC_LAST_UPDATE_TIMER(8),\enter_timer
-       .endm
-
-       .macro RESTORE_SM_CLEAR_PER
-       stg     %r8,__LC_RETURN_PSW
-       ni      __LC_RETURN_PSW,0xbf
-       ssm     __LC_RETURN_PSW
-       .endm
-
-       .macro ENABLE_INTS
-       stosm   __SF_EMPTY(%r15),3
-       .endm
-
-       .macro ENABLE_INTS_TRACE
-       TRACE_IRQS_ON
-       ENABLE_INTS
-       .endm
-
-       .macro DISABLE_INTS
-       stnsm   __SF_EMPTY(%r15),0xfc
-       .endm
-
-       .macro DISABLE_INTS_TRACE
-       DISABLE_INTS
-       TRACE_IRQS_OFF
+       stg     %r11,__SF_BACKCHAIN(%r15)
+       j       2f
+1:     BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
+       lctlg   %c1,%c1,__LC_KERNEL_ASCE
+       lg      %r15,__LC_KERNEL_STACK
+       xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+2:     la      %r11,STACK_FRAME_OVERHEAD(%r15)
        .endm
 
        .macro STCK savearea
@@ -267,18 +155,17 @@ _LPP_OFFSET       = __LC_LPP
                    "jnz .+8; .long 0xb2e8d000", 82
        .endm
 
-       GEN_BR_THUNK %r9
        GEN_BR_THUNK %r14
        GEN_BR_THUNK %r14,%r11
 
        .section .kprobes.text, "ax"
 .Ldummy:
        /*
-        * This nop exists only in order to avoid that __switch_to starts at
+        * This nop exists only in order to avoid that __bpon starts at
         * the beginning of the kprobes text section. In that case we would
         * have several symbols at the same address. E.g. objdump would take
         * an arbitrary symbol name when disassembling this code.
-        * With the added nop in between the __switch_to symbol is unique
+        * With the added nop in between the __bpon symbol is unique
         * again.
         */
        nop     0
@@ -327,10 +214,6 @@ ENTRY(sie64a)
        stg     %r3,__SF_SIE_SAVEAREA(%r15)     # save guest register save area
        xc      __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
        mvc     __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
-       TSTMSK  __LC_CPU_FLAGS,_CIF_FPU         # load guest fp/vx registers ?
-       jno     .Lsie_load_guest_gprs
-       brasl   %r14,load_fpu_regs              # load guest fp/vx regs
-.Lsie_load_guest_gprs:
        lmg     %r0,%r13,0(%r3)                 # load guest gprs 0-13
        lg      %r14,__LC_GMAP                  # get gmap pointer
        ltgr    %r14,%r14
@@ -370,7 +253,6 @@ sie_exit:
        stmg    %r0,%r13,0(%r14)                # save guest gprs 0-13
        xgr     %r0,%r0                         # clear guest registers to
        xgr     %r1,%r1                         # prevent speculative use
-       xgr     %r2,%r2
        xgr     %r3,%r3
        xgr     %r4,%r4
        xgr     %r5,%r5
@@ -397,249 +279,68 @@ EXPORT_SYMBOL(sie_exit)
  */
 
 ENTRY(system_call)
-       stpt    __LC_SYNC_ENTER_TIMER
+       stpt    __LC_SYS_ENTER_TIMER
        stmg    %r8,%r15,__LC_SAVE_AREA_SYNC
        BPOFF
-       lg      %r12,__LC_CURRENT
-       lghi    %r14,_PIF_SYSCALL
+       lghi    %r14,0                          # per_trap = 0 for a plain svc; the single-stepped svc path enters at .Lsysc_per with 1
 .Lsysc_per:
        lctlg   %c1,%c1,__LC_KERNEL_ASCE
-       lghi    %r13,__TASK_thread
+       lg      %r12,__LC_CURRENT
        lg      %r15,__LC_KERNEL_STACK
-       la      %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
-       UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
-       BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-       stmg    %r0,%r7,__PT_R0(%r11)
-       mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
-       mvc     __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
-       mvc     __PT_INT_CODE(4,%r11),__LC_SVC_ILC
-       stg     %r14,__PT_FLAGS(%r11)
        xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-       ENABLE_INTS
-.Lsysc_do_svc:
+       stmg    %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)      # save gprs 0-7 into the pt_regs area on the kernel stack
+       BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
        # clear user controlled register to prevent speculative use
        xgr     %r0,%r0
-       # load address of system call table
-       lg      %r10,__THREAD_sysc_table(%r13,%r12)
-       llgh    %r8,__PT_INT_CODE+2(%r11)
-       slag    %r8,%r8,3                       # shift and test for svc 0
-       jnz     .Lsysc_nr_ok
-       # svc 0: system call number in %r1
-       llgfr   %r1,%r1                         # clear high word in r1
-       sth     %r1,__PT_INT_CODE+2(%r11)
-       cghi    %r1,NR_syscalls
-       jnl     .Lsysc_nr_ok
-       slag    %r8,%r1,3
-.Lsysc_nr_ok:
-       stg     %r2,__PT_ORIG_GPR2(%r11)
-       stg     %r7,STACK_FRAME_OVERHEAD(%r15)
-       lg      %r9,0(%r8,%r10)                 # get system call add.
-       TSTMSK  __TI_flags(%r12),_TIF_TRACE
-       jnz     .Lsysc_tracesys
-       BASR_EX %r14,%r9                        # call sys_xxxx
-       stg     %r2,__PT_R2(%r11)               # store return value
-
-.Lsysc_return:
-#ifdef CONFIG_DEBUG_RSEQ
-       lgr     %r2,%r11
-       brasl   %r14,rseq_syscall
-#endif
-       LOCKDEP_SYS_EXIT
-.Lsysc_tif:
-       DISABLE_INTS
-       TSTMSK  __PT_FLAGS(%r11),_PIF_WORK
-       jnz     .Lsysc_work
-       TSTMSK  __TI_flags(%r12),_TIF_WORK
-       jnz     .Lsysc_work                     # check for work
-       DEBUG_USER_ASCE
+       xgr     %r1,%r1
+       xgr     %r4,%r4
+       xgr     %r5,%r5
+       xgr     %r6,%r6
+       xgr     %r7,%r7
+       xgr     %r8,%r8
+       xgr     %r9,%r9
+       xgr     %r10,%r10
+       xgr     %r11,%r11
+       la      %r2,STACK_FRAME_OVERHEAD(%r15)  # pointer to pt_regs
+       lgr     %r3,%r14                        # per_trap flag chosen at entry (0 or 1)
+       brasl   %r14,__do_syscall               # __do_syscall(regs, per_trap)
        lctlg   %c1,%c1,__LC_USER_ASCE
-       BPEXIT  __TI_flags(%r12),_TIF_ISOLATE_BP
-       TSTMSK  __LC_CPU_FLAGS, _CIF_FPU
-       jz      .Lsysc_skip_fpu
-       brasl   %r14,load_fpu_regs
-.Lsysc_skip_fpu:
-       mvc     __LC_RETURN_PSW(16),__PT_PSW(%r11)
+       mvc     __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) # PSW to resume with, copied from pt_regs
+       BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
+       lmg     %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)     # reload all 16 gprs from pt_regs (this also replaces %r15)
        stpt    __LC_EXIT_TIMER
-       lmg     %r0,%r15,__PT_R0(%r11)
        b       __LC_RETURN_LPSWE
-
-#
-# One of the work bits is on. Find out which one.
-#
-.Lsysc_work:
-       ENABLE_INTS
-       TSTMSK  __TI_flags(%r12),_TIF_NEED_RESCHED
-       jo      .Lsysc_reschedule
-       TSTMSK  __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
-       jo      .Lsysc_syscall_restart
-#ifdef CONFIG_UPROBES
-       TSTMSK  __TI_flags(%r12),_TIF_UPROBE
-       jo      .Lsysc_uprobe_notify
-#endif
-       TSTMSK  __TI_flags(%r12),_TIF_GUARDED_STORAGE
-       jo      .Lsysc_guarded_storage
-       TSTMSK  __PT_FLAGS(%r11),_PIF_PER_TRAP
-       jo      .Lsysc_singlestep
-#ifdef CONFIG_LIVEPATCH
-       TSTMSK  __TI_flags(%r12),_TIF_PATCH_PENDING
-       jo      .Lsysc_patch_pending    # handle live patching just before
-                                       # signals and possible syscall restart
-#endif
-       TSTMSK  __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
-       jo      .Lsysc_syscall_restart
-       TSTMSK  __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL)
-       jnz     .Lsysc_sigpending
-       TSTMSK  __TI_flags(%r12),_TIF_NOTIFY_RESUME
-       jo      .Lsysc_notify_resume
-       j       .Lsysc_return
-
-#
-# _TIF_NEED_RESCHED is set, call schedule
-#
-.Lsysc_reschedule:
-       larl    %r14,.Lsysc_return
-       jg      schedule
-
-#
-# _TIF_SIGPENDING is set, call do_signal
-#
-.Lsysc_sigpending:
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       brasl   %r14,do_signal
-       TSTMSK  __PT_FLAGS(%r11),_PIF_SYSCALL
-       jno     .Lsysc_return
-.Lsysc_do_syscall:
-       lghi    %r13,__TASK_thread
-       lmg     %r2,%r7,__PT_R2(%r11)   # load svc arguments
-       lghi    %r1,0                   # svc 0 returns -ENOSYS
-       j       .Lsysc_do_svc
-
-#
-# _TIF_NOTIFY_RESUME is set, call do_notify_resume
-#
-.Lsysc_notify_resume:
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       larl    %r14,.Lsysc_return
-       jg      do_notify_resume
-
-#
-# _TIF_UPROBE is set, call uprobe_notify_resume
-#
-#ifdef CONFIG_UPROBES
-.Lsysc_uprobe_notify:
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       larl    %r14,.Lsysc_return
-       jg      uprobe_notify_resume
-#endif
-
-#
-# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
-#
-.Lsysc_guarded_storage:
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       larl    %r14,.Lsysc_return
-       jg      gs_load_bc_cb
-#
-# _TIF_PATCH_PENDING is set, call klp_update_patch_state
-#
-#ifdef CONFIG_LIVEPATCH
-.Lsysc_patch_pending:
-       lg      %r2,__LC_CURRENT        # pass pointer to task struct
-       larl    %r14,.Lsysc_return
-       jg      klp_update_patch_state
-#endif
-
-#
-# _PIF_PER_TRAP is set, call do_per_trap
-#
-.Lsysc_singlestep:
-       ni      __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       larl    %r14,.Lsysc_return
-       jg      do_per_trap
-
-#
-# _PIF_SYSCALL_RESTART is set, repeat the current system call
-#
-.Lsysc_syscall_restart:
-       ni      __PT_FLAGS+7(%r11),255-_PIF_SYSCALL_RESTART
-       lmg     %r1,%r7,__PT_R1(%r11)   # load svc arguments
-       lg      %r2,__PT_ORIG_GPR2(%r11)
-       j       .Lsysc_do_svc
-
-#
-# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
-# and after the system call
-#
-.Lsysc_tracesys:
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       la      %r3,0
-       llgh    %r0,__PT_INT_CODE+2(%r11)
-       stg     %r0,__PT_R2(%r11)
-       brasl   %r14,do_syscall_trace_enter
-       lghi    %r0,NR_syscalls
-       clgr    %r0,%r2
-       jnh     .Lsysc_tracenogo
-       sllg    %r8,%r2,3
-       lg      %r9,0(%r8,%r10)
-       lmg     %r3,%r7,__PT_R3(%r11)
-       stg     %r7,STACK_FRAME_OVERHEAD(%r15)
-       lg      %r2,__PT_ORIG_GPR2(%r11)
-       BASR_EX %r14,%r9                # call sys_xxx
-       stg     %r2,__PT_R2(%r11)       # store return value
-.Lsysc_tracenogo:
-       TSTMSK  __TI_flags(%r12),_TIF_TRACE
-       jz      .Lsysc_return
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       larl    %r14,.Lsysc_return
-       jg      do_syscall_trace_exit
 ENDPROC(system_call)
 
 #
 # a new process exits the kernel with ret_from_fork
 #
 ENTRY(ret_from_fork)
-       la      %r11,STACK_FRAME_OVERHEAD(%r15)
-       lg      %r12,__LC_CURRENT
-       brasl   %r14,schedule_tail
-       tm      __PT_PSW+1(%r11),0x01   # forking a kernel thread ?
-       jne     .Lsysc_tracenogo
-       # it's a kernel thread
-       lmg     %r9,%r10,__PT_R9(%r11)  # load gprs
-       la      %r2,0(%r10)
-       BASR_EX %r14,%r9
-       j       .Lsysc_tracenogo
+       lgr     %r3,%r11                # regs argument; %r11 is no longer set in this routine - presumably prepared before entry, TODO confirm
+       brasl   %r14,__ret_from_fork    # __ret_from_fork(prev, regs); %r2 (prev) is used as it arrives here
+       lctlg   %c1,%c1,__LC_USER_ASCE  # load %cr1 from the lowcore user ASCE
+       mvc     __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) # PSW to resume with, copied from pt_regs
+       BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP # NOTE(review): %r12 is no longer loaded from __LC_CURRENT in this routine - confirm it is valid here
+       lmg     %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)     # reload all 16 gprs from pt_regs (this also replaces %r15)
+       stpt    __LC_EXIT_TIMER
+       b       __LC_RETURN_LPSWE
 ENDPROC(ret_from_fork)
 
-ENTRY(kernel_thread_starter)
-       la      %r2,0(%r10)
-       BASR_EX %r14,%r9
-       j       .Lsysc_tracenogo
-ENDPROC(kernel_thread_starter)
-
 /*
  * Program check handler routine
  */
 
 ENTRY(pgm_check_handler)
-       stpt    __LC_SYNC_ENTER_TIMER
+       stpt    __LC_SYS_ENTER_TIMER
        BPOFF
        stmg    %r8,%r15,__LC_SAVE_AREA_SYNC
-       lg      %r10,__LC_LAST_BREAK
-       srag    %r11,%r10,12
-       jnz     0f
-       /* if __LC_LAST_BREAK is < 4096, it contains one of
-        * the lpswe addresses in lowcore. Set it to 1 (initial state)
-        * to prevent leaking that address to userspace.
-        */
-       lghi    %r10,1
-0:     lg      %r12,__LC_CURRENT
-       lghi    %r11,0
+       lg      %r12,__LC_CURRENT
+       lghi    %r10,0
        lmg     %r8,%r9,__LC_PGM_OLD_PSW
        tmhh    %r8,0x0001              # coming from user space?
        jno     .Lpgm_skip_asce
        lctlg   %c1,%c1,__LC_KERNEL_ASCE
-       j       3f
+       j       3f                      # -> fault in user space
 .Lpgm_skip_asce:
 #if IS_ENABLED(CONFIG_KVM)
        # cleanup critical section for program checks in sie64a
@@ -653,7 +354,7 @@ ENTRY(pgm_check_handler)
        ni      __SIE_PROG0C+3(%r14),0xfe       # no longer in SIE
        lctlg   %c1,%c1,__LC_KERNEL_ASCE        # load primary asce
        larl    %r9,sie_exit                    # skip forward to sie_exit
-       lghi    %r11,_PIF_GUEST_FAULT
+       lghi    %r10,_PIF_GUEST_FAULT
 #endif
 1:     tmhh    %r8,0x4000              # PER bit set in old PSW ?
        jnz     2f                      # -> enabled, can't be a double fault
@@ -661,82 +362,37 @@ ENTRY(pgm_check_handler)
        jnz     .Lpgm_svcper            # -> single stepped svc
 2:     CHECK_STACK __LC_SAVE_AREA_SYNC
        aghi    %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-       # CHECK_VMAP_STACK branches to stack_overflow or 5f
-       CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f
-3:     UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
-       BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
+       # CHECK_VMAP_STACK branches to stack_overflow or 4f
+       CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
+3:     BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
        lg      %r15,__LC_KERNEL_STACK
-       lgr     %r14,%r12
-       aghi    %r14,__TASK_thread      # pointer to thread_struct
-       lghi    %r13,__LC_PGM_TDB
-       tm      __LC_PGM_ILC+2,0x02     # check for transaction abort
-       jz      4f
-       mvc     __THREAD_trap_tdb(256,%r14),0(%r13)
-4:     stg     %r10,__THREAD_last_break(%r14)
-5:     lgr     %r13,%r11
-       la      %r11,STACK_FRAME_OVERHEAD(%r15)
+4:     la      %r11,STACK_FRAME_OVERHEAD(%r15)
+       stg     %r10,__PT_FLAGS(%r11)
+       xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
        stmg    %r0,%r7,__PT_R0(%r11)
+       mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+       stmg    %r8,%r9,__PT_PSW(%r11)
+
        # clear user controlled registers to prevent speculative use
        xgr     %r0,%r0
        xgr     %r1,%r1
-       xgr     %r2,%r2
        xgr     %r3,%r3
        xgr     %r4,%r4
        xgr     %r5,%r5
        xgr     %r6,%r6
        xgr     %r7,%r7
-       mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
-       stmg    %r8,%r9,__PT_PSW(%r11)
-       mvc     __PT_INT_CODE(4,%r11),__LC_PGM_ILC
-       mvc     __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
-       stg     %r13,__PT_FLAGS(%r11)
-       stg     %r10,__PT_ARGS(%r11)
-       tm      __LC_PGM_ILC+3,0x80     # check for per exception
-       jz      6f
-       tmhh    %r8,0x0001              # kernel per event ?
-       jz      .Lpgm_kprobe
-       oi      __PT_FLAGS+7(%r11),_PIF_PER_TRAP
-       mvc     __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
-       mvc     __THREAD_per_cause(2,%r14),__LC_PER_CODE
-       mvc     __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-6:     xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-       RESTORE_SM_CLEAR_PER
-       larl    %r1,pgm_check_table
-       llgh    %r10,__PT_INT_CODE+2(%r11)
-       nill    %r10,0x007f
-       sll     %r10,3
-       je      .Lpgm_return
-       lg      %r9,0(%r10,%r1)         # load address of handler routine
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       BASR_EX %r14,%r9                # branch to interrupt-handler
-.Lpgm_return:
-       LOCKDEP_SYS_EXIT
-       tm      __PT_PSW+1(%r11),0x01   # returning to user ?
-       jno     .Lpgm_restore
-       TSTMSK  __PT_FLAGS(%r11),_PIF_SYSCALL
-       jo      .Lsysc_do_syscall
-       j       .Lsysc_tif
-.Lpgm_restore:
-       DISABLE_INTS
-       TSTMSK  __LC_CPU_FLAGS, _CIF_FPU
-       jz      .Lpgm_skip_fpu
-       brasl   %r14,load_fpu_regs
-.Lpgm_skip_fpu:
-       mvc     __LC_RETURN_PSW(16),__PT_PSW(%r11)
+       lgr     %r2,%r11
+       brasl   %r14,__do_pgm_check
+       tmhh    %r8,0x0001              # returning to user space?
+       jno     .Lpgm_exit_kernel
+       lctlg   %c1,%c1,__LC_USER_ASCE
+       BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
        stpt    __LC_EXIT_TIMER
-       lmg     %r0,%r15,__PT_R0(%r11)
+.Lpgm_exit_kernel:
+       mvc     __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+       lmg     %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
        b       __LC_RETURN_LPSWE
 
-#
-# PER event in supervisor state, must be kprobes
-#
-.Lpgm_kprobe:
-       xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-       RESTORE_SM_CLEAR_PER
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       brasl   %r14,do_per_trap
-       j       .Lpgm_return
-
 #
 # single stepped system call
 #
@@ -744,26 +400,26 @@ ENTRY(pgm_check_handler)
        mvc     __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
        larl    %r14,.Lsysc_per
        stg     %r14,__LC_RETURN_PSW+8
-       lghi    %r14,_PIF_SYSCALL | _PIF_PER_TRAP
+       lghi    %r14,1
        lpswe   __LC_RETURN_PSW         # branch to .Lsysc_per
 ENDPROC(pgm_check_handler)
 
 /*
- * IO interrupt handler routine
+ * Interrupt handler macro used for external and IO interrupts.
  */
-ENTRY(io_int_handler)
+.macro INT_HANDLER name,lc_old_psw,handler
+ENTRY(\name)
        STCK    __LC_INT_CLOCK
-       stpt    __LC_ASYNC_ENTER_TIMER
+       stpt    __LC_SYS_ENTER_TIMER
        BPOFF
        stmg    %r8,%r15,__LC_SAVE_AREA_ASYNC
        lg      %r12,__LC_CURRENT
-       lmg     %r8,%r9,__LC_IO_OLD_PSW
-       SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK
+       lmg     %r8,%r9,\lc_old_psw             # %r8/%r9 = old PSW of this interrupt class (stored to __PT_PSW below)
+       SWITCH_KERNEL __LC_SAVE_AREA_ASYNC
        stmg    %r0,%r7,__PT_R0(%r11)
        # clear user controlled registers to prevent speculative use
        xgr     %r0,%r0
        xgr     %r1,%r1
-       xgr     %r2,%r2
        xgr     %r3,%r3
        xgr     %r4,%r4
        xgr     %r5,%r5
@@ -772,322 +428,48 @@ ENTRY(io_int_handler)
        xgr     %r10,%r10
        mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
        stmg    %r8,%r9,__PT_PSW(%r11)
-       tm      __PT_PSW+1(%r11),0x01   # coming from user space?
-       jno     .Lio_skip_asce
+       tmhh    %r8,0x0001              # coming from user space? (register test on the PSW mask in %r8; tm would test a storage byte)
+       jno     1f                      # interrupted kernel code: keep the current %cr1
        lctlg   %c1,%c1,__LC_KERNEL_ASCE
-.Lio_skip_asce:
-       mvc     __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
-       xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
-       xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-       TRACE_IRQS_OFF
-.Lio_loop:
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       lghi    %r3,IO_INTERRUPT
-       tm      __PT_INT_CODE+8(%r11),0x80      # adapter interrupt ?
-       jz      .Lio_call
-       lghi    %r3,THIN_INTERRUPT
-.Lio_call:
-       brasl   %r14,do_IRQ
-       TSTMSK  __LC_MACHINE_FLAGS,MACHINE_FLAG_LPAR
-       jz      .Lio_return
-       tpi     0
-       jz      .Lio_return
-       mvc     __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
-       j       .Lio_loop
-.Lio_return:
-       LOCKDEP_SYS_EXIT
-       TSTMSK  __TI_flags(%r12),_TIF_WORK
-       jnz     .Lio_work               # there is work to do (signals etc.)
-       TSTMSK  __LC_CPU_FLAGS,_CIF_WORK
-       jnz     .Lio_work
-.Lio_restore:
-       TRACE_IRQS_ON
+1:     lgr     %r2,%r11                # pass pointer to pt_regs
+       brasl   %r14,\handler           # C handler named by the macro argument (do_ext_irq / do_io_irq)
        mvc     __LC_RETURN_PSW(16),__PT_PSW(%r11)
-       tm      __PT_PSW+1(%r11),0x01   # returning to user ?
-       jno     .Lio_exit_kernel
-       DEBUG_USER_ASCE
+       tmhh    %r8,0x0001              # returning to user ?
+       jno     2f                      # back to kernel code: skip user ASCE load, BPEXIT and exit timer
        lctlg   %c1,%c1,__LC_USER_ASCE
        BPEXIT  __TI_flags(%r12),_TIF_ISOLATE_BP
        stpt    __LC_EXIT_TIMER
-.Lio_exit_kernel:
-       lmg     %r0,%r15,__PT_R0(%r11)
+2:     lmg     %r0,%r15,__PT_R0(%r11)
        b       __LC_RETURN_LPSWE
-.Lio_done:
-
-#
-# There is work todo, find out in which context we have been interrupted:
-# 1) if we return to user space we can do all _TIF_WORK work
-# 2) if we return to kernel code and kvm is enabled check if we need to
-#    modify the psw to leave SIE
-# 3) if we return to kernel code and preemptive scheduling is enabled check
-#    the preemption counter and if it is zero call preempt_schedule_irq
-# Before any work can be done, a switch to the kernel stack is required.
-#
-.Lio_work:
-       tm      __PT_PSW+1(%r11),0x01   # returning to user ?
-       jo      .Lio_work_user          # yes -> do resched & signal
-#ifdef CONFIG_PREEMPTION
-       # check for preemptive scheduling
-       icm     %r0,15,__LC_PREEMPT_COUNT
-       jnz     .Lio_restore            # preemption is disabled
-       TSTMSK  __TI_flags(%r12),_TIF_NEED_RESCHED
-       jno     .Lio_restore
-       # switch to kernel stack
-       lg      %r1,__PT_R15(%r11)
-       aghi    %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-       mvc     STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
-       xc      __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
-       la      %r11,STACK_FRAME_OVERHEAD(%r1)
-       lgr     %r15,%r1
-       brasl   %r14,preempt_schedule_irq
-       j       .Lio_return
-#else
-       j       .Lio_restore
-#endif
-
-#
-# Need to do work before returning to userspace, switch to kernel stack
-#
-.Lio_work_user:
-       lg      %r1,__LC_KERNEL_STACK
-       mvc     STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
-       xc      __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
-       la      %r11,STACK_FRAME_OVERHEAD(%r1)
-       lgr     %r15,%r1
-
-#
-# One of the work bits is on. Find out which one.
-#
-       TSTMSK  __TI_flags(%r12),_TIF_NEED_RESCHED
-       jo      .Lio_reschedule
-#ifdef CONFIG_LIVEPATCH
-       TSTMSK  __TI_flags(%r12),_TIF_PATCH_PENDING
-       jo      .Lio_patch_pending
-#endif
-       TSTMSK  __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL)
-       jnz     .Lio_sigpending
-       TSTMSK  __TI_flags(%r12),_TIF_NOTIFY_RESUME
-       jo      .Lio_notify_resume
-       TSTMSK  __TI_flags(%r12),_TIF_GUARDED_STORAGE
-       jo      .Lio_guarded_storage
-       TSTMSK  __LC_CPU_FLAGS,_CIF_FPU
-       jo      .Lio_vxrs
-       j       .Lio_return
-
-#
-# CIF_FPU is set, restore floating-point controls and floating-point registers.
-#
-.Lio_vxrs:
-       larl    %r14,.Lio_return
-       jg      load_fpu_regs
-
-#
-# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
-#
-.Lio_guarded_storage:
-       ENABLE_INTS_TRACE
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       brasl   %r14,gs_load_bc_cb
-       DISABLE_INTS_TRACE
-       j       .Lio_return
+ENDPROC(\name)
+.endm
 
-#
-# _TIF_NEED_RESCHED is set, call schedule
-#
-.Lio_reschedule:
-       ENABLE_INTS_TRACE
-       brasl   %r14,schedule           # call scheduler
-       DISABLE_INTS_TRACE
-       j       .Lio_return
-
-#
-# _TIF_PATCH_PENDING is set, call klp_update_patch_state
-#
-#ifdef CONFIG_LIVEPATCH
-.Lio_patch_pending:
-       lg      %r2,__LC_CURRENT        # pass pointer to task struct
-       larl    %r14,.Lio_return
-       jg      klp_update_patch_state
-#endif
-
-#
-# _TIF_SIGPENDING or is set, call do_signal
-#
-.Lio_sigpending:
-       ENABLE_INTS_TRACE
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       brasl   %r14,do_signal
-       DISABLE_INTS_TRACE
-       j       .Lio_return
-
-#
-# _TIF_NOTIFY_RESUME or is set, call do_notify_resume
-#
-.Lio_notify_resume:
-       ENABLE_INTS_TRACE
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       brasl   %r14,do_notify_resume
-       DISABLE_INTS_TRACE
-       j       .Lio_return
-ENDPROC(io_int_handler)
-
-/*
- * External interrupt handler routine
- */
-ENTRY(ext_int_handler)
-       STCK    __LC_INT_CLOCK
-       stpt    __LC_ASYNC_ENTER_TIMER
-       BPOFF
-       stmg    %r8,%r15,__LC_SAVE_AREA_ASYNC
-       lg      %r12,__LC_CURRENT
-       lmg     %r8,%r9,__LC_EXT_OLD_PSW
-       SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK
-       stmg    %r0,%r7,__PT_R0(%r11)
-       # clear user controlled registers to prevent speculative use
-       xgr     %r0,%r0
-       xgr     %r1,%r1
-       xgr     %r2,%r2
-       xgr     %r3,%r3
-       xgr     %r4,%r4
-       xgr     %r5,%r5
-       xgr     %r6,%r6
-       xgr     %r7,%r7
-       xgr     %r10,%r10
-       mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
-       stmg    %r8,%r9,__PT_PSW(%r11)
-       tm      __PT_PSW+1(%r11),0x01   # coming from user space?
-       jno     .Lext_skip_asce
-       lctlg   %c1,%c1,__LC_KERNEL_ASCE
-.Lext_skip_asce:
-       lghi    %r1,__LC_EXT_PARAMS2
-       mvc     __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
-       mvc     __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
-       mvc     __PT_INT_PARM_LONG(8,%r11),0(%r1)
-       xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
-       xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-       TRACE_IRQS_OFF
-       lgr     %r2,%r11                # pass pointer to pt_regs
-       lghi    %r3,EXT_INTERRUPT
-       brasl   %r14,do_IRQ
-       j       .Lio_return
-ENDPROC(ext_int_handler)
+INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq
+INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq
 
 /*
  * Load idle PSW.
  */
 ENTRY(psw_idle)
        stg     %r3,__SF_EMPTY(%r15)
-       larl    %r1,.Lpsw_idle_exit
+       larl    %r1,psw_idle_exit       # address half of the PSW built at __SF_EMPTY(%r15) and loaded by lpswe below
        stg     %r1,__SF_EMPTY+8(%r15)
        larl    %r1,smp_cpu_mtid
        llgf    %r1,0(%r1)
        ltgr    %r1,%r1
        jz      .Lpsw_idle_stcctm
-       .insn   rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15)
+       .insn   rsy,0xeb0000000017,%r1,5,__MT_CYCLES_ENTER(%r2) # skipped when smp_cpu_mtid is 0; operand is now relative to %r2 instead of the stack frame (stcctm, going by the label below)
 .Lpsw_idle_stcctm:
        oi      __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
        BPON
        STCK    __CLOCK_IDLE_ENTER(%r2)
        stpt    __TIMER_IDLE_ENTER(%r2)
        lpswe   __SF_EMPTY(%r15)
-.Lpsw_idle_exit:
+.globl psw_idle_exit                   # global symbol replacing the file-local .Lpsw_idle_exit label
+psw_idle_exit:
        BR_EX   %r14
 ENDPROC(psw_idle)
 
-/*
- * Store floating-point controls and floating-point or vector register
- * depending whether the vector facility is available. A critical section
- * cleanup assures that the registers are stored even if interrupted for
- * some other work.  The CIF_FPU flag is set to trigger a lazy restore
- * of the register contents at return from io or a system call.
- */
-ENTRY(save_fpu_regs)
-       stnsm   __SF_EMPTY(%r15),0xfc
-       lg      %r2,__LC_CURRENT
-       aghi    %r2,__TASK_thread
-       TSTMSK  __LC_CPU_FLAGS,_CIF_FPU
-       jo      .Lsave_fpu_regs_exit
-       stfpc   __THREAD_FPU_fpc(%r2)
-       lg      %r3,__THREAD_FPU_regs(%r2)
-       TSTMSK  __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
-       jz      .Lsave_fpu_regs_fp        # no -> store FP regs
-       VSTM    %v0,%v15,0,%r3            # vstm 0,15,0(3)
-       VSTM    %v16,%v31,256,%r3         # vstm 16,31,256(3)
-       j       .Lsave_fpu_regs_done      # -> set CIF_FPU flag
-.Lsave_fpu_regs_fp:
-       std     0,0(%r3)
-       std     1,8(%r3)
-       std     2,16(%r3)
-       std     3,24(%r3)
-       std     4,32(%r3)
-       std     5,40(%r3)
-       std     6,48(%r3)
-       std     7,56(%r3)
-       std     8,64(%r3)
-       std     9,72(%r3)
-       std     10,80(%r3)
-       std     11,88(%r3)
-       std     12,96(%r3)
-       std     13,104(%r3)
-       std     14,112(%r3)
-       std     15,120(%r3)
-.Lsave_fpu_regs_done:
-       oi      __LC_CPU_FLAGS+7,_CIF_FPU
-.Lsave_fpu_regs_exit:
-       ssm     __SF_EMPTY(%r15)
-       BR_EX   %r14
-.Lsave_fpu_regs_end:
-ENDPROC(save_fpu_regs)
-EXPORT_SYMBOL(save_fpu_regs)
-
-/*
- * Load floating-point controls and floating-point or vector registers.
- * A critical section cleanup assures that the register contents are
- * loaded even if interrupted for some other work.
- *
- * There are special calling conventions to fit into sysc and io return work:
- *     %r15:   <kernel stack>
- * The function requires:
- *     %r4
- */
-load_fpu_regs:
-       stnsm   __SF_EMPTY(%r15),0xfc
-       lg      %r4,__LC_CURRENT
-       aghi    %r4,__TASK_thread
-       TSTMSK  __LC_CPU_FLAGS,_CIF_FPU
-       jno     .Lload_fpu_regs_exit
-       lfpc    __THREAD_FPU_fpc(%r4)
-       TSTMSK  __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
-       lg      %r4,__THREAD_FPU_regs(%r4)      # %r4 <- reg save area
-       jz      .Lload_fpu_regs_fp              # -> no VX, load FP regs
-       VLM     %v0,%v15,0,%r4
-       VLM     %v16,%v31,256,%r4
-       j       .Lload_fpu_regs_done
-.Lload_fpu_regs_fp:
-       ld      0,0(%r4)
-       ld      1,8(%r4)
-       ld      2,16(%r4)
-       ld      3,24(%r4)
-       ld      4,32(%r4)
-       ld      5,40(%r4)
-       ld      6,48(%r4)
-       ld      7,56(%r4)
-       ld      8,64(%r4)
-       ld      9,72(%r4)
-       ld      10,80(%r4)
-       ld      11,88(%r4)
-       ld      12,96(%r4)
-       ld      13,104(%r4)
-       ld      14,112(%r4)
-       ld      15,120(%r4)
-.Lload_fpu_regs_done:
-       ni      __LC_CPU_FLAGS+7,255-_CIF_FPU
-.Lload_fpu_regs_exit:
-       ssm     __SF_EMPTY(%r15)
-       BR_EX   %r14
-.Lload_fpu_regs_end:
-ENDPROC(load_fpu_regs)
-
 /*
  * Machine check handler routines
  */
@@ -1146,11 +528,8 @@ ENTRY(mcck_int_handler)
        mvc     __LC_MCCK_ENTER_TIMER(8),0(%r14)
        TSTMSK  __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
        jo      3f
-       la      %r14,__LC_SYNC_ENTER_TIMER
-       clc     0(8,%r14),__LC_ASYNC_ENTER_TIMER
-       jl      0f
-       la      %r14,__LC_ASYNC_ENTER_TIMER
-0:     clc     0(8,%r14),__LC_EXIT_TIMER
+       la      %r14,__LC_SYS_ENTER_TIMER
+       clc     0(8,%r14),__LC_EXIT_TIMER
        jl      1f
        la      %r14,__LC_EXIT_TIMER
 1:     clc     0(8,%r14),__LC_LAST_UPDATE_TIMER
@@ -1165,14 +544,13 @@ ENTRY(mcck_int_handler)
        TSTMSK  __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
        jno     .Lmcck_panic
 4:     ssm     __LC_PGM_NEW_PSW        # turn dat on, keep irqs off
-       SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER,__LC_MCCK_CLOCK
+       SWITCH_KERNEL __LC_GPREGS_SAVE_AREA+64
 .Lmcck_skip:
        lghi    %r14,__LC_GPREGS_SAVE_AREA+64
        stmg    %r0,%r7,__PT_R0(%r11)
        # clear user controlled registers to prevent speculative use
        xgr     %r0,%r0
        xgr     %r1,%r1
-       xgr     %r2,%r2
        xgr     %r3,%r3
        xgr     %r4,%r4
        xgr     %r5,%r5
@@ -1183,7 +561,6 @@ ENTRY(mcck_int_handler)
        stmg    %r8,%r9,__PT_PSW(%r11)
        la      %r14,4095
        mvc     __PT_CR1(8,%r11),__LC_CREGS_SAVE_AREA-4095+8(%r14)
-       lctlg   %c1,%c1,__LC_KERNEL_ASCE
        xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
        xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
        lgr     %r2,%r11                # pass pointer to pt_regs
@@ -1195,9 +572,7 @@ ENTRY(mcck_int_handler)
        xc      __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
        la      %r11,STACK_FRAME_OVERHEAD(%r1)
        lgr     %r15,%r1
-       TRACE_IRQS_OFF
        brasl   %r14,s390_handle_mcck
-       TRACE_IRQS_ON
 .Lmcck_return:
        lctlg   %c1,%c1,__PT_CR1(%r11)
        lmg     %r0,%r10,__PT_R0(%r11)
index a16c33b..3d0c0ac 100644 (file)
@@ -17,8 +17,9 @@ void io_int_handler(void);
 void mcck_int_handler(void);
 void restart_int_handler(void);
 
-asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
-asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
+void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs);
+void __do_pgm_check(struct pt_regs *regs);
+void __do_syscall(struct pt_regs *regs, int per_trap);
 
 void do_protection_exception(struct pt_regs *regs);
 void do_dat_exception(struct pt_regs *regs);
@@ -48,9 +49,7 @@ void translation_exception(struct pt_regs *regs);
 void vector_exception(struct pt_regs *regs);
 void monitor_event_exception(struct pt_regs *regs);
 
-void do_per_trap(struct pt_regs *regs);
 void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
-void syscall_trace(struct pt_regs *regs, int entryexit);
 void kernel_stack_overflow(struct pt_regs * regs);
 void do_signal(struct pt_regs *regs);
 void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
@@ -58,7 +57,8 @@ void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 void do_notify_resume(struct pt_regs *regs);
 
 void __init init_IRQ(void);
-void do_IRQ(struct pt_regs *regs, int irq);
+void do_io_irq(struct pt_regs *regs);
+void do_ext_irq(struct pt_regs *regs);
 void do_restart(void);
 void __init startup_init(void);
 void die(struct pt_regs *regs, const char *str);
@@ -82,8 +82,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user
 
 DECLARE_PER_CPU(u64, mt_cycles[8]);
 
-void gs_load_bc_cb(struct pt_regs *regs);
-
 unsigned long stack_alloc(void);
 void stack_free(unsigned long stack);
 
index 0da378e..d864c9a 100644 (file)
@@ -175,3 +175,91 @@ void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
                : "1", "cc");
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
+
+/*
+ * Load the FPU state stored in current->thread.fpu into the CPU.
+ *
+ * The floating point control register is loaded first, followed by either
+ * the vector registers (MACHINE_HAS_VX) or the 16 floating point registers.
+ * CIF_FPU is cleared afterwards.
+ *
+ * NOTE(review): presumably must run with interrupts disabled, as the
+ * load_fpu_regs() wrapper arranges - confirm for other callers.
+ */
+void __load_fpu_regs(void)
+{
+       struct fpu *state = &current->thread.fpu;
+       unsigned long *regs = current->thread.fpu.regs;
+
+       asm volatile("lfpc %0" : : "Q" (state->fpc));
+       if (likely(MACHINE_HAS_VX)) {
+               /* Two VLM blocks: v0-v15 at offset 0, v16-v31 at offset 256. */
+               asm volatile("lgr       1,%0\n"
+                            "VLM       0,15,0,1\n"
+                            "VLM       16,31,256,1\n"
+                            :
+                            : "d" (regs)
+                            : "1", "cc", "memory");
+       } else {
+               /* No vector facility: load f0-f15 one register at a time. */
+               asm volatile("ld 0,%0" : : "Q" (regs[0]));
+               asm volatile("ld 1,%0" : : "Q" (regs[1]));
+               asm volatile("ld 2,%0" : : "Q" (regs[2]));
+               asm volatile("ld 3,%0" : : "Q" (regs[3]));
+               asm volatile("ld 4,%0" : : "Q" (regs[4]));
+               asm volatile("ld 5,%0" : : "Q" (regs[5]));
+               asm volatile("ld 6,%0" : : "Q" (regs[6]));
+               asm volatile("ld 7,%0" : : "Q" (regs[7]));
+               asm volatile("ld 8,%0" : : "Q" (regs[8]));
+               asm volatile("ld 9,%0" : : "Q" (regs[9]));
+               asm volatile("ld 10,%0" : : "Q" (regs[10]));
+               asm volatile("ld 11,%0" : : "Q" (regs[11]));
+               asm volatile("ld 12,%0" : : "Q" (regs[12]));
+               asm volatile("ld 13,%0" : : "Q" (regs[13]));
+               asm volatile("ld 14,%0" : : "Q" (regs[14]));
+               asm volatile("ld 15,%0" : : "Q" (regs[15]));
+       }
+       clear_cpu_flag(CIF_FPU);
+}
+EXPORT_SYMBOL(__load_fpu_regs);
+
+/*
+ * Interrupt-safe wrapper around __load_fpu_regs().
+ *
+ * Interrupts are unconditionally enabled on return, so this must not be
+ * called from a context that relies on interrupts staying disabled.
+ */
+void load_fpu_regs(void)
+{
+       raw_local_irq_disable();
+       __load_fpu_regs();
+       raw_local_irq_enable();
+}
+EXPORT_SYMBOL(load_fpu_regs);
+
+/*
+ * Store the CPU's FPU state into current->thread.fpu.
+ *
+ * Does nothing if CIF_FPU is already set. Otherwise the floating point
+ * control register and either the vector registers (MACHINE_HAS_VX) or the
+ * 16 floating point registers are stored, and CIF_FPU is set. Runs with
+ * interrupts disabled and restores the previous interrupt state on return.
+ */
+void save_fpu_regs(void)
+{
+       unsigned long flags, *regs;
+       struct fpu *state;
+
+       local_irq_save(flags);
+
+       /* CIF_FPU set: registers were saved earlier and not reloaded since. */
+       if (test_cpu_flag(CIF_FPU))
+               goto out;
+
+       state = &current->thread.fpu;
+       regs = current->thread.fpu.regs;
+
+       asm volatile("stfpc %0" : "=Q" (state->fpc));
+       if (likely(MACHINE_HAS_VX)) {
+               /* Two VSTM blocks: v0-v15 at offset 0, v16-v31 at offset 256. */
+               asm volatile("lgr       1,%0\n"
+                            "VSTM      0,15,0,1\n"
+                            "VSTM      16,31,256,1\n"
+                            :
+                            : "d" (regs)
+                            : "1", "cc", "memory");
+       } else {
+               /* No vector facility: store f0-f15 one register at a time. */
+               asm volatile("std 0,%0" : "=Q" (regs[0]));
+               asm volatile("std 1,%0" : "=Q" (regs[1]));
+               asm volatile("std 2,%0" : "=Q" (regs[2]));
+               asm volatile("std 3,%0" : "=Q" (regs[3]));
+               asm volatile("std 4,%0" : "=Q" (regs[4]));
+               asm volatile("std 5,%0" : "=Q" (regs[5]));
+               asm volatile("std 6,%0" : "=Q" (regs[6]));
+               asm volatile("std 7,%0" : "=Q" (regs[7]));
+               asm volatile("std 8,%0" : "=Q" (regs[8]));
+               asm volatile("std 9,%0" : "=Q" (regs[9]));
+               asm volatile("std 10,%0" : "=Q" (regs[10]));
+               asm volatile("std 11,%0" : "=Q" (regs[11]));
+               asm volatile("std 12,%0" : "=Q" (regs[12]));
+               asm volatile("std 13,%0" : "=Q" (regs[13]));
+               asm volatile("std 14,%0" : "=Q" (regs[14]));
+               asm volatile("std 15,%0" : "=Q" (regs[15]));
+       }
+       set_cpu_flag(CIF_FPU);
+out:
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(save_fpu_regs);
index a5d4d80..812073e 100644 (file)
 #include <linux/cpu.h>
 #include <linux/sched/cputime.h>
 #include <trace/events/power.h>
+#include <asm/cpu_mf.h>
 #include <asm/nmi.h>
 #include <asm/smp.h>
 #include "entry.h"
 
 static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
 
+/*
+ * Account the time spent idle when an interrupt ends an idle period.
+ *
+ * Clears CIF_ENABLED_WAIT, adds the per-thread cycle deltas to mt_cycles
+ * when multithreading is active (smp_cpu_mtid != 0), records the idle exit
+ * timestamps from the lowcore and updates the lowcore steal/system timers.
+ */
+void account_idle_time_irq(void)
+{
+       struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+       u64 cycles_new[8];
+       int i;
+
+       clear_cpu_flag(CIF_ENABLED_WAIT);
+       if (smp_cpu_mtid) {
+               /* Add the cycles consumed since idle entry for each thread. */
+               stcctm(MT_DIAG, smp_cpu_mtid, cycles_new);
+               for (i = 0; i < smp_cpu_mtid; i++)
+                       this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
+       }
+
+       /* The interrupt clock/timer values mark the end of the idle period. */
+       idle->clock_idle_exit = S390_lowcore.int_clock;
+       idle->timer_idle_exit = S390_lowcore.sys_enter_timer;
+
+       /* Time between the last clock update and idle entry goes to steal_timer. */
+       S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock;
+       S390_lowcore.last_update_clock = idle->clock_idle_exit;
+
+       /* Timer consumed before idle entry goes to system_timer. */
+       S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter;
+       S390_lowcore.last_update_timer = idle->timer_idle_exit;
+}
+
 void arch_cpu_idle(void)
 {
        struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
index f8a8b94..c6d40bc 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/irq.h>
+#include <linux/entry-common.h>
 #include <asm/irq_regs.h>
 #include <asm/cputime.h>
 #include <asm/lowcore.h>
@@ -95,19 +96,97 @@ static const struct irq_class irqclass_sub_desc[] = {
        {.irq = CPU_RST,    .name = "RST", .desc = "[CPU] CPU Restart"},
 };
 
-void do_IRQ(struct pt_regs *regs, int irq)
+static void do_IRQ(struct pt_regs *regs, int irq)
 {
-       struct pt_regs *old_regs;
-
-       old_regs = set_irq_regs(regs);
-       irq_enter();
        if (tod_after_eq(S390_lowcore.int_clock,
                         S390_lowcore.clock_comparator))
                /* Serve timer interrupts first. */
                clock_comparator_work();
        generic_handle_irq(irq);
+}
+
+/*
+ * Return 1 if the current stack frame lies within the THREAD_SIZE sized
+ * area directly below S390_lowcore.async_stack, 0 otherwise.
+ */
+static int on_async_stack(void)
+{
+       unsigned long offset = S390_lowcore.async_stack - current_frame_address();
+
+       /* Any bit at or above the stack size means we are outside the area. */
+       return (offset >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) == 0;
+}
+
+/*
+ * Run do_IRQ() for @irq on the async stack, switching to it first when the
+ * caller is not already executing there.
+ */
+static void do_irq_async(struct pt_regs *regs, int irq)
+{
+       if (!on_async_stack()) {
+               CALL_ON_STACK(do_IRQ, S390_lowcore.async_stack, 2, regs, irq);
+               return;
+       }
+       do_IRQ(regs, irq);
+}
+
+/*
+ * Execute "tpi 0" and return the resulting condition code.
+ *
+ * @regs is not used. NOTE(review): a non-zero return presumably means a
+ * further I/O interruption was pending and its code was stored in the
+ * lowcore - confirm against the TPI instruction description.
+ */
+static int irq_pending(struct pt_regs *regs)
+{
+       int cc;
+
+       /* ipm inserts the condition code into the upper byte of cc. */
+       asm volatile("tpi 0\n"
+                    "ipm %0" : "=d" (cc) : : "cc");
+       return cc >> 28;
+}
+
+/*
+ * I/O interrupt handler.
+ *
+ * Copies the I/O interruption information from the lowcore into @regs and
+ * dispatches either THIN_INTERRUPT or IO_INTERRUPT on the async stack. On
+ * LPAR the loop keeps going for as long as irq_pending() reports more work.
+ */
+void noinstr do_io_irq(struct pt_regs *regs)
+{
+       irqentry_state_t state = irqentry_enter(regs);
+       struct pt_regs *old_regs = set_irq_regs(regs);
+       int from_idle;
+
+       irq_enter();
+
+       if (user_mode(regs))
+               update_timer_sys();
+
+       /* Interrupted kernel PSW at psw_idle_exit: the CPU was idle. */
+       from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
+       if (from_idle)
+               account_idle_time_irq();
+
+       do {
+               /* 12 bytes of lowcore data starting at subchannel_id. */
+               memcpy(&regs->int_code, &S390_lowcore.subchannel_id, 12);
+               if (S390_lowcore.io_int_word & BIT(31))
+                       do_irq_async(regs, THIN_INTERRUPT);
+               else
+                       do_irq_async(regs, IO_INTERRUPT);
+       } while (MACHINE_IS_LPAR && irq_pending(regs));
+
+       irq_exit();
+       set_irq_regs(old_regs);
+       irqentry_exit(regs, state);
+
+       /*
+        * Clear the wait and interrupt enable bits in the PSW to return to;
+        * presumably so the idle code is left with interrupts disabled.
+        */
+       if (from_idle)
+               regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
+}
+
+/*
+ * External interrupt handler.
+ *
+ * Copies the external interruption code and parameters from the lowcore
+ * into @regs, accounts idle time when the interrupted kernel PSW address is
+ * psw_idle_exit, and dispatches EXT_INTERRUPT on the async stack.
+ */
+void noinstr do_ext_irq(struct pt_regs *regs)
+{
+       irqentry_state_t state = irqentry_enter(regs);
+       struct pt_regs *old_regs = set_irq_regs(regs);
+       int from_idle;
+
+       irq_enter();
+
+       if (user_mode(regs))
+               update_timer_sys();
+
+       memcpy(&regs->int_code, &S390_lowcore.ext_cpu_addr, 4);
+       regs->int_parm = S390_lowcore.ext_params;
+       /* ext_params2 holds the parameter value itself, not a pointer to it. */
+       regs->int_parm_long = S390_lowcore.ext_params2;
+
+       /* Interrupted kernel PSW at psw_idle_exit: the CPU was idle. */
+       from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
+       if (from_idle)
+               account_idle_time_irq();
+
+       do_irq_async(regs, EXT_INTERRUPT);
+
        irq_exit();
        set_irq_regs(old_regs);
+       irqentry_exit(regs, state);
+
+       /* Clear the wait and interrupt enable bits in the PSW to return to. */
+       if (from_idle)
+               regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
 }
 
 static void show_msi_interrupt(struct seq_file *p, int irq)
index 86c8d53..11f8c29 100644 (file)
@@ -131,12 +131,11 @@ static notrace void s390_handle_damage(void)
 NOKPROBE_SYMBOL(s390_handle_damage);
 
 /*
- * Main machine check handler function. Will be called with interrupts enabled
- * or disabled and machine checks enabled or disabled.
+ * Main machine check handler function. Will be called with interrupts disabled
+ * and machine checks enabled.
  */
-void s390_handle_mcck(void)
+void __s390_handle_mcck(void)
 {
-       unsigned long flags;
        struct mcck_struct mcck;
 
        /*
@@ -144,12 +143,10 @@ void s390_handle_mcck(void)
         * machine checks. Afterwards delete the old state and enable machine
         * checks again.
         */
-       local_irq_save(flags);
        local_mcck_disable();
        mcck = *this_cpu_ptr(&cpu_mcck);
        memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
        local_mcck_enable();
-       local_irq_restore(flags);
 
        if (mcck.channel_report)
                crw_handle_channel_report();
@@ -181,8 +178,13 @@ void s390_handle_mcck(void)
                do_exit(SIGSEGV);
        }
 }
-EXPORT_SYMBOL_GPL(s390_handle_mcck);
 
+/*
+ * Wrapper around __s390_handle_mcck() that brackets the call with
+ * trace_hardirqs_off() and trace_hardirqs_on().
+ */
+void noinstr s390_handle_mcck(void)
+{
+       trace_hardirqs_off();
+       __s390_handle_mcck();
+       trace_hardirqs_on();
+}
 /*
  * returns 0 if all required registers are available
  * returns 1 otherwise
@@ -344,6 +346,9 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
        int mcck_pending = 0;
 
        nmi_enter();
+
+       if (user_mode(regs))
+               update_timer_mcck();
        inc_irq_stat(NMI_NMI);
        mci.val = S390_lowcore.mcck_interruption_code;
        mcck = this_cpu_ptr(&cpu_mcck);
index bc3ca54..367bd00 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/random.h>
 #include <linux/export.h>
 #include <linux/init_task.h>
+#include <linux/entry-common.h>
 #include <asm/cpu_mf.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/unwind.h>
 #include "entry.h"
 
-asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
+void ret_from_fork(void) asm("ret_from_fork");
 
-extern void kernel_thread_starter(void);
+/*
+ * C part of the return path of a newly created task.
+ *
+ * @prev: task that was running before the switch, passed to schedule_tail()
+ * @regs: register set of the new task
+ *
+ * For a kernel thread (@regs not in user mode) the thread function stored
+ * in gprs[9] is called with the argument stored in gprs[10]. Afterwards
+ * PIF_SYSCALL is cleared and the generic exit-to-user work is run.
+ */
+void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs)
+{
+       void (*func)(void *arg);
+
+       schedule_tail(prev);
+
+       if (!user_mode(regs)) {
+               /* Kernel thread */
+               func = (void *)regs->gprs[9];
+               func((void *)regs->gprs[10]);
+       }
+       clear_pt_regs_flag(regs, PIF_SYSCALL);
+       syscall_exit_to_user_mode(regs);
+}
 
 void flush_thread(void)
 {
@@ -108,10 +122,12 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
        p->thread.last_break = 1;
 
        frame->sf.back_chain = 0;
+       frame->sf.gprs[5] = (unsigned long)frame + sizeof(struct stack_frame);
+       frame->sf.gprs[6] = (unsigned long)p;
        /* new return point is ret_from_fork */
-       frame->sf.gprs[8] = (unsigned long) ret_from_fork;
+       frame->sf.gprs[8] = (unsigned long)ret_from_fork;
        /* fake return stack for resume(), don't go back to schedule */
-       frame->sf.gprs[9] = (unsigned long) frame;
+       frame->sf.gprs[9] = (unsigned long)frame;
 
        /* Store access registers to kernel stack of new process. */
        if (unlikely(p->flags & PF_KTHREAD)) {
@@ -120,10 +136,10 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
                frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
                                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
                frame->childregs.psw.addr =
-                               (unsigned long) kernel_thread_starter;
+                               (unsigned long)__ret_from_fork;
                frame->childregs.gprs[9] = new_stackp; /* function */
                frame->childregs.gprs[10] = arg;
-               frame->childregs.gprs[11] = (unsigned long) do_exit;
+               frame->childregs.gprs[11] = (unsigned long)do_exit;
                frame->childregs.orig_gpr2 = -1;
 
                return 0;
@@ -153,7 +169,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
        return 0;
 }
 
-asmlinkage void execve_tail(void)
+void execve_tail(void)
 {
        current->thread.fpu.fpc = 0;
        asm volatile("sfpc %0" : : "d" (0));
index a76dd27..18b3416 100644 (file)
@@ -7,6 +7,7 @@
  *               Martin Schwidefsky (schwidefsky@de.ibm.com)
  */
 
+#include "asm/ptrace.h"
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
@@ -37,9 +38,6 @@
 #include "compat_ptrace.h"
 #endif
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
 void update_cr_regs(struct task_struct *task)
 {
        struct pt_regs *regs = task_pt_regs(task);
@@ -140,7 +138,7 @@ void ptrace_disable(struct task_struct *task)
        memset(&task->thread.per_user, 0, sizeof(task->thread.per_user));
        memset(&task->thread.per_event, 0, sizeof(task->thread.per_event));
        clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
-       clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP);
+       clear_tsk_thread_flag(task, TIF_PER_TRAP);
        task->thread.per_flags = 0;
 }
 
@@ -322,25 +320,6 @@ static inline void __poke_user_per(struct task_struct *child,
                child->thread.per_user.end = data;
 }
 
-static void fixup_int_code(struct task_struct *child, addr_t data)
-{
-       struct pt_regs *regs = task_pt_regs(child);
-       int ilc = regs->int_code >> 16;
-       u16 insn;
-
-       if (ilc > 6)
-               return;
-
-       if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16),
-                       &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn))
-               return;
-
-       /* double check that tracee stopped on svc instruction */
-       if ((insn >> 8) != 0xa)
-               return;
-
-       regs->int_code = 0x20000 | (data & 0xffff);
-}
 /*
  * Write a word to the user area of a process at location addr. This
  * operation does have an additional problem compared to peek_user.
@@ -374,10 +353,12 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
                }
 
                if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
-                       addr == offsetof(struct user, regs.gprs[2]))
-                       fixup_int_code(child, data);
-               *(addr_t *)((addr_t) &regs->psw + addr) = data;
+                       addr == offsetof(struct user, regs.gprs[2])) {
+                       struct pt_regs *regs = task_pt_regs(child);
 
+                       regs->int_code = 0x20000 | (data & 0xffff);
+               }
+               *(addr_t *)((addr_t) &regs->psw + addr) = data;
        } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
                /*
                 * access registers are stored in the thread structure
@@ -742,10 +723,12 @@ static int __poke_user_compat(struct task_struct *child,
                        regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
                                (__u64)(tmp & PSW32_ADDR_AMODE);
                } else {
-
                        if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
-                               addr == offsetof(struct compat_user, regs.gprs[2]))
-                               fixup_int_code(child, data);
+                               addr == offsetof(struct compat_user, regs.gprs[2])) {
+                               struct pt_regs *regs = task_pt_regs(child);
+
+                               regs->int_code = 0x20000 | (data & 0xffff);
+                       }
                        /* gpr 0-15 */
                        *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
                }
@@ -862,82 +845,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 }
 #endif
 
-asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
-{
-       unsigned long mask = -1UL;
-       long ret = -1;
-
-       if (is_compat_task())
-               mask = 0xffffffff;
-
-       /*
-        * The sysc_tracesys code in entry.S stored the system
-        * call number to gprs[2].
-        */
-       if (test_thread_flag(TIF_SYSCALL_TRACE) &&
-           tracehook_report_syscall_entry(regs)) {
-               /*
-                * Tracing decided this syscall should not happen. Skip
-                * the system call and the system call restart handling.
-                */
-               goto skip;
-       }
-
-#ifdef CONFIG_SECCOMP
-       /* Do the secure computing check after ptrace. */
-       if (unlikely(test_thread_flag(TIF_SECCOMP))) {
-               struct seccomp_data sd;
-
-               if (is_compat_task()) {
-                       sd.instruction_pointer = regs->psw.addr & 0x7fffffff;
-                       sd.arch = AUDIT_ARCH_S390;
-               } else {
-                       sd.instruction_pointer = regs->psw.addr;
-                       sd.arch = AUDIT_ARCH_S390X;
-               }
-
-               sd.nr = regs->int_code & 0xffff;
-               sd.args[0] = regs->orig_gpr2 & mask;
-               sd.args[1] = regs->gprs[3] & mask;
-               sd.args[2] = regs->gprs[4] & mask;
-               sd.args[3] = regs->gprs[5] & mask;
-               sd.args[4] = regs->gprs[6] & mask;
-               sd.args[5] = regs->gprs[7] & mask;
-
-               if (__secure_computing(&sd) == -1)
-                       goto skip;
-       }
-#endif /* CONFIG_SECCOMP */
-
-       if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-               trace_sys_enter(regs, regs->int_code & 0xffff);
-
-
-       audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask,
-                           regs->gprs[3] &mask, regs->gprs[4] &mask,
-                           regs->gprs[5] &mask);
-
-       if ((signed long)regs->gprs[2] >= NR_syscalls) {
-               regs->gprs[2] = -ENOSYS;
-               ret = -ENOSYS;
-       }
-       return regs->gprs[2];
-skip:
-       clear_pt_regs_flag(regs, PIF_SYSCALL);
-       return ret;
-}
-
-asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
-{
-       audit_syscall_exit(regs);
-
-       if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-               trace_sys_exit(regs, regs->gprs[2]);
-
-       if (test_thread_flag(TIF_SYSCALL_TRACE))
-               tracehook_report_syscall_exit(regs, 0);
-}
-
 /*
  * user_regset definitions.
  */
index 1fbed91..c7feda8 100644 (file)
@@ -411,8 +411,7 @@ static void __init setup_lowcore_dat_off(void)
        memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
               sizeof(lc->alt_stfle_fac_list));
        nmi_alloc_boot_cpu(lc);
-       lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
-       lc->async_enter_timer = S390_lowcore.async_enter_timer;
+       lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
        lc->exit_timer = S390_lowcore.exit_timer;
        lc->user_timer = S390_lowcore.user_timer;
        lc->system_timer = S390_lowcore.system_timer;
index b27b6c1..fce1b2a 100644 (file)
@@ -170,6 +170,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
        fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
 
        clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+       clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
        return 0;
 }
 
@@ -459,7 +460,8 @@ static void handle_signal(struct ksignal *ksig, sigset_t *oldset,
  * the kernel can handle, and then we build all the user-level signal handling
  * stack-frames in one go after that.
  */
-void do_signal(struct pt_regs *regs)
+
+void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
 {
        struct ksignal ksig;
        sigset_t *oldset = sigmask_to_save();
@@ -472,7 +474,7 @@ void do_signal(struct pt_regs *regs)
        current->thread.system_call =
                test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
 
-       if (test_thread_flag(TIF_SIGPENDING) && get_signal(&ksig)) {
+       if (has_signal && get_signal(&ksig)) {
                /* Whee!  Actually deliver the signal.  */
                if (current->thread.system_call) {
                        regs->int_code = current->thread.system_call;
@@ -498,6 +500,7 @@ void do_signal(struct pt_regs *regs)
                }
                /* No longer in a system call */
                clear_pt_regs_flag(regs, PIF_SYSCALL);
+               clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
                rseq_signal_deliver(&ksig, regs);
                if (is_compat_task())
                        handle_signal32(&ksig, oldset, regs);
@@ -508,6 +511,7 @@ void do_signal(struct pt_regs *regs)
 
        /* No handlers present - check for system call restart */
        clear_pt_regs_flag(regs, PIF_SYSCALL);
+       clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
        if (current->thread.system_call) {
                regs->int_code = current->thread.system_call;
                switch (regs->gprs[2]) {
@@ -520,9 +524,9 @@ void do_signal(struct pt_regs *regs)
                case -ERESTARTNOINTR:
                        /* Restart system call with magic TIF bit. */
                        regs->gprs[2] = regs->orig_gpr2;
-                       set_pt_regs_flag(regs, PIF_SYSCALL);
+                       set_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
                        if (test_thread_flag(TIF_SINGLE_STEP))
-                               clear_pt_regs_flag(regs, PIF_PER_TRAP);
+                               clear_thread_flag(TIF_PER_TRAP);
                        break;
                }
        }
index 27c7630..c5abbb9 100644 (file)
@@ -499,7 +499,7 @@ static void smp_handle_ext_call(void)
        if (test_bit(ec_call_function_single, &bits))
                generic_smp_call_function_single_interrupt();
        if (test_bit(ec_mcck_pending, &bits))
-               s390_handle_mcck();
+               __s390_handle_mcck();
 }
 
 static void do_ext_call_interrupt(struct ext_code ext_code,
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
deleted file mode 100644 (file)
index 202fa73..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  S390 version
- *    Copyright IBM Corp. 1999, 2000
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- *               Thomas Spatzier (tspat@de.ibm.com)
- *
- *  Derived from "arch/i386/kernel/sys_i386.c"
- *
- *  This file contains various random system calls that
- *  have a non-standard calling sequence on the Linux/s390
- *  platform.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/utsname.h>
-#include <linux/personality.h>
-#include <linux/unistd.h>
-#include <linux/ipc.h>
-#include <linux/uaccess.h>
-#include "entry.h"
-
-/*
- * Perform the mmap() system call. Linux for S/390 isn't able to handle more
- * than 5 system call parameters, so this system call uses a memory block
- * for parameter passing.
- */
-
-struct s390_mmap_arg_struct {
-       unsigned long addr;
-       unsigned long len;
-       unsigned long prot;
-       unsigned long flags;
-       unsigned long fd;
-       unsigned long offset;
-};
-
-SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
-{
-       struct s390_mmap_arg_struct a;
-       int error = -EFAULT;
-
-       if (copy_from_user(&a, arg, sizeof(a)))
-               goto out;
-       error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-out:
-       return error;
-}
-
-#ifdef CONFIG_SYSVIPC
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls.
- */
-SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
-               unsigned long, third, void __user *, ptr)
-{
-       if (call >> 16)
-               return -EINVAL;
-       /* The s390 sys_ipc variant has only five parameters instead of six
-        * like the generic variant. The only difference is the handling of
-        * the SEMTIMEDOP subcall where on s390 the third parameter is used
-        * as a pointer to a struct timespec where the generic variant uses
-        * the fifth parameter.
-        * Therefore we can call the generic variant by simply passing the
-        * third parameter also as fifth parameter.
-        */
-       return ksys_ipc(call, first, second, third, ptr, third);
-}
-#endif /* CONFIG_SYSVIPC */
-
-SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
-{
-       unsigned int ret = current->personality;
-
-       if (personality(current->personality) == PER_LINUX32 &&
-           personality(personality) == PER_LINUX)
-               personality |= PER_LINUX32;
-
-       if (personality != 0xffffffff)
-               set_personality(personality);
-
-       if (personality(ret) == PER_LINUX32)
-               ret &= ~PER_LINUX32;
-
-       return ret;
-}
-
-SYSCALL_DEFINE0(ni_syscall)
-{
-       return -ENOSYS;
-}
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
new file mode 100644 (file)
index 0000000..25c0fb1
--- /dev/null
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Thomas Spatzier (tspat@de.ibm.com)
+ *
+ *  Derived from "arch/i386/kernel/sys_i386.c"
+ *
+ *  This file contains various random system calls that
+ *  have a non-standard calling sequence on the Linux/s390
+ *  platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/personality.h>
+#include <linux/unistd.h>
+#include <linux/ipc.h>
+#include <linux/uaccess.h>
+#include <linux/string.h>
+#include <linux/thread_info.h>
+#include <linux/entry-common.h>
+
+#include <asm/ptrace.h>
+#include <asm/vtime.h>
+
+#include "entry.h"
+
+/*
+ * Perform the mmap() system call. Linux for S/390 isn't able to handle more
+ * than 5 system call parameters, so this system call uses a memory block
+ * for parameter passing.
+ */
+
+/* User space argument block read by the mmap2 system call below. */
+struct s390_mmap_arg_struct {
+       unsigned long addr;
+       unsigned long len;
+       unsigned long prot;
+       unsigned long flags;
+       unsigned long fd;
+       unsigned long offset;   /* passed as the pgoff argument of ksys_mmap_pgoff() */
+};
+
+/*
+ * mmap2: read the argument block from user space and hand its members to
+ * ksys_mmap_pgoff(). Returns -EFAULT if the block cannot be copied.
+ */
+SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
+{
+       struct s390_mmap_arg_struct args;
+       int rc;
+
+       if (copy_from_user(&args, arg, sizeof(args)))
+               return -EFAULT;
+
+       rc = ksys_mmap_pgoff(args.addr, args.len, args.prot, args.flags,
+                            args.fd, args.offset);
+       return rc;
+}
+
+#ifdef CONFIG_SYSVIPC
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls.
+ */
+SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
+               unsigned long, third, void __user *, ptr)
+{
+       /* Reject call numbers with any bit above the low 16 set. */
+       if ((call >> 16) != 0)
+               return -EINVAL;
+
+       /*
+        * The s390 variant of sys_ipc takes five parameters, the generic
+        * variant six. They differ only for the SEMTIMEDOP subcall: s390
+        * passes the pointer to the struct timespec as third parameter,
+        * while the generic variant expects it as fifth parameter. Passing
+        * the third parameter a second time in the fifth position therefore
+        * lets the generic implementation serve both.
+        */
+       return ksys_ipc(call, first, second, third, ptr, third);
+}
+#endif /* CONFIG_SYSVIPC */
+
+/*
+ * s390 personality system call: returns the previous personality with
+ * PER_LINUX32 cleared from the result when the previous personality was
+ * PER_LINUX32.
+ */
+SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
+{
+       const unsigned int previous = current->personality;
+       unsigned int result = previous;
+
+       /* A PER_LINUX request from a PER_LINUX32 task keeps PER_LINUX32. */
+       if (personality(previous) == PER_LINUX32 &&
+           personality(personality) == PER_LINUX)
+               personality |= PER_LINUX32;
+
+       /* 0xffffffff leaves the current personality untouched. */
+       if (personality != 0xffffffff)
+               set_personality(personality);
+
+       if (personality(result) == PER_LINUX32)
+               result &= ~PER_LINUX32;
+
+       return result;
+}
+
+/* System call stub that always returns -ENOSYS. */
+SYSCALL_DEFINE0(ni_syscall)
+{
+       return -ENOSYS;
+}
+
+/*
+ * Execute one system call for the task described by @regs.
+ *
+ * The system call number is taken from the low 16 bits of regs->int_code
+ * or, when those are zero, from the low 16 bits of gpr1 (and is then merged
+ * into int_code). The result is stored in gprs[2] before the generic
+ * syscall exit work runs.
+ */
+void do_syscall(struct pt_regs *regs)
+{
+       unsigned long nr;
+
+       nr = regs->int_code & 0xffff;
+       if (!nr) {
+               nr = regs->gprs[1] & 0xffff;
+               regs->int_code &= ~0xffffUL;
+               regs->int_code |= nr;
+       }
+
+       regs->gprs[2] = nr;
+
+       /* The entry work returns the number to dispatch. */
+       nr = syscall_enter_from_user_mode_work(regs, nr);
+
+       /*
+        * In the s390 ptrace ABI, both the syscall number and the return value
+        * use gpr2. However, userspace puts the syscall number either in the
+        * svc instruction itself, or uses gpr1. To make at least skipping syscalls
+        * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here
+        * and if set, the syscall will be skipped.
+        */
+       if (!test_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)) {
+               /* Default result for out-of-range system call numbers. */
+               regs->gprs[2] = -ENOSYS;
+               if (likely(nr < NR_syscalls)) {
+                       /* gprs[2] was overwritten above; orig_gpr2 is the first argument. */
+                       regs->gprs[2] = current->thread.sys_call_table[nr](
+                                       regs->orig_gpr2, regs->gprs[3],
+                                       regs->gprs[4], regs->gprs[5],
+                                       regs->gprs[6], regs->gprs[7]);
+               }
+       } else {
+               clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET);
+       }
+       syscall_exit_to_user_mode_work(regs);
+}
+
+/*
+ * System call entry point in C.
+ *
+ * @regs:     register set of the calling task; gprs[8..15], int_code and psw
+ *            are filled in here from the lowcore
+ * @per_trap: when non-zero, TIF_PER_TRAP is set for the current task
+ *
+ * Runs do_syscall() and repeats it for as long as PIF_SYSCALL_RESTART is
+ * set in @regs afterwards, then performs the final exit to user mode.
+ */
+void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
+{
+       enter_from_user_mode(regs);
+
+       /* Complete pt_regs with the state saved in the lowcore. */
+       memcpy(&regs->gprs[8], S390_lowcore.save_area_sync, 8 * sizeof(unsigned long));
+       memcpy(&regs->int_code, &S390_lowcore.svc_ilc, sizeof(regs->int_code));
+       regs->psw = S390_lowcore.svc_old_psw;
+
+       update_timer_sys();
+
+       local_irq_enable();
+       /* do_syscall() overwrites gprs[2], so keep the first argument. */
+       regs->orig_gpr2 = regs->gprs[2];
+
+       if (per_trap)
+               set_thread_flag(TIF_PER_TRAP);
+
+       for (;;) {
+               /* Each pass starts with only PIF_SYSCALL set. */
+               regs->flags = 0;
+               set_pt_regs_flag(regs, PIF_SYSCALL);
+               do_syscall(regs);
+               if (!test_pt_regs_flag(regs, PIF_SYSCALL_RESTART))
+                       break;
+               local_irq_enable();
+       }
+       exit_to_user_mode();
+}
index 8d1e8a1..db7dd59 100644 (file)
@@ -13,6 +13,8 @@
  * 'Traps.c' handles hardware traps and faults after we have saved some
  * state in 'asm.s'.
  */
+#include "asm/irqflags.h"
+#include "asm/ptrace.h"
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/extable.h>
@@ -23,7 +25,9 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/cpu.h>
+#include <linux/entry-common.h>
 #include <asm/fpu/api.h>
+#include <asm/vtime.h>
 #include "entry.h"
 
 static inline void __user *get_trap_ip(struct pt_regs *regs)
@@ -288,3 +292,64 @@ void __init trap_init(void)
        local_mcck_enable();
        test_monitor_call();
 }
+/* Program-check entry: record interruption data, call the pgm_check_table[] handler, maybe run a syscall. */
+void noinstr __do_pgm_check(struct pt_regs *regs)
+{
+       unsigned long last_break = S390_lowcore.breaking_event_addr;
+       unsigned int trapnr, syscall_redirect = 0;
+       irqentry_state_t state;
+
+       regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc;
+       regs->int_parm_long = S390_lowcore.trans_exc_code;
+
+       state = irqentry_enter(regs);
+       /* User-mode fault: publish the breaking-event address to the task; values below 4096 become 1. */
+       if (user_mode(regs)) {
+               update_timer_sys();
+               if (last_break < 4096)
+                       last_break = 1;
+               current->thread.last_break = last_break;
+               regs->args[0] = last_break;
+       }
+
+       if (S390_lowcore.pgm_code & 0x0200) {
+               /* transaction abort */
+               memcpy(&current->thread.trap_tdb, &S390_lowcore.pgm_tdb, 256);
+       }
+       /* PER event: user mode saves it and sets TIF_PER_TRAP; kernel mode handles it now and skips the table. */
+       if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) {
+               if (user_mode(regs)) {
+                       struct per_event *ev = &current->thread.per_event;
+
+                       set_thread_flag(TIF_PER_TRAP);
+                       ev->address = S390_lowcore.per_address;
+                       ev->cause = *(u16 *)&S390_lowcore.per_code;
+                       ev->paid = S390_lowcore.per_access_id;
+               } else {
+                       /* PER event in kernel is kprobes */
+                       __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
+                       do_per_trap(regs);
+                       goto out;
+               }
+       }
+       /* Use the interrupted PSW mask without PER; irq tracing is told first if that mask has irqs enabled. */
+       if (!irqs_disabled_flags(regs->psw.mask))
+               trace_hardirqs_on();
+       __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
+       /* Dispatch on the masked interruption code; a code of 0 calls no handler. */
+       trapnr = regs->int_code & PGM_INT_CODE_MASK;
+       if (trapnr)
+               pgm_check_table[trapnr](regs);
+       syscall_redirect = user_mode(regs) && test_pt_regs_flag(regs, PIF_SYSCALL);
+out:
+       local_irq_disable();
+       irqentry_exit(regs, state);
+       /* PIF_SYSCALL set on a user-mode fault: process it as a syscall (kernel-mode PER path never gets here set). */
+       if (syscall_redirect) {
+               enter_from_user_mode(regs);
+               local_irq_enable();
+               regs->orig_gpr2 = regs->gprs[2];        /* first argument is passed via orig_gpr2 */
+               do_syscall(regs);
+               exit_to_user_mode();
+       }
+}
index 5007fac..bbf8622 100644 (file)
@@ -32,7 +32,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
                return -EINVAL;
        if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)
                return -EINVAL;
-       clear_pt_regs_flag(regs, PIF_PER_TRAP);
+       clear_thread_flag(TIF_PER_TRAP);
        auprobe->saved_per = psw_bits(regs->psw).per;
        auprobe->saved_int_code = regs->int_code;
        regs->int_code = UPROBE_TRAP_NR;
@@ -103,7 +103,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
                /* fix per address */
                current->thread.per_event.address = utask->vaddr;
                /* trigger per event */
-               set_pt_regs_flag(regs, PIF_PER_TRAP);
+               set_thread_flag(TIF_PER_TRAP);
        }
        return 0;
 }
@@ -259,7 +259,7 @@ static void sim_stor_event(struct pt_regs *regs, void *addr, int len)
                return;
        current->thread.per_event.address = regs->psw.addr;
        current->thread.per_event.cause = PER_EVENT_STORE >> 16;
-       set_pt_regs_flag(regs, PIF_PER_TRAP);
+       set_thread_flag(TIF_PER_TRAP);
 }
 
 /*
index dbafd05..759bbc0 100644 (file)
@@ -45,6 +45,7 @@
 #include <asm/timex.h>
 #include <asm/ap.h>
 #include <asm/uv.h>
+#include <asm/fpu/api.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 
@@ -4147,6 +4148,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                               vcpu->run->s.regs.gprs,
                               sizeof(sie_page->pv_grregs));
                }
+               if (test_cpu_flag(CIF_FPU))
+                       load_fpu_regs();
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
index c5d0a58..bd803e0 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/sclp.h>
 #include <asm/nmi.h>
 #include <asm/dis.h>
+#include <asm/fpu/api.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 
@@ -1028,6 +1029,8 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
         */
        vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
        barrier();
+       if (test_cpu_flag(CIF_FPU))
+               load_fpu_regs();
        if (!kvm_s390_vcpu_sie_inhibited(vcpu))
                rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
        barrier();
index e8f6424..2fece1f 100644 (file)
@@ -16,8 +16,8 @@
 #include <asm/mmu_context.h>
 #include <asm/facility.h>
 
-#ifdef CONFIG_DEBUG_USER_ASCE
-void debug_user_asce(void)
+#ifdef CONFIG_DEBUG_ENTRY
+void debug_user_asce(int exit)
 {
        unsigned long cr1, cr7;
 
@@ -25,12 +25,14 @@ void debug_user_asce(void)
        __ctl_store(cr7, 7, 7);
        if (cr1 == S390_lowcore.kernel_asce && cr7 == S390_lowcore.user_asce)
                return;
-       panic("incorrect ASCE on kernel exit\n"
+       panic("incorrect ASCE on kernel %s\n"
              "cr1:    %016lx cr7:  %016lx\n"
              "kernel: %016llx user: %016llx\n",
-             cr1, cr7, S390_lowcore.kernel_asce, S390_lowcore.user_asce);
+             exit ? "exit" : "entry", cr1, cr7,
+             S390_lowcore.kernel_asce, S390_lowcore.user_asce);
+
 }
-#endif /*CONFIG_DEBUG_USER_ASCE */
+#endif /*CONFIG_DEBUG_ENTRY */
 
 #ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
 static DEFINE_STATIC_KEY_FALSE(have_mvcos);
index b821010..e30c7c7 100644 (file)
@@ -385,7 +385,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
         * The instruction that caused the program check has
         * been nullified. Don't signal single step via SIGTRAP.
         */
-       clear_pt_regs_flag(regs, PIF_PER_TRAP);
+       clear_thread_flag(TIF_PER_TRAP);
 
        if (kprobe_page_fault(regs, 14))
                return 0;