/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *  Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
 *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
 *  Adapted for Power Macintosh by Paul Mackerras.
 *  Low-level exception handlers and MMU support
 *  rewritten by Paul Mackerras.
 *    Copyright (C) 1996 Paul Mackerras.
 *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
 *
 *  This file contains the system call entry code, context switch
 *  code, and exception/interrupt return code for PowerPC.
 */
#include <linux/errno.h>
#include <linux/err.h>
#include <asm/cache.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/code-patching-asm.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>
#include <asm/irqflags.h>
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
#include <asm/ppc-opcode.h>
#include <asm/barrier.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#ifdef CONFIG_PPC_BOOK3S
#include <asm/exception-64s.h>
#else
#include <asm/exception-64e.h>
#endif
#include <asm/feature-fixups.h>
SYS_CALL_TABLE:
	.tc sys_call_table[TC],sys_call_table

COMPAT_SYS_CALL_TABLE:
	.tc compat_sys_call_table[TC],compat_sys_call_table

/* This value is used to mark exception frames on the stack. */
exception_marker:
	.tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
#ifdef CONFIG_PPC_BOOK3S
.macro system_call_vectored name trapnr
	.globl system_call_vectored_\name
system_call_vectored_\name:
_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
	extrdi.	r10, r12, 1, (63-MSR_TS_T_LG)	/* transaction active? */
	bne	.Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
	SCV_INTERRUPT_TO_KERNEL
	/* Can we avoid saving r3-r8 in common case? */
	/* Zero r9-r12, this should only be required when restoring all GPRs */
	addi	r10,r1,STACK_FRAME_OVERHEAD
	ld	r11,exception_marker@toc(r2)
	std	r11,-16(r10)		/* "regshere" marker */
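	/*
	 * How this marker is consumed (explanatory note, not new behaviour):
	 * the reliable stack unwinder reads the doubleword at offset
	 * STACK_FRAME_OVERHEAD-16 of a frame and, if it matches
	 * STACK_FRAME_REGS_MARKER, treats the frame as holding a struct
	 * pt_regs. The kernel interrupt return path below clears the word
	 * again so a stale marker cannot fool the unwinder.
	 */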
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
	/*
	 * scv enters with MSR[EE]=1 and is immediately considered soft-masked.
	 * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED,
	 * and interrupts may be masked and pending already.
	 * system_call_exception() will call trace_hardirqs_off() which means
	 * interrupts could already have been blocked before trace_hardirqs_off,
	 * but this is the best we can do.
	 */
	/* Calling convention has r9 = orig r0, r10 = regs */
	bl	system_call_exception

.Lsyscall_vectored_\name\()_exit:
	addi	r4,r1,STACK_FRAME_OVERHEAD
	bl	syscall_exit_prepare
BEGIN_FTR_SECTION
	stdcx.	r0,0,r1			/* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
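	/*
	 * Why the reservation is cleared (explanatory note): on CPUs whose
	 * stcx. does not check the reservation address, a reservation left
	 * behind by a kernel larx could let a later user-mode stcx. succeed
	 * without a matching user larx. The dummy stdcx. to the stack breaks
	 * any such reservation; on CPUs with CPU_FTR_STCX_CHECKS_ADDRESS the
	 * fixup patches it out as unnecessary.
	 */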
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)

	bne	.Lsyscall_vectored_\name\()_restore_regs

	/* rfscv returns with LR->NIA and CTR->MSR */
	/*
	 * Could zero these as per ABI, but we may consider a stricter ABI
	 * which preserves these if libc implementations can benefit, so
	 * restore them for now until further measurement is done.
	 */
	/* Zero volatile regs that may contain sensitive kernel data */
	/*
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * The value of AMR only matters while we're in the kernel.
	 */
	b	.	/* prevent speculative execution */

.Lsyscall_vectored_\name\()_restore_regs:
.endm

system_call_vectored common 0x3000
/*
 * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
 * which is tested by system_call_exception when r0 is -1 (as set by vector
 * entry code).
 */
system_call_vectored sigill 0x7ff0
/*
 * Entered via kernel return set up by kernel/sstep.c, must match entry regs
 */
	.globl system_call_vectored_emulate
system_call_vectored_emulate:
_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate)
	li	r10,IRQS_ALL_DISABLED
	stb	r10,PACAIRQSOFTMASK(r13)
	b	system_call_vectored_common
#endif /* CONFIG_PPC_BOOK3S */
	.balign IFETCH_ALIGN_BYTES
	.globl system_call_common_real
system_call_common_real:
	ld	r10,PACAKMSR(r13)	/* get MSR value for kernel */
	mtmsrd	r10
	b	system_call_common
	.balign IFETCH_ALIGN_BYTES
	.globl system_call_common
system_call_common:
_ASM_NOKPROBE_SYMBOL(system_call_common)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
	extrdi.	r10, r12, 1, (63-MSR_TS_T_LG)	/* transaction active? */
	bne	.Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
	BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
	/* Can we avoid saving r3-r8 in common case? */
	/* Zero r9-r12, this should only be required when restoring all GPRs */
	/*
	 * This clears CR0.SO (bit 28), which is the error indication on
	 * return from this system call.
	 */
	rldimi	r12,r11,28,(63-28)
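	/*
	 * How the rldimi clears exactly one bit (worked example): the insert
	 * mask begins and ends at bit 63-28, so only CR0.SO (bit 28 counting
	 * from the LSB of the CR image in r12) is written. The inserted bit
	 * comes from r11, which the surrounding (elided) code holds at zero,
	 * so the net effect is CR0.SO = 0 with all other CR bits preserved.
	 */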
	addi	r10,r1,STACK_FRAME_OVERHEAD
	ld	r11,exception_marker@toc(r2)
	std	r11,-16(r10)		/* "regshere" marker */
	/*
	 * We always enter kernel from userspace with irq soft-mask enabled and
	 * nothing pending. system_call_exception() will call
	 * trace_hardirqs_off().
	 */
	li	r11,IRQS_ALL_DISABLED
	li	r12,PACA_IRQ_HARD_DIS
	stb	r11,PACAIRQSOFTMASK(r13)
	stb	r12,PACAIRQHAPPENED(r13)
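	/*
	 * Contrast with scv above (explanatory note): the sc interrupt
	 * clears MSR[EE] on entry, so interrupts really are hard-disabled
	 * here and PACA_IRQ_HARD_DIS records that; scv leaves MSR[EE]=1 and
	 * relies on the soft mask alone.
	 */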
	/* Calling convention has r9 = orig r0, r10 = regs */
	bl	system_call_exception

.Lsyscall_exit:
	addi	r4,r1,STACK_FRAME_OVERHEAD
	bl	syscall_exit_prepare
BEGIN_FTR_SECTION
	stdcx.	r0,0,r1			/* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
	bne	.Lsyscall_restore_regs
	/* Zero volatile regs that may contain sensitive kernel data */

.Lsyscall_restore_regs_cont:
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
	/*
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * The value of AMR only matters while we're in the kernel.
	 */
	b	.	/* prevent speculative execution */

.Lsyscall_restore_regs:
	b	.Lsyscall_restore_regs_cont
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
.Ltabort_syscall:
	/* Firstly we need to enable TM in the kernel */
	mfmsr	r10
	li	r9, 1
	rldimi	r10, r9, MSR_TM_LG, 63-MSR_TM_LG
	mtmsrd	r10, 0

	/* tabort, this dooms the transaction, nothing else */
	li	r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
	TABORT(R9)
	/*
	 * Return directly to userspace. We have corrupted user register state,
	 * but userspace will never see that register state. Execution will
	 * resume after the tbegin of the aborted transaction with the
	 * checkpointed register state.
	 */
	b	.	/* prevent speculative execution */
#endif
#ifdef CONFIG_PPC_BOOK3S
_GLOBAL(ret_from_fork_scv)
	li	r3,0	/* fork() return value */
	b	.Lsyscall_vectored_common_exit
#endif
_GLOBAL(ret_from_fork)
	li	r3,0	/* fork() return value */
	b	.Lsyscall_exit

_GLOBAL(ret_from_kernel_thread)
#ifdef PPC64_ELF_ABI_v2
#ifdef CONFIG_PPC_BOOK3S_64
#define FLUSH_COUNT_CACHE	\
1:	nop;			\
	patch_site 1b, patch__call_flush_branch_caches1; \
1:	nop;			\
	patch_site 1b, patch__call_flush_branch_caches2; \
1:	nop;			\
	patch_site 1b, patch__call_flush_branch_caches3
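/*
 * How the patch sites work (explanatory note): each "1: nop" above is
 * registered in a patch-site table. At boot, when the branch-cache flush
 * mitigation is enabled, the nops are patched into branches to
 * flush_branch_caches below; otherwise they stay as nops and the context
 * switch pays no extra cost.
 */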
.global flush_branch_caches
flush_branch_caches:
	/* Save LR into r9 */
	mflr	r9

	// Flush the link stack

	// If we're just flushing the link stack, return here
3:	nop
	patch_site 3b, patch__flush_link_stack_return

2:	nop
	patch_site 2b, patch__flush_count_cache_return
#else
#define FLUSH_COUNT_CACHE
#endif /* CONFIG_PPC_BOOK3S_64 */
/*
 * This routine switches between two different tasks. The process
 * state of one is saved on its kernel stack. Then the state
 * of the other is restored from its kernel stack. The memory
 * management hardware is updated to the second process's state.
 * Finally, we can return to the second process, via interrupt_return.
 * On entry, r3 points to the THREAD for the current task, r4
 * points to the THREAD for the new task.
 *
 * Note: there are two ways to get to the "going out" portion
 * of this code; either by coming in via the entry (_switch)
 * or via "fork" which must set up an environment equivalent
 * to the "_switch" path. If you change this you'll have to change
 * the fork code also.
 *
 * The code which creates the new task context is in 'copy_thread'
 * in arch/powerpc/kernel/process.c
 */
_GLOBAL(_switch)
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-SWITCH_FRAME_SIZE(r1)
	/* r3-r13 are caller saved -- Cort */
	std	r0,_NIP(r1)	/* Return to switch caller */
	std	r1,KSP(r3)	/* Set old stack pointer */

	kuap_check_amr r9, r10

	FLUSH_COUNT_CACHE	/* Clobbers r9, ctr */
	/*
	 * On SMP kernels, care must be taken because a task may be
	 * scheduled off CPUx and on to CPUy. Memory ordering must be
	 * considered.
	 *
	 * Cacheable stores on CPUx will be visible when the task is
	 * scheduled on CPUy by virtue of the core scheduler barriers
	 * (see "Notes on Program-Order guarantees on SMP systems." in
	 * kernel/sched/core.c).
	 *
	 * Uncacheable stores in the case of involuntary preemption must
	 * be taken care of. The smp_mb__after_spinlock() in __schedule()
	 * is implemented as hwsync on powerpc, which orders MMIO too. So
	 * long as there is an hwsync in the context switch path, it will
	 * be executed on the source CPU after the task has performed
	 * all MMIO ops on that CPU, and on the destination CPU before the
	 * task performs any MMIO ops there.
	 */
	/*
	 * The kernel context switch path must contain a spin_lock,
	 * which contains larx/stcx, which will clear any reservation
	 * of the task being switched.
	 */
#ifdef CONFIG_PPC_BOOK3S
	/*
	 * Cancel all explicit user streams as they will have no use after
	 * context switch, and stop the HW from creating new streams itself.
	 */
	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
#endif
	addi	r6,r4,-THREAD	/* Convert THREAD to 'current' */
	std	r6,PACACURRENT(r13)	/* Set new 'current' */
#if defined(CONFIG_STACKPROTECTOR)
	ld	r6, TASK_CANARY(r6)
	std	r6, PACA_CANARY(r13)
#endif
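	/*
	 * Why the canary is copied (explanatory note, assuming the usual
	 * ppc64 stack-protector setup): GCC reads the guard value through
	 * r13 at the PACA canary offset (-mstack-protector-guard-reg=r13),
	 * so the incoming task's canary must be staged into the PACA at
	 * every switch.
	 */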
	ld	r8,KSP(r4)	/* new stack pointer */
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_MMU_FTR_SECTION
	b	2f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
BEGIN_MMU_FTR_SECTION
	clrrdi	r6,r8,28	/* get its ESID */
	clrrdi	r9,r1,28	/* get current sp ESID */
FTR_SECTION_ELSE
	clrrdi	r6,r8,40	/* get its 1T ESID */
	clrrdi	r9,r1,40	/* get current sp 1T ESID */
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_1T_SEGMENT)
	clrldi.	r0,r6,2		/* is new ESID c00000000? */
	cmpd	cr1,r6,r9	/* or is new ESID the same as current ESID? */
	cror	eq,4*cr1+eq,eq
	beq	2f		/* if yes, don't slbie it */
	/* Bolt in the new stack SLB entry */
	ld	r7,KSP_VSID(r4)	/* Get new stack's VSID */
	oris	r0,r6,(SLB_ESID_V)@h
	ori	r0,r0,(SLB_NUM_BOLTED-1)@l
BEGIN_FTR_SECTION
	li	r9,MMU_SEGSIZE_1T	/* insert B field */
	oris	r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
	rldimi	r7,r9,SLB_VSID_SSIZE_SHIFT,0
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
	/*
	 * Update the last bolted SLB. No write barriers are needed
	 * here, provided we only update the current CPU's SLB shadow
	 * buffer.
	 */
	ld	r9,PACA_SLBSHADOWPTR(r13)
	li	r12,0
	std	r12,SLBSHADOW_STACKESID(r9)	/* Clear ESID */
	li	r12,SLBSHADOW_STACKVSID
	STDX_BE	r7,r12,r9			/* Save VSID */
	li	r12,SLBSHADOW_STACKESID
	STDX_BE	r0,r12,r9			/* Save ESID */
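	/*
	 * Ordering rationale (explanatory note): the ESID is zeroed first so
	 * the shadow entry is invalid while the VSID is being replaced, and
	 * the new ESID is only written once the new VSID is in place. The
	 * hypervisor may consume the shadow buffer at any time, so it must
	 * never see a mismatched ESID/VSID pair.
	 */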
	/*
	 * No need to check for MMU_FTR_NO_SLBIE_B here, since when
	 * we have 1TB segments, the only CPUs known to have the errata
	 * only support less than 1TB of system memory and we'll never
	 * actually hit this code path.
	 */

	isync
	slbie	r6
BEGIN_FTR_SECTION
	slbie	r6		/* Workaround POWER5 < DD2.1 issue */
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
	slbmte	r7,r0
	isync
2:
#endif /* CONFIG_PPC_BOOK3S_64 */

	clrrdi	r7, r8, THREAD_SHIFT	/* base of new stack */
	/*
	 * Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
	 * because we don't need to leave the 288-byte ABI gap at the
	 * top of the kernel stack.
	 */
	addi	r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
	/*
	 * PMU interrupts in radix may come in here. They will use r1, not
	 * PACAKSAVE, so this stack switch will not cause a problem. They
	 * will store to the process stack, which may then be migrated to
	 * another CPU. However the rq lock release on this CPU paired with
	 * the rq lock acquire on the new CPU before the stack becomes
	 * active on the new CPU, will order those stores.
	 */
	mr	r1,r8		/* start using new stack pointer */
	std	r7,PACAKSAVE(r13)
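	/*
	 * What PACAKSAVE is for (explanatory note): exception entry from
	 * userspace loads its kernel stack pointer from PACAKSAVE, so it
	 * must always name the new task's stack once r1 has switched.
	 */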
	/* r3-r13 are destroyed -- Cort */

	/* convert old thread to its task_struct for return value */
	addi	r3,r3,-THREAD
	ld	r7,_NIP(r1)	/* Return to _switch caller in new task */
	mtlr	r7
	addi	r1,r1,SWITCH_FRAME_SIZE
	blr
	/*
	 * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
	 * touched, no exit work created, then this can be used.
	 */
	.balign IFETCH_ALIGN_BYTES
	.globl fast_interrupt_return
fast_interrupt_return:
_ASM_NOKPROBE_SYMBOL(fast_interrupt_return)
	kuap_check_amr r3, r4
	ld	r5,_MSR(r1)
	andi.	r4,r5,MSR_PR
#ifdef CONFIG_PPC_BOOK3S
	bne	.Lfast_user_interrupt_return_amr
	kuap_kernel_restore r3, r4
	andi.	r0,r5,MSR_RI
	li	r3,0	/* 0 return value, no EMULATE_STACK_STORE */
	bne+	.Lfast_kernel_interrupt_return
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	unrecoverable_exception
	b	.	/* should not get here */
#else
	bne	.Lfast_user_interrupt_return
	b	.Lfast_kernel_interrupt_return
#endif
	.balign IFETCH_ALIGN_BYTES
	.globl interrupt_return
interrupt_return:
_ASM_NOKPROBE_SYMBOL(interrupt_return)
	ld	r4,_MSR(r1)
	andi.	r0,r4,MSR_PR
	beq	.Lkernel_interrupt_return
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	interrupt_exit_user_prepare
	cmpdi	r3,0
	bne-	.Lrestore_nvgprs
#ifdef CONFIG_PPC_BOOK3S
.Lfast_user_interrupt_return_amr:
	kuap_user_restore r3, r4
#endif
.Lfast_user_interrupt_return:
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
BEGIN_FTR_SECTION
	stdcx.	r0,0,r1		/* to clear the reservation */
FTR_SECTION_ELSE
	ldarx	r0,0,r1
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
	b	.	/* prevent speculative execution */

.Lrestore_nvgprs:
	REST_NVGPRS(r1)
	b	.Lfast_user_interrupt_return
	.balign IFETCH_ALIGN_BYTES
.Lkernel_interrupt_return:
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	interrupt_exit_kernel_prepare

.Lfast_kernel_interrupt_return:
BEGIN_FTR_SECTION
	stdcx.	r0,0,r1		/* to clear the reservation */
FTR_SECTION_ELSE
	ldarx	r0,0,r1
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
	/*
	 * Leaving a stale exception_marker on the stack can confuse
	 * the reliable stack unwinder later on. Clear it.
	 */
	std	r0,STACK_FRAME_OVERHEAD-16(r1)
	bne-	cr1,1f		/* emulate stack store */
	b	.	/* prevent speculative execution */
1:
	/*
	 * Emulate stack store with update. New r1 value was already calculated
	 * and updated in our interrupt regs by emulate_loadstore, but we can't
	 * store the previous value of r1 to the stack before re-loading our
	 * registers from it, otherwise they could be clobbered. Use
	 * PACA_EXGEN as temporary storage to hold the store data, as
	 * interrupts are disabled here so it won't be clobbered.
	 */
	std	r9,PACA_EXGEN+0(r13)
	addi	r9,r1,INT_FRAME_SIZE	/* get original r1 */
	std	r9,0(r1)	/* perform store component of stdu */
	ld	r9,PACA_EXGEN+0(r13)

	b	.	/* prevent speculative execution */
#ifdef CONFIG_PPC_RTAS
/*
 * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
 * called with the MMU off.
 *
 * In addition, we need to be in 32b mode, at least for now.
 *
 * Note: r3 is an input parameter to rtas, so don't trash it...
 */
_GLOBAL(__enter_rtas)
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-SWITCH_FRAME_SIZE(r1)	/* Save SP and create stack space. */
	/*
	 * Because RTAS is running in 32b mode, it clobbers the high order half
	 * of all registers that it saves. We therefore save those registers
	 * RTAS might touch to the stack. (r0, r3-r13 are caller saved)
	 */
	SAVE_GPR(2, r1)		/* Save the TOC */
	SAVE_GPR(13, r1)	/* Save paca */
	SAVE_NVGPRS(r1)		/* Save the non-volatiles */
	/*
	 * Temporary workaround to clear CR until RTAS can be modified to
	 * ignore all bits.
	 */
	/*
	 * There is no way it is acceptable to get here with interrupts enabled,
	 * check it with the asm equivalent of WARN_ON
	 */
	lbz	r0,PACAIRQSOFTMASK(r13)
1:	tdeqi	r0,IRQS_ENABLED
	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
	/* Hard-disable interrupts */
	/*
	 * Unfortunately, the stack pointer and the MSR are also clobbered,
	 * so they are saved in the PACA which allows us to restore
	 * our original state after RTAS returns.
	 */
	std	r1,PACAR1(r13)
	std	r6,PACASAVEDMSR(r13)
	/* Setup our real return addr */
	LOAD_REG_ADDR(r4,rtas_return_loc)
	clrldi	r4,r4,2		/* convert to realmode address */
	mtlr	r4
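	/*
	 * Why clrldi works here (explanatory note): kernel linear-map
	 * addresses live at 0xc000000000000000, so clearing the top two
	 * bits of the address yields the underlying real address, which is
	 * the correct return target once the MMU is off for RTAS.
	 */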
	li	r0,0
	ori	r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI
	andc	r0,r6,r0
	li	r9,1
	rldicr	r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
	andc	r6,r0,r9
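	/*
	 * Mask construction (explanatory note): r9 gathers the MSR bits that
	 * must be off while RTAS runs: 64-bit mode (SF), translation (IR/DR),
	 * floating point, and little-endian. The andc above strips them from
	 * the saved MSR, leaving a 32-bit big-endian real-mode MSR for the
	 * RTAS entry.
	 */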
	sync				/* disable interrupts so SRR0/1 */
	mtmsrd	r0			/* don't get trashed */
	LOAD_REG_ADDR(r4, rtas)
	ld	r5,RTASENTRY(r4)	/* get the rtas->entry value */
	ld	r4,RTASBASE(r4)		/* get the rtas->base value */

	mtspr	SPRN_SRR0,r5
	mtspr	SPRN_SRR1,r6
	RFI_TO_KERNEL
	b	.	/* prevent speculative execution */
rtas_return_loc:
	FIXUP_ENDIAN

	/*
	 * Clear RI and set SF before anything.
	 */
	mfmsr	r6
	li	r0,MSR_RI
	andc	r6,r6,r0
	sldi	r0,r0,(MSR_SF_LG - MSR_RI_LG)
	or	r6,r6,r0
	sync
	mtmsrd	r6
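	/*
	 * Bit trick (explanatory note): r0 holds MSR_RI, a single bit at
	 * position MSR_RI_LG. The sldi above shifts it left by
	 * (MSR_SF_LG - MSR_RI_LG), turning the same constant into the MSR_SF
	 * bit, so one register both clears RI and sets SF.
	 */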
	/* relocation is off at this point */
	GET_PACA(r4)
	clrldi	r4,r4,2		/* convert to realmode address */

	bcl	20,31,$+4
0:	mflr	r3
	ld	r3,(1f-0b)(r3)	/* get &rtas_restore_regs */

	ld	r1,PACAR1(r4)		/* Restore our SP */
	ld	r4,PACASAVEDMSR(r4)	/* Restore our MSR */

	mtspr	SPRN_SRR0,r3
	mtspr	SPRN_SRR1,r4
	RFI_TO_KERNEL
	b	.	/* prevent speculative execution */
_ASM_NOKPROBE_SYMBOL(__enter_rtas)
_ASM_NOKPROBE_SYMBOL(rtas_return_loc)

	.align	3
1:	.8byte	rtas_restore_regs
rtas_restore_regs:
	/* relocation is on at this point */
	REST_GPR(2, r1)		/* Restore the TOC */
	REST_GPR(13, r1)	/* Restore paca */
	REST_NVGPRS(r1)		/* Restore the non-volatiles */
	addi	r1,r1,SWITCH_FRAME_SIZE	/* Unstack our frame */
	ld	r0,16(r1)		/* get return address */

	mtlr	r0
	blr				/* return to caller */
#endif /* CONFIG_PPC_RTAS */
_GLOBAL(enter_prom)
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-SWITCH_FRAME_SIZE(r1)	/* Save SP and create stack space */

	/*
	 * Because PROM is running in 32b mode, it clobbers the high order half
	 * of all registers that it saves. We therefore save those registers
	 * PROM might touch to the stack. (r0, r3-r13 are caller saved)
	 */
	/* Put PROM address in SRR0 */
	mtsrr0	r4

	/* Setup our trampoline return addr in LR */

	/* Prepare a 32-bit mode big endian MSR */
#ifdef CONFIG_PPC_BOOK3E
	rlwinm	r11,r11,0,1,31
	mtsrr1	r11
	rfi
#else /* CONFIG_PPC_BOOK3E */
	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE)
	andc	r11,r11,r12
	mtsrr1	r11
	RFI_TO_KERNEL
#endif /* CONFIG_PPC_BOOK3E */
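	/*
	 * The two variants above (explanatory note): Book3E keeps only the
	 * low word of the MSR via rlwinm, dropping the 64-bit computation
	 * mode bit, while Book3S builds an (SF | LE) mask and clears it with
	 * andc, since Open Firmware expects to be called in 32-bit
	 * big-endian mode.
	 */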
1:	/* Return from OF */
	FIXUP_ENDIAN

	/* Just make sure that r1 top 32 bits didn't get corrupted by OF */
	rldicl	r1,r1,0,32

	/* Restore the MSR (back to 64 bits) */
	/* Restore other registers */

	addi	r1,r1,SWITCH_FRAME_SIZE
	ld	r0,16(r1)
	mtlr	r0
	blr