/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *  Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
 *  Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
 *  Adapted for Power Macintosh by Paul Mackerras.
 *  Low-level exception handlers and MMU support
 *  rewritten by Paul Mackerras.
 *  Copyright (C) 1996 Paul Mackerras.
 *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
 *
 *  This file contains the system call entry code, context switch
 *  code, and exception/interrupt return code for PowerPC.
 */
#include <linux/errno.h>
#include <linux/err.h>
#include <asm/cache.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/code-patching-asm.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>
#include <asm/irqflags.h>
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
#include <asm/ppc-opcode.h>
#include <asm/barrier.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#ifdef CONFIG_PPC_BOOK3S
#include <asm/exception-64s.h>
#else
#include <asm/exception-64e.h>
#endif
#include <asm/feature-fixups.h>
SYS_CALL_TABLE:
	.tc sys_call_table[TC],sys_call_table

COMPAT_SYS_CALL_TABLE:
	.tc compat_sys_call_table[TC],compat_sys_call_table

/* This value is used to mark exception frames on the stack. */
exception_marker:
	.tc	ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
#ifdef CONFIG_PPC_BOOK3S
.macro system_call_vectored name trapnr
	.globl system_call_vectored_\name
system_call_vectored_\name:
_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
	extrdi.	r10, r12, 1, (63-MSR_TS_T_LG)	/* transaction active? */
	bne	.Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
	/* Can we avoid saving r3-r8 in the common case? */
	std	r3,GPR3(r1)
	std	r4,GPR4(r1)
	std	r5,GPR5(r1)
	std	r6,GPR6(r1)
	std	r7,GPR7(r1)
	std	r8,GPR8(r1)
	/* Zero r9-r12, this should only be required when restoring all GPRs */
	std	r11,GPR9(r1)
	std	r11,GPR10(r1)
	std	r11,GPR11(r1)
	std	r11,GPR12(r1)
	addi	r10,r1,STACK_FRAME_OVERHEAD
	ld	r11,exception_marker@toc(r2)
	std	r11,-16(r10)		/* "regshere" marker */
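	/*
	 * The "regshere" marker (STACK_FRAME_REGS_MARKER) is stored 16 bytes
	 * below the pt_regs area of the frame being built here. Stack
	 * unwinders use it to recognise a frame with a register dump above
	 * it; the kernel interrupt-return path clears the same slot again so
	 * a stale marker cannot be misread later.
	 */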
BEGIN_FTR_SECTION
	HMT_MEDIUM
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
	/*
	 * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
	 * would clobber syscall parameters. Also we always enter with IRQs
	 * enabled and nothing pending. system_call_exception() will call
	 * trace_hardirqs_off().
	 *
	 * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The
	 * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED.
	 */
	/* Calling convention has r9 = orig r0, r10 = regs */
	mr	r9,r0
	bl	system_call_exception

.Lsyscall_vectored_\name\()_exit:
	addi	r4,r1,STACK_FRAME_OVERHEAD
	li	r5,1	/* scv */
	bl	syscall_exit_prepare
BEGIN_FTR_SECTION
	stdcx.	r0,0,r1			/* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
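	/*
	 * Any larx reservation left over from the interrupted user context
	 * must be dead before returning, so a stcx. executed after the
	 * syscall cannot spuriously succeed. On CPUs where stcx. does not
	 * check the reservation address, a stdcx. to any address (here the
	 * stack) is enough to clear it; hence the IFCLR guard above.
	 */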
BEGIN_FTR_SECTION
	HMT_MEDIUM_LOW
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)

	cmpdi	r3,0
	bne	.Lsyscall_vectored_\name\()_restore_regs
	/* rfscv returns with LR->NIA and CTR->MSR */
	mtlr	r4
	mtctr	r5

	/* Could zero these as per ABI, but we may consider a stricter ABI
	 * which preserves these if libc implementations can benefit, so
	 * restore them for now until further measurement is done. */
	ld	r0,GPR0(r1)
	ld	r4,GPR4(r1)
	ld	r5,GPR5(r1)
	ld	r6,GPR6(r1)
	ld	r7,GPR7(r1)
	ld	r8,GPR8(r1)
	/* Zero volatile regs that may contain sensitive kernel data */
	li	r9,0
	li	r10,0
	li	r11,0
	li	r12,0
	mtspr	SPRN_XER,r0

	/*
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * The value of AMR only matters while we're in the kernel.
	 */
	mtcr	r2
	ld	r2,GPR2(r1)
	ld	r3,GPR3(r1)
	ld	r1,GPR1(r1)
	RFSCV_TO_USER
	b	.	/* prevent speculative execution */
.Lsyscall_vectored_\name\()_restore_regs:
.endm

system_call_vectored common 0x3000

/*
 * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
 * which is tested by system_call_exception when r0 is -1 (as set by vector
 * entry code).
 */
system_call_vectored sigill 0x7ff0
/*
 * Entered via kernel return set up by kernel/sstep.c, must match entry regs
 */
	.globl system_call_vectored_emulate
system_call_vectored_emulate:
_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate)
	li	r10,IRQS_ALL_DISABLED
	stb	r10,PACAIRQSOFTMASK(r13)
	b	system_call_vectored_common
#endif /* CONFIG_PPC_BOOK3S */
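/*
 * The scv entry vector normally sets PACAIRQSOFTMASK itself; when
 * single-step emulation (kernel/sstep.c) redirects a scv here, that vector
 * is bypassed, so the soft-mask state must be replicated before falling
 * into the common entry code.
 */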
	.balign IFETCH_ALIGN_BYTES
	.globl system_call_common
system_call_common:
_ASM_NOKPROBE_SYMBOL(system_call_common)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
	extrdi.	r10, r12, 1, (63-MSR_TS_T_LG)	/* transaction active? */
	bne	.Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
	BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
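	/*
	 * On Freescale Book3E cores the branch target buffer is flushed on
	 * kernel entry so user-primed branch predictions cannot steer
	 * kernel-mode speculation (a Spectre v2 style mitigation).
	 */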
	/* Can we avoid saving r3-r8 in the common case? */
	std	r3,GPR3(r1)
	std	r4,GPR4(r1)
	std	r5,GPR5(r1)
	std	r6,GPR6(r1)
	std	r7,GPR7(r1)
	std	r8,GPR8(r1)
	/* Zero r9-r12, this should only be required when restoring all GPRs */
	std	r11,GPR9(r1)
	std	r11,GPR10(r1)
	std	r11,GPR11(r1)
	std	r11,GPR12(r1)
	/*
	 * This clears CR0.SO (bit 28), which is the error indication on
	 * return from this system call.
	 */
	rldimi	r12,r11,28,(63-28)
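	/*
	 * With r11 == 0, the rldimi rotates r11 left by 28 and inserts it
	 * under a one-bit mask (MB = 63-28 = 35 through ME = 35), i.e. it
	 * writes a zero into bit 35 of r12. Counting from the LSB that is
	 * bit 28 of the 32-bit CR image: CR0[SO].
	 */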
	addi	r10,r1,STACK_FRAME_OVERHEAD
	ld	r11,exception_marker@toc(r2)
	std	r11,-16(r10)		/* "regshere" marker */
	/*
	 * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
	 * would clobber syscall parameters. Also we always enter with IRQs
	 * enabled and nothing pending. system_call_exception() will call
	 * trace_hardirqs_off().
	 */
	li	r11,IRQS_ALL_DISABLED
	li	r12,PACA_IRQ_HARD_DIS
	stb	r11,PACAIRQSOFTMASK(r13)
	stb	r12,PACAIRQHAPPENED(r13)
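	/*
	 * Unlike scv, the sc vector enters with MSR[EE]=0, so a hard disable
	 * really is in force and PACA_IRQ_HARD_DIS must be recorded in
	 * PACAIRQHAPPENED to keep the soft-mask state consistent.
	 */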
	/* Calling convention has r9 = orig r0, r10 = regs */
	mr	r9,r0
	bl	system_call_exception

.Lsyscall_exit:
	addi	r4,r1,STACK_FRAME_OVERHEAD
	li	r5,0	/* !scv */
	bl	syscall_exit_prepare
BEGIN_FTR_SECTION
	stdcx.	r0,0,r1			/* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
	cmpdi	r3,0
	bne	.Lsyscall_restore_regs
	/* Zero volatile regs that may contain sensitive kernel data */
	li	r9,0
	li	r10,0
	li	r11,0
	li	r12,0
.Lsyscall_restore_regs_cont:

BEGIN_FTR_SECTION
	HMT_MEDIUM_LOW
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
	/*
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * The value of AMR only matters while we're in the kernel.
	 */
	RFI_TO_USER
	b	.	/* prevent speculative execution */
.Lsyscall_restore_regs:
	ld	r3,_CTR(r1)
	ld	r4,_XER(r1)
	REST_NVGPRS(r1)
	mtctr	r3
	mtspr	SPRN_XER,r4
	ld	r0,GPR0(r1)
	REST_8GPRS(4, r1)
	ld	r12,GPR12(r1)
	b	.Lsyscall_restore_regs_cont
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
.Ltabort_syscall:
	/* Firstly we need to enable TM in the kernel */
	mfmsr	r10
	li	r9, 1
	rldimi	r10, r9, MSR_TM_LG, 63-MSR_TM_LG
	mtmsrd	r10, 0

	/* tabort, this dooms the transaction, nothing else */
	li	r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
	TABORT(R9)

	/*
	 * Return directly to userspace. We have corrupted user register state,
	 * but userspace will never see that register state. Execution will
	 * resume after the tbegin of the aborted transaction with the
	 * checkpointed register state.
	 */
	li	r9, MSR_RI
	andc	r10, r10, r9
	mtmsrd	r10, 1
	mtspr	SPRN_SRR0, r11
	mtspr	SPRN_SRR1, r12
	RFI_TO_USER
	b	.	/* prevent speculative execution */
#endif
#ifdef CONFIG_PPC_BOOK3S
_GLOBAL(ret_from_fork_scv)
	bl	schedule_tail
	REST_NVGPRS(r1)
	li	r3,0	/* fork() return value */
	b	.Lsyscall_vectored_common_exit
#endif
_GLOBAL(ret_from_fork)
	bl	schedule_tail
	REST_NVGPRS(r1)
	li	r3,0	/* fork() return value */
	b	.Lsyscall_exit

_GLOBAL(ret_from_kernel_thread)
	bl	schedule_tail
	REST_NVGPRS(r1)
	mtctr	r14
	mr	r3,r15
#ifdef PPC64_ELF_ABI_v2
	mr	r12,r14
#endif
	bctrl
	li	r3,0
	b	.Lsyscall_exit
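/*
 * The ELFv2 ABI expects an indirect call to arrive with r12 holding the
 * function's entry address, which the callee uses to derive its TOC
 * pointer; that is why r12 is loaded from r14 (the thread function)
 * above before the bctrl.
 */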
#ifdef CONFIG_PPC_BOOK3E
/* Save non-volatile GPRs, if not already saved. */
_GLOBAL(save_nvgprs)
	ld	r11,_TRAP(r1)
	andi.	r0,r11,1
	beqlr-
	SAVE_NVGPRS(r1)
	clrrdi	r0,r11,1
	std	r0,_TRAP(r1)
	blr
_ASM_NOKPROBE_SYMBOL(save_nvgprs);
#endif
#ifdef CONFIG_PPC_BOOK3S_64

#define FLUSH_COUNT_CACHE	\
1:	nop;			\
	patch_site 1b, patch__call_flush_branch_caches

	.global flush_branch_caches
flush_branch_caches:
	/* Save LR into r9 */
	mflr	r9

	// Flush the link stack

	// If we're just flushing the link stack, return here
3:	nop
	patch_site 3b, patch__flush_link_stack_return

	// Flush the count cache
2:	nop
	patch_site 2b, patch__flush_count_cache_return

	blr

#else
#define FLUSH_COUNT_CACHE
#endif /* CONFIG_PPC_BOOK3S_64 */
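/*
 * Each patch_site records the address of the labelled instruction in a
 * named variable (see asm/code-patching-asm.h). The mitigation setup code
 * uses those to rewrite the nops at runtime, turning the call to
 * flush_branch_caches and its early-return points on or off as selected.
 */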
/*
 * This routine switches between two different tasks. The process
 * state of one is saved on its kernel stack. Then the state
 * of the other is restored from its kernel stack. The memory
 * management hardware is updated to the second process's state.
 * Finally, we can return to the second process, via interrupt_return.
 * On entry, r3 points to the THREAD for the current task, r4
 * points to the THREAD for the new task.
 *
 * Note: there are two ways to get to the "going out" portion
 * of this code; either by coming in via the entry (_switch)
 * or via "fork" which must set up an environment equivalent
 * to the "_switch" path. If you change this you'll have to change
 * the fork code also.
 *
 * The code which creates the new task context is in 'copy_thread'
 * in arch/powerpc/kernel/process.c
 */
	.align	7
_GLOBAL(_switch)
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-SWITCH_FRAME_SIZE(r1)
	/* r3-r13 are caller saved -- Cort */
	SAVE_NVGPRS(r1)
	std	r0,_NIP(r1)	/* Return to switch caller */
	std	r1,KSP(r3)	/* Set old stack pointer */

	kuap_check_amr r9, r10
	FLUSH_COUNT_CACHE

	/*
	 * On SMP kernels, care must be taken because a task may be
	 * scheduled off CPUx and on to CPUy. Memory ordering must be
	 * considered.
	 *
	 * Cacheable stores on CPUx will be visible when the task is
	 * scheduled on CPUy by virtue of the core scheduler barriers
	 * (see "Notes on Program-Order guarantees on SMP systems." in
	 * kernel/sched/core.c).
	 *
	 * Uncacheable stores in the case of involuntary preemption must
	 * be taken care of. The smp_mb__after_spinlock() in __schedule()
	 * is implemented as hwsync on powerpc, which orders MMIO too. So
	 * long as there is an hwsync in the context switch path, it will
	 * be executed on the source CPU after the task has performed
	 * all MMIO ops on that CPU, and on the destination CPU before the
	 * task performs any MMIO ops there.
	 */

	/*
	 * The kernel context switch path must contain a spin_lock,
	 * which contains larx/stcx, which will clear any reservation
	 * of the task being switched.
	 */
#ifdef CONFIG_PPC_BOOK3S
	/* Cancel all explicit user streams as they will have no use after context
	 * switch and will stop the HW from creating streams itself
	 */
	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
#endif
	addi	r6,r4,-THREAD	/* Convert THREAD to 'current' */
	std	r6,PACACURRENT(r13)	/* Set new 'current' */
#if defined(CONFIG_STACKPROTECTOR)
	ld	r6, TASK_CANARY(r6)
	std	r6, PACA_CANARY(r13)
#endif
	ld	r8,KSP(r4)	/* new stack pointer */
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_MMU_FTR_SECTION
	b	2f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
BEGIN_MMU_FTR_SECTION
	clrrdi	r6,r8,28	/* get its ESID */
	clrrdi	r9,r1,28	/* get current sp ESID */
FTR_SECTION_ELSE
	clrrdi	r6,r8,40	/* get its 1T ESID */
	clrrdi	r9,r1,40	/* get current sp 1T ESID */
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_1T_SEGMENT)
	clrldi.	r0,r6,2		/* is new ESID c00000000? */
	cmpd	cr1,r6,r9	/* or is new ESID the same as current ESID? */
	cror	eq,4*cr1+eq,eq
	beq	2f		/* if yes, don't slbie it */
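	/*
	 * If the new stack is in the first kernel segment (ESID c00000000)
	 * it is already covered by the bolted kernel SLB entry, and if it
	 * shares the current stack's segment the SLB entry is already
	 * present; in both cases the slbie/slbmte below can be skipped.
	 */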
	/* Bolt in the new stack SLB entry */
	ld	r7,KSP_VSID(r4)	/* Get new stack's VSID */
	oris	r0,r6,(SLB_ESID_V)@h
	ori	r0,r0,(SLB_NUM_BOLTED-1)@l
BEGIN_MMU_FTR_SECTION
	li	r9,MMU_SEGSIZE_1T	/* insert B field */
	oris	r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
	rldimi	r7,r9,SLB_VSID_SSIZE_SHIFT,0
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
	/* Update the last bolted SLB. No write barriers are needed
	 * here, provided we only update the current CPU's SLB shadow
	 * buffer.
	 */
	ld	r9,PACA_SLBSHADOWPTR(r13)
	li	r12,0
	std	r12,SLBSHADOW_STACKESID(r9)	/* Clear ESID */
	li	r12,SLBSHADOW_STACKVSID
	STDX_BE	r7,r12,r9			/* Save VSID */
	li	r12,SLBSHADOW_STACKESID
	STDX_BE	r0,r12,r9			/* Save ESID */
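	/*
	 * The SLB shadow buffer is consumed by the hypervisor and is
	 * defined to be big-endian, hence STDX_BE (a byte-reversed store on
	 * LE kernels). The ESID is cleared first and rewritten last so the
	 * hypervisor never sees a valid-looking entry with a stale VSID.
	 */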
	/* No need to check for MMU_FTR_NO_SLBIE_B here, since when
	 * we have 1TB segments, the only CPUs known to have the errata
	 * only support less than 1TB of system memory and we'll never
	 * actually hit this code path.
	 */

	isync
	slbie	r6
BEGIN_FTR_SECTION
	slbie	r6		/* Workaround POWER5 < DD2.1 issue */
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
	slbmte	r7,r0
	isync
2:
#endif /* CONFIG_PPC_BOOK3S_64 */
	clrrdi	r7, r8, THREAD_SHIFT	/* base of new stack */
	/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
	   because we don't need to leave the 288-byte ABI gap at the
	   top of the kernel stack. */
	addi	r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
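	/*
	 * r7 becomes the new PACAKSAVE value: the stack pointer that
	 * exception entry loads into r1 when an interrupt is taken from
	 * userspace, so it must point just below a full frame at the top of
	 * the new task's kernel stack.
	 */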
	/*
	 * PMU interrupts in radix may come in here. They will use r1, not
	 * PACAKSAVE, so this stack switch will not cause a problem. They
	 * will store to the process stack, which may then be migrated to
	 * another CPU. However the rq lock release on this CPU paired with
	 * the rq lock acquire on the new CPU before the stack becomes
	 * active on the new CPU, will order those stores.
	 */
	mr	r1,r8		/* start using new stack pointer */
	std	r7,PACAKSAVE(r13)
	/* r3-r13 are destroyed -- Cort */
	REST_NVGPRS(r1)

	/* convert old thread to its task_struct for return value */
	addi	r3,r3,-THREAD
	ld	r7,_NIP(r1)	/* Return to _switch caller in new task */
	mtlr	r7
	addi	r1,r1,SWITCH_FRAME_SIZE
	blr
#ifdef CONFIG_PPC_BOOK3S
	/*
	 * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
	 * touched, no exit work created, then this can be used.
	 */
	.balign IFETCH_ALIGN_BYTES
	.globl fast_interrupt_return
fast_interrupt_return:
_ASM_NOKPROBE_SYMBOL(fast_interrupt_return)
	kuap_check_amr r3, r4
	ld	r5,_MSR(r1)
	andi.	r0,r5,MSR_PR
	bne	.Lfast_user_interrupt_return
	kuap_restore_amr r3, r4
	andi.	r0,r5,MSR_RI
	li	r3,0	/* 0 return value, no EMULATE_STACK_STORE */
	bne+	.Lfast_kernel_interrupt_return
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	unrecoverable_exception
	b	.	/* should not get here */
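	/*
	 * If the saved MSR has RI clear, the interrupt arrived in a window
	 * where SRR0/SRR1 were live and have since been overwritten, so the
	 * interrupted context can no longer be restored reliably;
	 * unrecoverable_exception() is the only option.
	 */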
	.balign IFETCH_ALIGN_BYTES
	.globl interrupt_return
interrupt_return:
_ASM_NOKPROBE_SYMBOL(interrupt_return)
	ld	r4,_MSR(r1)
	andi.	r0,r4,MSR_PR
	beq	.Lkernel_interrupt_return
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	interrupt_exit_user_prepare
	cmpdi	r3,0
	bne-	.Lrestore_nvgprs
.Lfast_user_interrupt_return:
	ld	r11,_NIP(r1)
	ld	r12,_MSR(r1)
BEGIN_FTR_SECTION
	ld	r10,_PPR(r1)
	mtspr	SPRN_PPR,r10
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
BEGIN_FTR_SECTION
	stdcx.	r0,0,r1		/* to clear the reservation */
FTR_SECTION_ELSE
	ldarx	r0,0,r1
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
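	/*
	 * Clearing the reservation must work for both stcx.
	 * implementations: if stcx. ignores the reservation address, the
	 * stdcx. to the stack always kills it; if stcx. does check the
	 * address, the ldarx replaces any user-address reservation with one
	 * on the kernel stack, which a user stcx. can never match.
	 */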
	RFI_TO_USER
	b	.	/* prevent speculative execution */

.Lrestore_nvgprs:
	REST_NVGPRS(r1)
	b	.Lfast_user_interrupt_return
	.balign IFETCH_ALIGN_BYTES
.Lkernel_interrupt_return:
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	interrupt_exit_kernel_prepare

.Lfast_kernel_interrupt_return:
	cmpdi	cr1,r3,0
BEGIN_FTR_SECTION
	stdcx.	r0,0,r1		/* to clear the reservation */
FTR_SECTION_ELSE
	ldarx	r0,0,r1
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
	/*
	 * Leaving a stale exception_marker on the stack can confuse
	 * the reliable stack unwinder later on. Clear it.
	 */
	std	r0,STACK_FRAME_OVERHEAD-16(r1)

	bne-	cr1,1f	/* emulate stack store */
	RFI_TO_KERNEL
	b	.	/* prevent speculative execution */
1:	/*
	 * Emulate stack store with update. New r1 value was already calculated
	 * and updated in our interrupt regs by emulate_loadstore, but we can't
	 * store the previous value of r1 to the stack before re-loading our
	 * registers from it, otherwise they could be clobbered. Use
	 * PACA_EXGEN as temporary storage to hold the store data, as
	 * interrupts are disabled here so it won't be clobbered.
	 */
	std	r9,PACA_EXGEN+0(r13)
	addi	r9,r1,INT_FRAME_SIZE	/* get original r1 */
	ld	r1,GPR1(r1)
	std	r9,0(r1)	/* perform store component of stdu */
	ld	r9,PACA_EXGEN+0(r13)

	RFI_TO_KERNEL
	b	.	/* prevent speculative execution */
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_RTAS
/*
 * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
 * called with the MMU off.
 *
 * In addition, we need to be in 32b mode, at least for now.
 *
 * Note: r3 is an input parameter to rtas, so don't trash it...
 */
_GLOBAL(__enter_rtas)
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-SWITCH_FRAME_SIZE(r1)	/* Save SP and create stack space. */

	/* Because RTAS is running in 32b mode, it clobbers the high order half
	 * of all registers that it saves. We therefore save those registers
	 * RTAS might touch to the stack. (r0, r3-r13 are caller saved)
	 */
	SAVE_GPR(2, r1)			/* Save the TOC */
	SAVE_GPR(13, r1)		/* Save paca */
	SAVE_NVGPRS(r1)			/* Save the non-volatiles */
	/* Temporary workaround to clear CR until RTAS can be modified to
	 * ignore all bits.
	 */
	li	r0,0
	mtcr	r0

#ifdef CONFIG_BUG
	/* There is no way it is acceptable to get here with interrupts enabled,
	 * check it with the asm equivalent of WARN_ON
	 */
	lbz	r0,PACAIRQSOFTMASK(r13)
1:	tdeqi	r0,IRQS_ENABLED
	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
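	/*
	 * This is WARN_ON() spelled in assembly: tdeqi traps iff the soft
	 * mask still reads IRQS_ENABLED, and the bug-table entry tagged
	 * BUGFLAG_WARNING makes the trap handler print a warning and
	 * continue rather than panic.
	 */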
	/* Hard-disable interrupts */
	mfmsr	r6
	rldicl	r7,r6,48,1
	rotldi	r7,r7,16
	mtmsrd	r7,1

	/* Unfortunately, the stack pointer and the MSR are also clobbered,
	 * so they are saved in the PACA which allows us to restore
	 * our original state after RTAS returns.
	 */
	std	r1,PACAR1(r13)
	std	r6,PACASAVEDMSR(r13)
	/* Setup our real return addr */
	LOAD_REG_ADDR(r4,rtas_return_loc)
	clrldi	r4,r4,2		/* convert to realmode address */
	mtlr	r4

	li	r0,0
	ori	r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI
	andc	r0,r6,r0
	li	r9,1
	rldicr	r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
	andc	r6,r0,r9
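	/*
	 * r9 collects every MSR bit RTAS must run without: MSR_SF built by
	 * the li/rldicr pair (1 rotated up to bit MSR_SF_LG), plus
	 * translation (IR/DR), FP and FP-exception modes, RI and LE. The
	 * andc strips them, leaving a 32-bit big-endian real-mode MSR in r6
	 * for SRR1.
	 */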
	sync				/* disable interrupts so SRR0/1 */
	mtmsrd	r0			/* don't get trashed */

	LOAD_REG_ADDR(r4, rtas)
	ld	r5,RTASENTRY(r4)	/* get the rtas->entry value */
	ld	r4,RTASBASE(r4)		/* get the rtas->base value */

	mtspr	SPRN_SRR0,r5
	mtspr	SPRN_SRR1,r6
	RFI_TO_KERNEL
	b	.	/* prevent speculative execution */
rtas_return_loc:
	FIXUP_ENDIAN

	/*
	 * Clear RI and set SF before anything.
	 */
	mfmsr	r6
	li	r0,MSR_RI
	andc	r6,r6,r0
	sldi	r0,r0,(MSR_SF_LG - MSR_RI_LG)
	or	r6,r6,r0
	sync
	mtmsrd	r6

	/* relocation is off at this point */
	GET_PACA(r4)
	clrldi	r4,r4,2		/* convert to realmode address */

	bcl	20,31,$+4
0:	mflr	r3
	ld	r3,(1f-0b)(r3)	/* get &rtas_restore_regs */

	ld	r1,PACAR1(r4)		/* Restore our SP */
	ld	r4,PACASAVEDMSR(r4)	/* Restore our MSR */

	mtspr	SPRN_SRR0,r3
	mtspr	SPRN_SRR1,r4
	RFI_TO_KERNEL
	b	.	/* prevent speculative execution */
_ASM_NOKPROBE_SYMBOL(__enter_rtas)
_ASM_NOKPROBE_SYMBOL(rtas_return_loc)

	.align	3
1:	.8byte	rtas_restore_regs
rtas_restore_regs:
	/* relocation is on at this point */
	REST_GPR(2, r1)			/* Restore the TOC */
	REST_GPR(13, r1)		/* Restore paca */
	REST_NVGPRS(r1)			/* Restore the non-volatiles */

	addi	r1,r1,SWITCH_FRAME_SIZE	/* Unstack our frame */
	ld	r0,16(r1)		/* get return address */
	mtlr	r0
	blr				/* return to caller */
#endif /* CONFIG_PPC_RTAS */
_GLOBAL(enter_prom)
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-SWITCH_FRAME_SIZE(r1)	/* Save SP and create stack space */

	/* Because PROM is running in 32b mode, it clobbers the high order half
	 * of all registers that it saves. We therefore save those registers
	 * PROM might touch to the stack. (r0, r3-r13 are caller saved)
	 */
	SAVE_GPR(2, r1)
	SAVE_GPR(13, r1)
	SAVE_NVGPRS(r1)
	mfcr	r10
	mfmsr	r11
	std	r10,_CCR(r1)
	std	r11,_MSR(r1)

	/* Put PROM address in SRR0 */
	mtsrr0	r4

	/* Setup our trampoline return addr in LR */
	bcl	20,31,$+4
0:	mflr	r4
	addi	r4,r4,(1f - 0b)
	mtlr	r4
	/* Prepare a 32-bit mode big endian MSR
	 */
#ifdef CONFIG_PPC_BOOK3E
	rlwinm	r11,r11,0,1,31
	mtsrr1	r11
	rfi
#else /* CONFIG_PPC_BOOK3E */
	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
	andc	r11,r11,r12
	mtsrr1	r11
	RFI_TO_KERNEL
#endif /* CONFIG_PPC_BOOK3E */
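	/*
	 * Both paths build the MSR Open Firmware expects: on Book3E the
	 * rlwinm clears the upper word and MSR[CM] (bit 0 of the low word),
	 * while on Book3S the andc clears MSR_SF/MSR_ISF/MSR_LE, leaving a
	 * 32-bit big-endian MSR for the rfi into the PROM entry point.
	 */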
1:	/* Return from OF */
	FIXUP_ENDIAN

	/* Just make sure that r1 top 32 bits didn't get
	 * corrupted by OF
	 */
	rldicl	r1,r1,0,32

	/* Restore the MSR (back to 64 bits) */
	ld	r0,_MSR(r1)
	mtmsrd	r0
	isync

	/* Restore other registers */
	REST_GPR(2, r1)
	REST_GPR(13, r1)
	REST_NVGPRS(r1)
	ld	r4,_CCR(r1)
	mtcr	r4

	addi	r1,r1,SWITCH_FRAME_SIZE
	ld	r0,16(r1)
	mtlr	r0
	blr