arch/powerpc/kernel/interrupt.c
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/context_tracking.h>
#include <linux/err.h>
#include <linux/compat.h>

#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/interrupt.h>
#include <asm/hw_irq.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/signal.h>
#include <asm/switch_to.h>
#include <asm/syscall.h>
#include <asm/time.h>
#include <asm/unistd.h>

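/*
 * Per-CPU stash for the previous DBCR0 value; booke_load_dbcr0() below saves
 * the current DBCR0 here (on 32-bit) before loading the user's value.
 */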
#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
unsigned long global_dbcr0[NR_CPUS];
#endif

typedef long (*syscall_fn)(long, long, long, long, long, long);

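/*
 * C entry point for system calls, reached from the asm syscall entry with the
 * syscall arguments in r3-r8 and the syscall number in r0. Returns the value
 * that syscall_exit_prepare() will receive as r3.
 */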
/* Has to run notrace because it is entered not completely "reconciled" */
notrace long system_call_exception(long r3, long r4, long r5,
                                   long r6, long r7, long r8,
                                   unsigned long r0, struct pt_regs *regs)
{
        syscall_fn f;

        kuep_lock();

        regs->orig_gpr3 = r3;

        if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
                BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);

        trace_hardirqs_off(); /* finish reconciling */

        CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
        user_exit_irqoff();

        if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
                BUG_ON(!(regs->msr & MSR_RI));
        BUG_ON(!(regs->msr & MSR_PR));
        BUG_ON(arch_irq_disabled_regs(regs));

#ifdef CONFIG_PPC_PKEY
        if (mmu_has_feature(MMU_FTR_PKEY)) {
                unsigned long amr, iamr;
                bool flush_needed = false;
                /*
                 * When entering from userspace, the AMR/IAMR usually differ
                 * from the kernel default values, so don't bother comparing;
                 * just save and override them.
                 */
                amr = mfspr(SPRN_AMR);
                iamr = mfspr(SPRN_IAMR);
                regs->amr  = amr;
                regs->iamr = iamr;
                if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
                        mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
                        flush_needed = true;
                }
                if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
                        mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
                        flush_needed = true;
                }
                if (flush_needed)
                        isync();
        } else
#endif
                kuap_assert_locked();

        booke_restore_dbcr0();

        account_cpu_user_entry();

        account_stolen_time();

        /*
         * This is not required for the syscall exit path, but makes the
         * stack frame look nicer. If this was initialised in the first stack
         * frame, or if the unwinder was taught that the first stack frame
         * always returns to user with IRQS_ENABLED, this store could be
         * avoided!
         */
        irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);

        local_irq_enable();

        if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
                if (unlikely(trap_is_unsupported_scv(regs))) {
                        /* Unsupported scv vector */
                        _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
                        return regs->gpr[3];
                }
                /*
                 * We use the return value of do_syscall_trace_enter() as the
                 * syscall number. If the syscall was rejected for any reason,
                 * do_syscall_trace_enter() returns an invalid syscall number,
                 * the test against NR_syscalls fails, and the value to return
                 * is already in regs->gpr[3].
                 */
                r0 = do_syscall_trace_enter(regs);
                if (unlikely(r0 >= NR_syscalls))
                        return regs->gpr[3];
                r3 = regs->gpr[3];
                r4 = regs->gpr[4];
                r5 = regs->gpr[5];
                r6 = regs->gpr[6];
                r7 = regs->gpr[7];
                r8 = regs->gpr[8];

        } else if (unlikely(r0 >= NR_syscalls)) {
                if (unlikely(trap_is_unsupported_scv(regs))) {
                        /* Unsupported scv vector */
                        _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
                        return regs->gpr[3];
                }
                return -ENOSYS;
        }

        /* May be faster to do array_index_nospec? */
        barrier_nospec();

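        /*
         * 32-bit compat tasks pass syscall arguments in 32-bit registers, so
         * clear the upper halves before dispatching to the 64-bit handler.
         */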
        if (unlikely(is_compat_task())) {
                f = (void *)compat_sys_call_table[r0];

                r3 &= 0x00000000ffffffffULL;
                r4 &= 0x00000000ffffffffULL;
                r5 &= 0x00000000ffffffffULL;
                r6 &= 0x00000000ffffffffULL;
                r7 &= 0x00000000ffffffffULL;
                r8 &= 0x00000000ffffffffULL;

        } else {
                f = (void *)sys_call_table[r0];
        }

        return f(r3, r4, r5, r6, r7, r8);
}

/*
 * local irqs must be disabled. Returns false if the caller must re-enable
 * them, check for new work, and try again.
 *
 * This should be called with local irqs disabled, but if they were previously
 * enabled when the interrupt handler returns (indicating a process-context /
 * synchronous interrupt) then irqs_enabled should be true.
 */
static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
{
        /* This must be done with RI=1 because tracing may touch vmaps */
        trace_hardirqs_on();

        /* This pattern matches prep_irq_for_idle */
        if (clear_ri)
                __hard_EE_RI_disable();
        else
                __hard_irq_disable();
#ifdef CONFIG_PPC64
        if (unlikely(lazy_irq_pending_nocheck())) {
                /* Took an interrupt, may have more exit work to do. */
                if (clear_ri)
                        __hard_RI_enable();
                trace_hardirqs_off();
                local_paca->irq_happened |= PACA_IRQ_HARD_DIS;

                return false;
        }
        local_paca->irq_happened = 0;
        irq_soft_mask_set(IRQS_ENABLED);
#endif
        return true;
}

static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
{
        if (__prep_irq_for_enabled_exit(clear_ri))
                return true;

        /*
         * Must replay pending soft-masked interrupts now. Don't just
         * local_irq_enable(); local_irq_disable(); because if we are
         * returning from an asynchronous interrupt here, another one
         * might hit after irqs are enabled, and it would exit via this
         * same path allowing another to fire, and so on unbounded.
         *
         * If interrupts were enabled when this interrupt exited,
         * indicating a process context (synchronous) interrupt,
         * local_irq_enable/disable can be used, which will enable
         * interrupts rather than keeping them masked (unclear how
         * much benefit this is over just replaying for all cases,
         * because we immediately disable again, so all we're really
         * doing is allowing hard interrupts to execute directly for
         * a very small time, rather than being masked and replayed).
         */
        if (irqs_enabled) {
                local_irq_enable();
                local_irq_disable();
        } else {
                replay_soft_interrupts();
        }

        return false;
}

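/*
 * Load the user's debug control register (DBCR0) when returning to a task
 * that has internal debug mode enabled; on 32-bit, the previous value is
 * stashed in global_dbcr0[] for this CPU.
 */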
static notrace void booke_load_dbcr0(void)
{
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
        unsigned long dbcr0 = current->thread.debug.dbcr0;

        if (likely(!(dbcr0 & DBCR0_IDM)))
                return;

        /*
         * Check to see if the dbcr0 register is set up to debug.
         * Use the internal debug mode bit to do this.
         */
        mtmsr(mfmsr() & ~MSR_DE);
        if (IS_ENABLED(CONFIG_PPC32)) {
                isync();
                global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
        }
        mtspr(SPRN_DBCR0, dbcr0);
        mtspr(SPRN_DBSR, -1);
#endif
}

/*
 * This should be called after a syscall returns, with r3 the return value
 * from the syscall. If this function returns non-zero, the system call
 * exit assembly should additionally load all GPR registers and CTR and XER
 * from the interrupt frame.
 *
 * The function graph tracer cannot trace the return side of this function,
 * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
 */
notrace unsigned long syscall_exit_prepare(unsigned long r3,
                                           struct pt_regs *regs,
                                           long scv)
{
        unsigned long ti_flags;
        unsigned long ret = 0;
        bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;

        CT_WARN_ON(ct_state() == CONTEXT_USER);

        kuap_assert_locked();

        regs->result = r3;

        /* Check whether the syscall is issued inside a restartable sequence */
        rseq_syscall(regs);

        ti_flags = current_thread_info()->flags;

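        /*
         * For the sc syscall ABI, an error is signalled by setting CR0.SO and
         * returning the positive errno; scv returns a negative errno directly,
         * so it skips this conversion.
         */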
        if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
                if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
                        r3 = -r3;
                        regs->ccr |= 0x10000000; /* Set SO bit in CR */
                }
        }

        if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
                if (ti_flags & _TIF_RESTOREALL)
                        ret = _TIF_RESTOREALL;
                else
                        regs->gpr[3] = r3;
                clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
        } else {
                regs->gpr[3] = r3;
        }

        if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
                do_syscall_trace_leave(regs);
                ret |= _TIF_RESTOREALL;
        }

        local_irq_disable();

again:
        ti_flags = READ_ONCE(current_thread_info()->flags);
        while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
                local_irq_enable();
                if (ti_flags & _TIF_NEED_RESCHED) {
                        schedule();
                } else {
                        /*
                         * SIGPENDING must restore signal handler function
                         * argument GPRs, and some non-volatiles (e.g., r1).
                         * Restore all for now. This could be made lighter.
                         */
                        if (ti_flags & _TIF_SIGPENDING)
                                ret |= _TIF_RESTOREALL;
                        do_notify_resume(regs, ti_flags);
                }
                local_irq_disable();
                ti_flags = READ_ONCE(current_thread_info()->flags);
        }

        if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
                if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
                                unlikely((ti_flags & _TIF_RESTORE_TM))) {
                        restore_tm_state(regs);
                } else {
                        unsigned long mathflags = MSR_FP;

                        if (cpu_has_feature(CPU_FTR_VSX))
                                mathflags |= MSR_VEC | MSR_VSX;
                        else if (cpu_has_feature(CPU_FTR_ALTIVEC))
                                mathflags |= MSR_VEC;

                        /*
                         * If userspace MSR has all available FP bits set,
                         * then they are live and there is no need to restore
                         * them. If not, it means the regs were given up and
                         * restore_math may decide to restore them (to avoid
                         * taking an FP fault).
                         */
                        if ((regs->msr & mathflags) != mathflags)
                                restore_math(regs);
                }
        }

        user_enter_irqoff();

        /* scv need not set RI=0 because SRRs are not used */
        if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) {
                user_exit_irqoff();
                local_irq_enable();
                local_irq_disable();
                goto again;
        }

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        local_paca->tm_scratch = regs->msr;
#endif

        booke_load_dbcr0();

        account_cpu_user_exit();

        /* Restore user access locks last */
        kuap_user_restore(regs);
        kuep_unlock();

        return ret;
}

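/*
 * Prepare to return from an interrupt taken while in user mode: handle
 * pending user work (reschedule, signals), restore FP/vector state if needed,
 * and hard-disable interrupts for the exit. The return value is a flag word
 * consumed by the exit assembly (e.g. _TIF_RESTOREALL when all GPRs must be
 * reloaded from the interrupt frame).
 */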
notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
{
        unsigned long ti_flags;
        unsigned long flags;
        unsigned long ret = 0;

        if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
                BUG_ON(!(regs->msr & MSR_RI));
        BUG_ON(!(regs->msr & MSR_PR));
        BUG_ON(arch_irq_disabled_regs(regs));
        CT_WARN_ON(ct_state() == CONTEXT_USER);

        /*
         * We don't need to restore AMR on the way back to userspace for KUAP.
         * AMR can only have been unlocked if we interrupted the kernel.
         */
        kuap_assert_locked();

        local_irq_save(flags);

again:
        ti_flags = READ_ONCE(current_thread_info()->flags);
        while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
                local_irq_enable(); /* returning to user: may enable */
                if (ti_flags & _TIF_NEED_RESCHED) {
                        schedule();
                } else {
                        if (ti_flags & _TIF_SIGPENDING)
                                ret |= _TIF_RESTOREALL;
                        do_notify_resume(regs, ti_flags);
                }
                local_irq_disable();
                ti_flags = READ_ONCE(current_thread_info()->flags);
        }

        if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
                if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
                                unlikely((ti_flags & _TIF_RESTORE_TM))) {
                        restore_tm_state(regs);
                } else {
                        unsigned long mathflags = MSR_FP;

                        if (cpu_has_feature(CPU_FTR_VSX))
                                mathflags |= MSR_VEC | MSR_VSX;
                        else if (cpu_has_feature(CPU_FTR_ALTIVEC))
                                mathflags |= MSR_VEC;

                        /* See above restore_math comment */
                        if ((regs->msr & mathflags) != mathflags)
                                restore_math(regs);
                }
        }

        user_enter_irqoff();

        if (unlikely(!__prep_irq_for_enabled_exit(true))) {
                user_exit_irqoff();
                local_irq_enable();
                local_irq_disable();
                goto again;
        }

        booke_load_dbcr0();

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        local_paca->tm_scratch = regs->msr;
#endif

        account_cpu_user_exit();

        /* Restore user access locks last */
        kuap_user_restore(regs);
        kuep_unlock();

        return ret;
}

void preempt_schedule_irq(void);

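/*
 * Prepare to return from an interrupt taken while in kernel mode: optionally
 * preempt (CONFIG_PREEMPT), replay any soft-masked interrupts that arrived,
 * and restore the KUAP state of the interrupted context. Returns non-zero if
 * the exit assembly must perform a deferred stack store.
 */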
notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
{
        unsigned long flags;
        unsigned long ret = 0;
        unsigned long kuap;

        if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
            unlikely(!(regs->msr & MSR_RI)))
                unrecoverable_exception(regs);
        BUG_ON(regs->msr & MSR_PR);
        /*
         * CT_WARN_ON comes here via program_check_exception,
         * so avoid recursion.
         */
        if (TRAP(regs) != INTERRUPT_PROGRAM)
                CT_WARN_ON(ct_state() == CONTEXT_USER);

        kuap = kuap_get_and_assert_locked();

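        /*
         * Instruction emulation may have deferred a store to the interrupted
         * context's stack; returning 1 tells the exit assembly to perform it.
         */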
        if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
                clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
                ret = 1;
        }

        local_irq_save(flags);

        if (!arch_irq_disabled_regs(regs)) {
                /* Returning to a kernel context with local irqs enabled. */
                WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
                if (IS_ENABLED(CONFIG_PREEMPT)) {
                        /* Return to preemptible kernel context */
                        if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
                                if (preempt_count() == 0)
                                        preempt_schedule_irq();
                        }
                }

                if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags))))
                        goto again;
        } else {
                /* Returning to a kernel context with local irqs disabled. */
                __hard_EE_RI_disable();
#ifdef CONFIG_PPC64
                if (regs->msr & MSR_EE)
                        local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
#endif
        }

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        local_paca->tm_scratch = regs->msr;
#endif

        /*
         * 64s does not want to mfspr(SPRN_AMR) here, because this comes after
         * mtmsr, which would cause Read-After-Write stalls. Hence, take the
         * AMR value from the check above.
         */
        kuap_kernel_restore(regs, kuap);

        return ret;
}