/* arch/powerpc/kernel/syscall_64.c */
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/err.h>
#include <asm/asm-prototypes.h>
#include <asm/book3s/64/kup-radix.h>
#include <asm/cputime.h>
#include <asm/hw_irq.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/signal.h>
#include <asm/switch_to.h>
#include <asm/syscall.h>
#include <asm/time.h>
#include <asm/unistd.h>

typedef long (*syscall_fn)(long, long, long, long, long, long);

/* Has to run notrace because it is entered not completely "reconciled" */
notrace long system_call_exception(long r3, long r4, long r5,
				   long r6, long r7, long r8,
				   unsigned long r0, struct pt_regs *regs)
{
	syscall_fn f;

	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);

	trace_hardirqs_off(); /* finish reconciling */

	if (IS_ENABLED(CONFIG_PPC_BOOK3S))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(!FULL_REGS(regs));
	BUG_ON(regs->softe != IRQS_ENABLED);

	kuap_check_amr();

	account_cpu_user_entry();

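	/*
	 * On shared-processor LPARs the hypervisor appends an entry to the
	 * dispatch trace log each time this vCPU is dispatched. If our
	 * cached read index (dtl_ridx) has fallen behind the hypervisor's
	 * write index (dtl_idx), dispatches happened behind our back and
	 * the stolen time must be folded into the accounting now.
	 */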
#ifdef CONFIG_PPC_SPLPAR
	if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
	    firmware_has_feature(FW_FEATURE_SPLPAR)) {
		struct lppaca *lp = local_paca->lppaca_ptr;

		if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
			accumulate_stolen_time();
	}
#endif

	/*
	 * This is not required for the syscall exit path, but makes the
	 * stack frame look nicer. If this was initialised in the first stack
	 * frame, or if the unwinder was taught the first stack frame always
	 * returns to user with IRQS_ENABLED, this store could be avoided!
	 */
	regs->softe = IRQS_ENABLED;

	local_irq_enable();

	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
		if (unlikely(regs->trap == 0x7ff0)) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		/*
		 * We use the return value of do_syscall_trace_enter() as the
		 * syscall number. If the syscall was rejected for any reason,
		 * do_syscall_trace_enter() returns an invalid syscall number,
		 * the test against NR_syscalls below fails, and the return
		 * value to use is already in regs->gpr[3].
		 */
		r0 = do_syscall_trace_enter(regs);
		if (unlikely(r0 >= NR_syscalls))
			return regs->gpr[3];
		r3 = regs->gpr[3];
		r4 = regs->gpr[4];
		r5 = regs->gpr[5];
		r6 = regs->gpr[6];
		r7 = regs->gpr[7];
		r8 = regs->gpr[8];

	} else if (unlikely(r0 >= NR_syscalls)) {
		if (unlikely(regs->trap == 0x7ff0)) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		return -ENOSYS;
	}

	/* May be faster to do array_index_nospec? */
	barrier_nospec();
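
	/*
	 * A sketch of the array_index_nospec() alternative mentioned above
	 * (an assumption about how it could be used here, not what this
	 * file currently does). Instead of a full speculation barrier,
	 * clamp the already bounds-checked index so it cannot speculate
	 * out of range:
	 *
	 *	r0 = array_index_nospec(r0, NR_syscalls);
	 *	f = (void *)sys_call_table[r0];
	 *
	 * array_index_nospec() comes from <linux/nospec.h>.
	 */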

	if (unlikely(is_32bit_task())) {
		f = (void *)compat_sys_call_table[r0];

		r3 &= 0x00000000ffffffffULL;
		r4 &= 0x00000000ffffffffULL;
		r5 &= 0x00000000ffffffffULL;
		r6 &= 0x00000000ffffffffULL;
		r7 &= 0x00000000ffffffffULL;
		r8 &= 0x00000000ffffffffULL;
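
		/*
		 * Worked example of why the masking matters: a 32-bit task
		 * passing (int)-1 puts 0xffffffff in the low word of the
		 * GPR, but the ABI does not guarantee the upper 32 bits are
		 * clear. The mask yields 0x00000000ffffffffULL, which the
		 * compat syscall wrappers then sign- or zero-extend as the
		 * individual argument types require.
		 */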

	} else {
		f = (void *)sys_call_table[r0];
	}

	return f(r3, r4, r5, r6, r7, r8);
}

/*
 * Local irqs must be disabled on entry. Returns false if the caller must
 * re-enable them, check for new work, and try again.
 */
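/*
 * A sketch of the retry pattern callers use (it mirrors the exit paths
 * below; assumes all exit work runs with interrupts disabled):
 *
 *	again:
 *		local_irq_disable();
 *		... handle exit work ...
 *		if (unlikely(!prep_irq_for_enabled_exit(true))) {
 *			local_irq_enable();
 *			goto again;
 *		}
 */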
static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri)
{
	/* This must be done with RI=1 because tracing may touch vmaps */
	trace_hardirqs_on();

	/* This pattern matches prep_irq_for_idle */
	if (clear_ri)
		__hard_EE_RI_disable();
	else
		__hard_irq_disable();
	if (unlikely(lazy_irq_pending_nocheck())) {
		/* Took an interrupt, may have more exit work to do. */
		if (clear_ri)
			__hard_RI_enable();
		trace_hardirqs_off();
		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;

		return false;
	}
	local_paca->irq_happened = 0;
	irq_soft_mask_set(IRQS_ENABLED);

	return true;
}

/*
 * This should be called after a syscall returns, with r3 the return value
 * from the syscall. If this function returns non-zero, the system call
 * exit assembly should additionally load all GPR registers and CTR and XER
 * from the interrupt frame.
 *
 * The function graph tracer cannot trace the return side of this function,
 * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
 */
notrace unsigned long syscall_exit_prepare(unsigned long r3,
					   struct pt_regs *regs,
					   long scv)
{
	unsigned long *ti_flagsp = &current_thread_info()->flags;
	unsigned long ti_flags;
	unsigned long ret = 0;

	kuap_check_amr();

	regs->result = r3;

	/* Check whether the syscall is issued inside a restartable sequence */
	rseq_syscall(regs);

	ti_flags = *ti_flagsp;

	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) {
		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
			r3 = -r3;
			regs->ccr |= 0x10000000; /* Set SO bit in CR */
		}
	}
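
	/*
	 * Worked example of the branch above: a syscall failing with
	 * -ENOENT leaves r3 = -2, which compares >= -MAX_ERRNO as an
	 * unsigned value. r3 is negated to 2 and the SO bit (0x10000000)
	 * is set in CR0; the sc-based ABI has libc test SO and, if set,
	 * store r3 into errno and return -1. scv is excluded because it
	 * returns negative errno values directly, like most other
	 * architectures.
	 */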

	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
		if (ti_flags & _TIF_RESTOREALL)
			ret = _TIF_RESTOREALL;
		else
			regs->gpr[3] = r3;
		clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
	} else {
		regs->gpr[3] = r3;
	}

	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
		do_syscall_trace_leave(regs);
		ret |= _TIF_RESTOREALL;
	}

again:
	local_irq_disable();
	ti_flags = READ_ONCE(*ti_flagsp);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable();
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			/*
			 * SIGPENDING must restore signal handler function
			 * argument GPRs, and some non-volatiles (e.g., r1).
			 * Restore all for now. This could be made lighter.
			 */
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(*ti_flagsp);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/*
			 * If userspace MSR has all available FP bits set,
			 * then they are live and no need to restore. If not,
			 * it means the regs were given up and restore_math
			 * may decide to restore them (to avoid taking an FP
			 * fault).
			 */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	/* scv need not set RI=0 because SRRs are not used */
	if (unlikely(!prep_irq_for_enabled_exit(!scv))) {
		local_irq_enable();
		goto again;
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	account_cpu_user_exit();

	return ret;
}

#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
{
#ifdef CONFIG_PPC_BOOK3E
	struct thread_struct *ts = &current->thread;
#endif
	unsigned long *ti_flagsp = &current_thread_info()->flags;
	unsigned long ti_flags;
	unsigned long flags;
	unsigned long ret = 0;

	if (IS_ENABLED(CONFIG_PPC_BOOK3S))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(!FULL_REGS(regs));
	BUG_ON(regs->softe != IRQS_ENABLED);

	/*
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * AMR can only have been unlocked if we interrupted the kernel.
	 */
	kuap_check_amr();

	local_irq_save(flags);

again:
	ti_flags = READ_ONCE(*ti_flagsp);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable(); /* returning to user: may enable */
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(*ti_flagsp);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/* See above restore_math comment */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	if (unlikely(!prep_irq_for_enabled_exit(true))) {
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

#ifdef CONFIG_PPC_BOOK3E
	if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
		/*
		 * Check to see if the dbcr0 register is set up to debug.
		 * Use the internal debug mode bit to do this.
		 */
		mtmsr(mfmsr() & ~MSR_DE);
		mtspr(SPRN_DBCR0, ts->debug.dbcr0);
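		/*
		 * DBSR is write-one-to-clear: storing -1 clears any pending
		 * debug events before debug is re-enabled for userspace.
		 */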
		mtspr(SPRN_DBSR, -1);
	}
#endif

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	account_cpu_user_exit();

	return ret;
}

void unrecoverable_exception(struct pt_regs *regs);
void preempt_schedule_irq(void);

notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
{
	unsigned long *ti_flagsp = &current_thread_info()->flags;
	unsigned long flags;
	unsigned long ret = 0;
	unsigned long amr;

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI)))
		unrecoverable_exception(regs);
	BUG_ON(regs->msr & MSR_PR);
	BUG_ON(!FULL_REGS(regs));

	amr = kuap_get_and_check_amr();

	if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) {
		clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp);
		ret = 1;
	}

	local_irq_save(flags);

	if (regs->softe == IRQS_ENABLED) {
		/* Returning to a kernel context with local irqs enabled. */
		WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
		if (IS_ENABLED(CONFIG_PREEMPT)) {
			/* Return to preemptible kernel context */
			if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) {
				if (preempt_count() == 0)
					preempt_schedule_irq();
			}
		}

		if (unlikely(!prep_irq_for_enabled_exit(true))) {
			/*
			 * Can't local_irq_restore to replay if we were in
			 * interrupt context. Must replay directly.
			 */
			if (irqs_disabled_flags(flags)) {
				replay_soft_interrupts();
			} else {
				local_irq_restore(flags);
				local_irq_save(flags);
			}
			/* Took an interrupt, may have more exit work to do. */
			goto again;
		}
	} else {
		/* Returning to a kernel context with local irqs disabled. */
		__hard_EE_RI_disable();
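		/*
		 * The interrupted context ran with MSR[EE] set, so EE will
		 * be set again on return; drop the hard-disable flag so the
		 * recorded soft-mask state matches the restored MSR.
		 */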
		if (regs->msr & MSR_EE)
			local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	/*
	 * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr,
	 * which would cause Read-After-Write stalls. Hence, we take the AMR
	 * value from the check above.
	 */
	kuap_restore_amr(regs, amr);

	return ret;
}
#endif