// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Copyright (C) 1994 Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/sched.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/types.h>
#include <asm/traps.h>
#include <asm/irq_regs.h>

#include <linux/hardirq.h>
#include <linux/pkeys.h>

#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"

#define CREATE_TRACE_POINTS
#include <asm/trace/fpu.h>

/* The FPU state configuration data for kernel and user space */
struct fpu_state_config fpu_kernel_cfg __ro_after_init;
struct fpu_state_config fpu_user_cfg __ro_after_init;

/*
 * Represents the initial FPU state. It's mostly (but not completely) zeroes,
 * depending on the FPU hardware format:
 */
struct fpstate init_fpstate __ro_after_init;

/*
 * Track whether the kernel is using the FPU state
 * currently.
 *
 * This flag is used:
 *
 *   - by IRQ context code to potentially use the FPU
 *     if it's unused.
 *
 *   - to debug kernel_fpu_begin()/end() correctness
 */
static DEFINE_PER_CPU(bool, in_kernel_fpu);

/*
 * Track which context is using the FPU on the CPU:
 */
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

static bool kernel_fpu_disabled(void)
{
	return this_cpu_read(in_kernel_fpu);
}

static bool interrupted_kernel_fpu_idle(void)
{
	return !kernel_fpu_disabled();
}

/*
 * Were we in user mode (or vm86 mode) when we were
 * interrupted?
 *
 * Doing kernel_fpu_begin/end() is ok if we are running
 * in an interrupt context from user mode - we'll just
 * save the FPU state as required.
 */
static bool interrupted_user_mode(void)
{
	struct pt_regs *regs = get_irq_regs();
	return regs && user_mode(regs);
}

/*
 * Can we use the FPU in kernel mode with the
 * whole "kernel_fpu_begin/end()" sequence?
 *
 * It's always ok in process context (ie "not interrupt")
 * but it is sometimes ok even from an irq.
 */
bool irq_fpu_usable(void)
{
	return !in_interrupt() ||
		interrupted_user_mode() ||
		interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);
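
/*
 * Illustrative usage sketch (not part of the original file): callers that
 * may run in interrupt context are expected to check irq_fpu_usable()
 * before opening a kernel FPU section:
 *
 *	if (irq_fpu_usable()) {
 *		kernel_fpu_begin();
 *		... SSE/AVX instructions ...
 *		kernel_fpu_end();
 *	}
 *
 * In plain process context the check is unnecessary, since
 * irq_fpu_usable() is always true outside of interrupt context.
 */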

/*
 * Save the FPU register state in fpu->fpstate->regs. The register state is
 * preserved.
 *
 * Must be called with fpregs_lock() held.
 *
 * The legacy FNSAVE instruction clears all FPU state unconditionally, so
 * register state has to be reloaded. That might be a pointless exercise
 * when the FPU is going to be used by another task right after that. But
 * this only affects 20+ years old 32bit systems and avoids conditionals all
 * over the place.
 *
 * FXSAVE and all XSAVE variants preserve the FPU register state.
 */
void save_fpregs_to_fpstate(struct fpu *fpu)
{
	if (likely(use_xsave())) {
		os_xsave(fpu->fpstate);

		/*
		 * AVX512 state is tracked here because its use is
		 * known to slow the max clock speed of the core.
		 */
		if (fpu->fpstate->regs.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
			fpu->avx512_timestamp = jiffies;
		return;
	}

	if (likely(use_fxsr())) {
		fxsave(&fpu->fpstate->regs.fxsave);
		return;
	}

	/*
	 * Legacy FPU register saving, FNSAVE always clears FPU registers,
	 * so we have to reload them from the memory state.
	 */
	asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->fpstate->regs.fsave));
	frstor(&fpu->fpstate->regs.fsave);
}
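
/*
 * Restore the FPU register state from @fpstate. When XSAVE is in use,
 * @mask limits the restore to the selected xstate components; the legacy
 * FXRSTOR/FRSTOR paths ignore the mask and always restore everything.
 */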
void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask)
{
	/*
	 * AMD K7/K8 and later CPUs up to Zen don't save/restore
	 * FDP/FIP/FOP unless an exception is pending. Clear the x87 state
	 * here by setting it to fixed values. "m" is a random variable
	 * that should be in L1.
	 */
	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
		asm volatile(
			"fnclex\n\t"
			"emms\n\t"
			"fildl %P[addr]"	/* set F?P to defined value */
			: : [addr] "m" (fpstate));
	}

	if (use_xsave()) {
		os_xrstor(&fpstate->regs.xsave, mask);
	} else {
		if (use_fxsr())
			fxrstor(&fpstate->regs.fxsave);
		else
			frstor(&fpstate->regs.fsave);
	}
}

void fpu_reset_from_exception_fixup(void)
{
	restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate());
}

#if IS_ENABLED(CONFIG_KVM)
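/*
 * Swap FPU register state for KVM: save the current register state into
 * @save (if non-NULL) and load the state of @rstor (if non-NULL),
 * restricted to @restore_mask. Used by KVM when entering and leaving
 * guest mode.
 */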
void fpu_swap_kvm_fpu(struct fpu *save, struct fpu *rstor, u64 restore_mask)
{
	fpregs_lock();

	if (save) {
		struct fpstate *fpcur = current->thread.fpu.fpstate;

		if (test_thread_flag(TIF_NEED_FPU_LOAD))
			memcpy(&save->fpstate->regs, &fpcur->regs, fpcur->size);
		else
			save_fpregs_to_fpstate(save);
	}

	if (rstor) {
		restore_mask &= xfeatures_mask_fpstate();
		restore_fpregs_from_fpstate(rstor->fpstate, restore_mask);
	}

	fpregs_mark_activate();
	fpregs_unlock();
}
EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpu);

void fpu_copy_fpstate_to_kvm_uabi(struct fpu *fpu, void *buf,
				  unsigned int size, u32 pkru)
{
	struct fpstate *kstate = fpu->fpstate;
	union fpregs_state *ustate = buf;
	struct membuf mb = { .p = buf, .left = size };

	if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
		__copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
	} else {
		memcpy(&ustate->fxsave, &kstate->regs.fxsave,
		       sizeof(ustate->fxsave));
		/* Make it restorable on a XSAVE enabled host */
		ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE;
	}
}
EXPORT_SYMBOL_GPL(fpu_copy_fpstate_to_kvm_uabi);
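
/*
 * Fill the guest's kernel fpstate from a KVM uABI buffer: the xfeatures
 * advertised by the buffer are validated against @xcr0, and the guest's
 * PKRU value is extracted into @vpkru. Returns 0 on success or -EINVAL
 * for a buffer that requests unsupported features.
 */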
int fpu_copy_kvm_uabi_to_fpstate(struct fpu *fpu, const void *buf, u64 xcr0,
				 u32 *vpkru)
{
	struct fpstate *kstate = fpu->fpstate;
	const union fpregs_state *ustate = buf;
	struct pkru_state *xpkru;
	int ret;

	if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
		if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE)
			return -EINVAL;
		if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask)
			return -EINVAL;
		memcpy(&kstate->regs.fxsave, &ustate->fxsave, sizeof(ustate->fxsave));
		return 0;
	}

	if (ustate->xsave.header.xfeatures & ~xcr0)
		return -EINVAL;

	ret = copy_uabi_from_kernel_to_xstate(kstate, ustate);
	if (ret)
		return ret;

	/* Retrieve PKRU if not in init state */
	if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
		xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
		*vpkru = xpkru->pkru;
	}

	/* Ensure that XCOMP_BV is set up for XSAVES */
	xstate_init_xcomp_bv(&kstate->regs.xsave, xfeatures_mask_uabi());
	return 0;
}
EXPORT_SYMBOL_GPL(fpu_copy_kvm_uabi_to_fpstate);
#endif /* CONFIG_KVM */

void kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
	preempt_disable();

	WARN_ON_FPU(!irq_fpu_usable());
	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));

	this_cpu_write(in_kernel_fpu, true);

	if (!(current->flags & PF_KTHREAD) &&
	    !test_thread_flag(TIF_NEED_FPU_LOAD)) {
		set_thread_flag(TIF_NEED_FPU_LOAD);
		save_fpregs_to_fpstate(&current->thread.fpu);
	}
	__cpu_invalidate_fpregs_state();

	/* Put sane initial values into the control registers. */
	if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
		ldmxcsr(MXCSR_DEFAULT);

	if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
		asm volatile ("fninit");
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
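
/*
 * Sketch of the caller-facing wrapper, which lives in <asm/fpu/api.h>
 * rather than in this file (shown here for illustration only):
 *
 *	static inline void kernel_fpu_begin(void)
 *	{
 *	#ifdef CONFIG_X86_64
 *		kernel_fpu_begin_mask(KFPU_MXCSR);
 *	#else
 *		kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR);
 *	#endif
 *	}
 *
 * 64-bit kernel code that wants to use x87 instructions must request
 * KFPU_387 explicitly; 32-bit kernels may use 387 ops and therefore get
 * "fninit" by default as well.
 */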

void kernel_fpu_end(void)
{
	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));

	this_cpu_write(in_kernel_fpu, false);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);

/*
 * Sync the FPU register state to current's memory register state when the
 * current task owns the FPU. The hardware register state is preserved.
 */
void fpu_sync_fpstate(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu);

	fpregs_lock();
	trace_x86_fpu_before_save(fpu);

	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
		save_fpregs_to_fpstate(fpu);

	trace_x86_fpu_after_save(fpu);
	fpregs_unlock();
}

static inline unsigned int init_fpstate_copy_size(void)
{
	if (!use_xsave())
		return fpu_kernel_cfg.default_size;

	/* XSAVE(S) just needs the legacy and the xstate header part */
	return sizeof(init_fpstate.regs.xsave);
}

static inline void fpstate_init_fxstate(struct fpstate *fpstate)
{
	fpstate->regs.fxsave.cwd = 0x37f;
	fpstate->regs.fxsave.mxcsr = MXCSR_DEFAULT;
}

/*
 * Legacy x87 fpstate state init:
 */
static inline void fpstate_init_fstate(struct fpstate *fpstate)
{
	fpstate->regs.fsave.cwd = 0xffff037fu;
	fpstate->regs.fsave.swd = 0xffff0000u;
	fpstate->regs.fsave.twd = 0xffffffffu;
	fpstate->regs.fsave.fos = 0xffff0000u;
}

/*
 * Used in two places:
 * 1) Early boot to setup init_fpstate for non XSAVE systems
 * 2) fpu_init_fpstate_user() which is invoked from KVM
 */
void fpstate_init_user(struct fpstate *fpstate)
{
	if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
		fpstate_init_soft(&fpstate->regs.soft);
		return;
	}

	xstate_init_xcomp_bv(&fpstate->regs.xsave, xfeatures_mask_uabi());

	if (cpu_feature_enabled(X86_FEATURE_FXSR))
		fpstate_init_fxstate(fpstate);
	else
		fpstate_init_fstate(fpstate);
}

void fpstate_reset(struct fpu *fpu)
{
	/* Set the fpstate pointer to the default fpstate */
	fpu->fpstate = &fpu->__fpstate;

	/* Initialize sizes and feature masks */
	fpu->fpstate->size = fpu_kernel_cfg.default_size;
	fpu->fpstate->user_size = fpu_user_cfg.default_size;
	fpu->fpstate->xfeatures = xfeatures_mask_all;
	fpu->fpstate->user_xfeatures = xfeatures_mask_uabi();
}

#if IS_ENABLED(CONFIG_KVM)
void fpu_init_fpstate_user(struct fpu *fpu)
{
	fpstate_reset(fpu);
	fpstate_init_user(fpu->fpstate);
}
EXPORT_SYMBOL_GPL(fpu_init_fpstate_user);
#endif

/* Clone current's FPU state on fork */
int fpu_clone(struct task_struct *dst)
{
	struct fpu *src_fpu = &current->thread.fpu;
	struct fpu *dst_fpu = &dst->thread.fpu;

	/* The new task's FPU state cannot be valid in the hardware. */
	dst_fpu->last_cpu = -1;

	fpstate_reset(dst_fpu);

	if (!cpu_feature_enabled(X86_FEATURE_FPU))
		return 0;

	/*
	 * Enforce reload for user space tasks and prevent kernel threads
	 * from trying to save the FPU registers on context switch.
	 */
	set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);

	/*
	 * No FPU state inheritance for kernel threads and IO
	 * worker threads.
	 */
	if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) {
		/* Clear out the minimal state */
		memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs,
		       init_fpstate_copy_size());
		return 0;
	}

	/*
	 * If the FPU registers are not owned by current just memcpy() the
	 * state. Otherwise save the FPU registers directly into the
	 * child's FPU context, without any memory-to-memory copying.
	 */
	fpregs_lock();
	if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
		memcpy(&dst_fpu->fpstate->regs, &src_fpu->fpstate->regs,
		       dst_fpu->fpstate->size);
	} else {
		save_fpregs_to_fpstate(dst_fpu);
	}
	fpregs_unlock();

	trace_x86_fpu_copy_src(src_fpu);
	trace_x86_fpu_copy_dst(dst_fpu);

	return 0;
}

/*
 * Whitelist the FPU register state embedded into task_struct for hardened
 * usercopy.
 */
void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
{
	*offset = offsetof(struct thread_struct, fpu.__fpstate.regs);
	*size = fpu_kernel_cfg.default_size;
}
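
/*
 * Note (assumption about the caller, which lives outside this file):
 * arch_thread_struct_whitelist() is expected to forward to this helper,
 * so hardened usercopy only permits copies that target the register
 * image itself rather than surrounding task_struct fields.
 */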

/*
 * Drops current FPU state: deactivates the fpregs and
 * the fpstate. NOTE: it still leaves previous contents
 * in the fpregs in the eager-FPU case.
 *
 * This function can be used in cases where we know that
 * a state-restore is coming: either an explicit one,
 * or a reschedule.
 */
void fpu__drop(struct fpu *fpu)
{
	preempt_disable();

	if (fpu == &current->thread.fpu) {
		/* Ignore delayed exceptions from user space */
		asm volatile("1: fwait\n"
			     "2:\n"
			     _ASM_EXTABLE(1b, 2b));
		fpregs_deactivate(fpu);
	}

	trace_x86_fpu_dropped(fpu);

	preempt_enable();
}

/*
 * Clear FPU registers by setting them up from the init fpstate.
 * Caller must do fpregs_[un]lock() around it.
 */
static inline void restore_fpregs_from_init_fpstate(u64 features_mask)
{
	if (use_xsave())
		os_xrstor(&init_fpstate.regs.xsave, features_mask);
	else if (use_fxsr())
		fxrstor(&init_fpstate.regs.fxsave);
	else
		frstor(&init_fpstate.regs.fsave);

	pkru_write_default();
}

/*
 * Reset current->fpu memory state to the init values.
 */
static void fpu_reset_fpstate(void)
{
	struct fpu *fpu = &current->thread.fpu;

	fpregs_lock();
	fpu__drop(fpu);
	/*
	 * This does not change the actual hardware registers. It just
	 * resets the memory image and sets TIF_NEED_FPU_LOAD so a
	 * subsequent return to usermode will reload the registers from the
	 * task's memory image.
	 *
	 * Do not use fpstate_init() here. Just copy init_fpstate which has
	 * the correct content already except for PKRU.
	 *
	 * PKRU handling does not rely on the xstate when restoring for
	 * user space as PKRU is eagerly written in switch_to() and
	 * flush_thread().
	 */
	memcpy(&fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size());
	set_thread_flag(TIF_NEED_FPU_LOAD);
	fpregs_unlock();
}

/*
 * Reset current's user FPU states to the init states. current's
 * supervisor states, if any, are not modified by this function. The
 * caller guarantees that the XSTATE header in memory is intact.
 */
void fpu__clear_user_states(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu);

	fpregs_lock();
	if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
		fpu_reset_fpstate();
		fpregs_unlock();
		return;
	}

	/*
	 * Ensure that current's supervisor states are loaded into their
	 * corresponding registers.
	 */
	if (xfeatures_mask_supervisor() &&
	    !fpregs_state_valid(fpu, smp_processor_id())) {
		os_xrstor(&fpu->fpstate->regs.xsave, xfeatures_mask_supervisor());
	}

	/* Reset user states in registers. */
	restore_fpregs_from_init_fpstate(xfeatures_mask_restore_user());

	/*
	 * Now all FPU registers have their desired values. Inform the FPU
	 * state machine that current's FPU registers are in the hardware
	 * registers. The memory image does not need to be updated because
	 * any operation relying on it has to save the registers first when
	 * current's FPU is marked active.
	 */
	fpregs_mark_activate();
	fpregs_unlock();
}

void fpu_flush_thread(void)
{
	fpstate_reset(&current->thread.fpu);
	fpu_reset_fpstate();
}

/*
 * Load FPU context before returning to userspace.
 */
void switch_fpu_return(void)
{
	if (!static_cpu_has(X86_FEATURE_FPU))
		return;

	fpregs_restore_userregs();
}
EXPORT_SYMBOL_GPL(switch_fpu_return);

#ifdef CONFIG_X86_DEBUG_FPU
/*
 * If current FPU state according to its tracking (loaded FPU context on this
 * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is
 * loaded on return to userland.
 */
void fpregs_assert_state_consistent(void)
{
	struct fpu *fpu = &current->thread.fpu;

	if (test_thread_flag(TIF_NEED_FPU_LOAD))
		return;

	WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
}
EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
#endif
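
/*
 * Make current the owner of the FPU registers on this CPU: activate the
 * fpregs, record the owning CPU so fpregs_state_valid() succeeds, and
 * clear TIF_NEED_FPU_LOAD since the hardware registers now hold the
 * authoritative state.
 */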
void fpregs_mark_activate(void)
{
	struct fpu *fpu = &current->thread.fpu;

	fpregs_activate(fpu);
	fpu->last_cpu = smp_processor_id();
	clear_thread_flag(TIF_NEED_FPU_LOAD);
}

/*
 * x87 math exception handling:
 */
int fpu__exception_code(struct fpu *fpu, int trap_nr)
{
	int err;

	if (trap_nr == X86_TRAP_MF) {
		unsigned short cwd, swd;
		/*
		 * (~cwd & swd) will mask out exceptions that are not set to unmasked
		 * status. 0x3f is the exception bits in these regs, 0x200 is the
		 * C1 reg you need in case of a stack fault, 0x040 is the stack
		 * fault bit. We should only be taking one exception at a time,
		 * so if this combination doesn't produce any single exception,
		 * then we have a bad program that isn't synchronizing its FPU usage
		 * and it will suffer the consequences since we won't be able to
		 * fully reproduce the context of the exception.
		 */
		if (boot_cpu_has(X86_FEATURE_FXSR)) {
			cwd = fpu->fpstate->regs.fxsave.cwd;
			swd = fpu->fpstate->regs.fxsave.swd;
		} else {
			cwd = (unsigned short)fpu->fpstate->regs.fsave.cwd;
			swd = (unsigned short)fpu->fpstate->regs.fsave.swd;
		}

		err = swd & ~cwd;
	} else {
		/*
		 * The SIMD FPU exceptions are handled a little differently, as there
		 * is only a single status/control register. Thus, to determine which
		 * unmasked exception was caught we must mask the exception mask bits
		 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
		 */
		unsigned short mxcsr = MXCSR_DEFAULT;

		if (boot_cpu_has(X86_FEATURE_XMM))
			mxcsr = fpu->fpstate->regs.fxsave.mxcsr;

		err = ~(mxcsr >> 7) & mxcsr;
	}
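
	/*
	 * Worked example (illustrative): with the default x87 control word
	 * 0x037f all six exception bits are masked, so none of the bits
	 * tested below can be set and a spurious trap falls through to the
	 * final "return 0". If user code unmasks divide-by-zero (cwd bit 2
	 * cleared) and that fault is pending (swd bit 2 set), err has 0x004
	 * set and FPE_FLTDIV is returned.
	 */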

	if (err & 0x001) {	/* Invalid op */
		/*
		 * swd & 0x240 == 0x040: Stack Underflow
		 * swd & 0x240 == 0x240: Stack Overflow
		 * User must clear the SF bit (0x40) if set
		 */
		return FPE_FLTINV;
	} else if (err & 0x004) { /* Divide by Zero */
		return FPE_FLTDIV;
	} else if (err & 0x008) { /* Overflow */
		return FPE_FLTOVF;
	} else if (err & 0x012) { /* Denormal, Underflow */
		return FPE_FLTUND;
	} else if (err & 0x020) { /* Precision */
		return FPE_FLTRES;
	}

	/*
	 * If we're using IRQ 13, or supposedly even some trap
	 * X86_TRAP_MF implementations, it's possible
	 * we get a spurious trap, which is not an error.
	 */
	return 0;
}