* This represents the full set of bits that should ever be set in a kernel
* XSAVE buffer, both supervisor and user xstates.
*/
-u64 xfeatures_mask_all __read_mostly;
+u64 xfeatures_mask_all __ro_after_init;
-static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
+static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
+ { [ 0 ... XFEATURE_MAX - 1] = -1};
+static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
+ { [ 0 ... XFEATURE_MAX - 1] = -1};
+static unsigned int xstate_comp_offsets[XFEATURE_MAX] __ro_after_init =
+ { [ 0 ... XFEATURE_MAX - 1] = -1};
+static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] __ro_after_init =
+ { [ 0 ... XFEATURE_MAX - 1] = -1};
/*
* The XSAVE area of kernel can be in standard or compacted format;
* it is always in standard format for user mode. This is the user
* mode standard format size used for signal and ptrace frames.
*/
-unsigned int fpu_user_xstate_size;
+unsigned int fpu_user_xstate_size __ro_after_init;
/*
* Return whether the system supports a given xfeature.
*/
void fpu__init_cpu_xstate(void)
{
- u64 unsup_bits;
-
if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all)
return;
- /*
- * Unsupported supervisor xstates should not be found in
- * the xfeatures mask.
- */
- unsup_bits = xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
- WARN_ONCE(unsup_bits, "x86/fpu: Found unsupported supervisor xstates: 0x%llx\n",
- unsup_bits);
-
- xfeatures_mask_all &= ~XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
cr4_set_bits(X86_CR4_OSXSAVE);
}
}
+/*
+ * All supported features have either init state all zeros or are
+ * handled in setup_init_fpu() individually. This is an explicit
+ * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
+ * newly added supported features at build time and make people
+ * actually look at the init state for the new feature.
+ */
+#define XFEATURES_INIT_FPSTATE_HANDLED \
+ (XFEATURE_MASK_FP | \
+ XFEATURE_MASK_SSE | \
+ XFEATURE_MASK_YMM | \
+ XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM | \
+ XFEATURE_MASK_PKRU | \
+ XFEATURE_MASK_BNDREGS | \
+ XFEATURE_MASK_BNDCSR | \
+ XFEATURE_MASK_PASID)
+
/*
* setup the xstate image representing the init state
*/
{
static int on_boot_cpu __initdata = 1;
+ BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
+ XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
+ XFEATURES_INIT_FPSTATE_HANDLED);
+
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
copy_kernel_to_xregs_booting(&init_fpstate.xsave);
/*
- * Dump the init state again. This is to identify the init state
- * of any feature which is not represented by all zero's.
+ * All components are now in init state. Read the state back so
+ * that init_fpstate contains all non-zero init state. This only
+ * works with XSAVE, but not with XSAVEOPT and XSAVES because
+ * those use the init optimization which skips writing data for
+ * components in init state.
+ *
+ * XSAVE could be used, but that would require to reshuffle the
+ * data when XSAVES is available because XSAVES uses xstate
+ * compaction. But doing so is a pointless exercise because most
+ * components have an all zeros init state except for the legacy
+ * ones (FP and SSE). Those can be saved with FXSAVE into the
+ * legacy area. Adding new features requires to ensure that init
+ * state is all zeroes or if not to add the necessary handling
+ * here.
*/
- copy_xregs_to_kernel_booting(&init_fpstate.xsave);
+ fxsave(&init_fpstate.fxsave);
}
static int xfeature_uncompacted_offset(int xfeature_nr)
{
unsigned int eax, ebx, ecx, edx;
static int on_boot_cpu __initdata = 1;
+ u64 xfeatures;
int err;
int i;
xfeatures_mask_all &= ~BIT_ULL(i);
}
- xfeatures_mask_all &= fpu__get_supported_xfeatures_mask();
+ xfeatures_mask_all &= XFEATURE_MASK_USER_SUPPORTED |
+ XFEATURE_MASK_SUPERVISOR_SUPPORTED;
+
+ /* Store it for paranoia check at the end */
+ xfeatures = xfeatures_mask_all;
/* Enable xstate instructions to be able to continue with initialization: */
fpu__init_cpu_xstate();
setup_init_fpu_buf();
setup_xstate_comp_offsets();
setup_supervisor_only_offsets();
- print_xstate_offset_size();
+ /*
+ * Paranoia check whether something in the setup modified the
+ * xfeatures mask.
+ */
+ if (xfeatures != xfeatures_mask_all) {
+ pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
+ xfeatures, xfeatures_mask_all);
+ goto out_disable;
+ }
+
+ print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
xfeatures_mask_all,
fpu_kernel_xstate_size,
return true;
}
-static void fill_gap(struct membuf *to, unsigned *last, unsigned offset)
-{
- if (*last >= offset)
- return;
- membuf_write(to, (void *)&init_fpstate.xsave + *last, offset - *last);
- *last = offset;
-}
-
-static void copy_part(struct membuf *to, unsigned *last, unsigned offset,
- unsigned size, void *from)
+static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
+ void *init_xstate, unsigned int size)
{
- fill_gap(to, last, offset);
- membuf_write(to, from, size);
- *last = offset + size;
+ membuf_write(to, from_xstate ? xstate : init_xstate, size);
}
/*
*/
void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave)
{
+ const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
+ struct xregs_state *xinit = &init_fpstate.xsave;
struct xstate_header header;
- const unsigned off_mxcsr = offsetof(struct fxregs_state, mxcsr);
- unsigned size = to.left;
- unsigned last = 0;
+ unsigned int zerofrom;
int i;
/*
header.xfeatures = xsave->header.xfeatures;
header.xfeatures &= xfeatures_mask_user();
- if (header.xfeatures & XFEATURE_MASK_FP)
- copy_part(&to, &last, 0, off_mxcsr, &xsave->i387);
- if (header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM))
- copy_part(&to, &last, off_mxcsr,
- MXCSR_AND_FLAGS_SIZE, &xsave->i387.mxcsr);
- if (header.xfeatures & XFEATURE_MASK_FP)
- copy_part(&to, &last, offsetof(struct fxregs_state, st_space),
- 128, &xsave->i387.st_space);
- if (header.xfeatures & XFEATURE_MASK_SSE)
- copy_part(&to, &last, xstate_offsets[XFEATURE_SSE],
- 256, &xsave->i387.xmm_space);
- /*
- * Fill xsave->i387.sw_reserved value for ptrace frame:
- */
- copy_part(&to, &last, offsetof(struct fxregs_state, sw_reserved),
- 48, xstate_fx_sw_bytes);
- /*
- * Copy xregs_state->header:
- */
- copy_part(&to, &last, offsetof(struct xregs_state, header),
- sizeof(header), &header);
+ /* Copy FP state up to MXCSR */
+ copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
+ &xinit->i387, off_mxcsr);
+
+ /* Copy MXCSR when SSE or YMM are set in the feature mask */
+ copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
+ &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
+ MXCSR_AND_FLAGS_SIZE);
+
+ /* Copy the remaining FP state */
+ copy_feature(header.xfeatures & XFEATURE_MASK_FP,
+ &to, &xsave->i387.st_space, &xinit->i387.st_space,
+ sizeof(xsave->i387.st_space));
+
+ /* Copy the SSE state - shared with YMM, but independently managed */
+ copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
+ &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
+ sizeof(xsave->i387.xmm_space));
+
+ /* Zero the padding area */
+ membuf_zero(&to, sizeof(xsave->i387.padding));
+
+ /* Copy xsave->i387.sw_reserved */
+ membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
+
+ /* Copy the user space relevant state of @xsave->header */
+ membuf_write(&to, &header, sizeof(header));
+
+ zerofrom = offsetof(struct xregs_state, extended_state_area);
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
/*
- * Copy only in-use xstates:
+ * The ptrace buffer is in non-compacted XSAVE format.
+ * In non-compacted format disabled features still occupy
+ * state space, but there is no state to copy from in the
+ * compacted init_fpstate. The gap tracking will zero this
+ * later.
+ */
+ if (!(xfeatures_mask_user() & BIT_ULL(i)))
+ continue;
+
+ /*
+ * If there was a feature or alignment gap, zero the space
+ * in the destination buffer.
*/
- if ((header.xfeatures >> i) & 1) {
- void *src = __raw_xsave_addr(xsave, i);
+ if (zerofrom < xstate_offsets[i])
+ membuf_zero(&to, xstate_offsets[i] - zerofrom);
- copy_part(&to, &last, xstate_offsets[i],
- xstate_sizes[i], src);
- }
+ copy_feature(header.xfeatures & BIT_ULL(i), &to,
+ __raw_xsave_addr(xsave, i),
+ __raw_xsave_addr(xinit, i),
+ xstate_sizes[i]);
+ /*
+ * Keep track of the last copied state in the non-compacted
+ * target buffer for gap zeroing.
+ */
+ zerofrom = xstate_offsets[i] + xstate_sizes[i];
}
- fill_gap(&to, &last, size);
+
+ if (to.left)
+ membuf_zero(&to, to.left);
}
/*
offset = offsetof(struct xregs_state, header);
size = sizeof(hdr);
- if (__copy_from_user(&hdr, ubuf + offset, size))
+ if (copy_from_user(&hdr, ubuf + offset, size))
return -EFAULT;
if (validate_user_xstate_header(&hdr))
offset = xstate_offsets[i];
size = xstate_sizes[i];
- if (__copy_from_user(dst, ubuf + offset, size))
+ if (copy_from_user(dst, ubuf + offset, size))
return -EFAULT;
}
}
if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
offset = offsetof(struct fxregs_state, mxcsr);
size = MXCSR_AND_FLAGS_SIZE;
- if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
+ if (copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
return -EFAULT;
}
return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
-
-#ifdef CONFIG_IOMMU_SUPPORT
-void update_pasid(void)
-{
- u64 pasid_state;
- u32 pasid;
-
- if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
- return;
-
- if (!current->mm)
- return;
-
- pasid = READ_ONCE(current->mm->pasid);
- /* Set the valid bit in the PASID MSR/state only for valid pasid. */
- pasid_state = pasid == PASID_DISABLED ?
- pasid : pasid | MSR_IA32_PASID_VALID;
-
- /*
- * No need to hold fregs_lock() since the task's fpstate won't
- * be changed by others (e.g. ptrace) while the task is being
- * switched to or is in IPI.
- */
- if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
- /* The MSR is active and can be directly updated. */
- wrmsrl(MSR_IA32_PASID, pasid_state);
- } else {
- struct fpu *fpu = ¤t->thread.fpu;
- struct ia32_pasid_state *ppasid_state;
- struct xregs_state *xsave;
-
- /*
- * The CPU's xstate registers are not currently active. Just
- * update the PASID state in the memory buffer here. The
- * PASID MSR will be loaded when returning to user mode.
- */
- xsave = &fpu->state.xsave;
- xsave->header.xfeatures |= XFEATURE_MASK_PASID;
- ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
- /*
- * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state
- * won't be NULL and no need to check its value.
- *
- * Only update the task's PASID state when it's different
- * from the mm's pasid.
- */
- if (ppasid_state->pasid != pasid_state) {
- /*
- * Invalid fpregs so that state restoring will pick up
- * the PASID state.
- */
- __fpu_invalidate_fpregs_state(fpu);
- ppasid_state->pasid = pasid_state;
- }
- }
-}
-#endif /* CONFIG_IOMMU_SUPPORT */