--- /dev/null
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================
+x86-specific ELF Auxiliary Vectors
+==================================
+
+This document describes the semantics of the x86 auxiliary vectors.
+
+Introduction
+============
+
+ELF Auxiliary vectors enable the kernel to efficiently provide
+configuration-specific parameters to userspace. In this example, a program
+allocates an alternate stack based on the kernel-provided size::
+
+ #include <sys/auxv.h>
+ #include <elf.h>
+ #include <signal.h>
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <err.h>
+
+ #ifndef AT_MINSIGSTKSZ
+ #define AT_MINSIGSTKSZ 51
+ #endif
+
+ ....
+ stack_t ss;
+
+ ss.ss_sp = malloc(ss.ss_size);
+ assert(ss.ss_sp);
+
+ ss.ss_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ;
+ ss.ss_flags = 0;
+
+ if (sigaltstack(&ss, NULL))
+ err(1, "sigaltstack");
+
+
+The exposed auxiliary vectors
+=============================
+
+AT_SYSINFO is used for locating the vsyscall entry point. It is not
+exported on 64-bit mode.
+
+AT_SYSINFO_EHDR is the start address of the page containing the vDSO.
+
+AT_MINSIGSTKSZ denotes the minimum stack size required by the kernel to
+deliver a signal to user-space. AT_MINSIGSTKSZ comprehends the space
+consumed by the kernel to accommodate the user context for the current
+hardware configuration. It does not comprehend subsequent user-space stack
+consumption, which must be added by the user. (e.g. Above, user-space adds
+SIGSTKSZ to AT_MINSIGSTKSZ.)
sva
sgx
features
+ elf_auxvec
NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \
NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \
} \
+ NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \
} while (0)
/*
extern unsigned long task_size_64bit(int full_addr_space);
extern unsigned long get_mmap_base(int is_legacy);
extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
+extern unsigned long get_sigframe_size(void);
#ifdef CONFIG_X86_32
if (vdso64_enabled) \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(unsigned long __force)current->mm->context.vdso); \
+ NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \
} while (0)
/* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */
if (vdso64_enabled) \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(unsigned long __force)current->mm->context.vdso); \
+ NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \
} while (0)
#define AT_SYSINFO 32
fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long *buf_fx, unsigned long *size);
+unsigned long fpu__get_fpstate_size(void);
+
extern void fpu__init_prepare_fx_sw_frame(void);
#endif /* _ASM_X86_FPU_SIGNAL_H */
#endif /* CONFIG_X86_64 */
+void __init init_sigframe_size(void);
+
#endif /* _ASM_X86_SIGFRAME_H */
/* entries in ARCH_DLINFO: */
#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
-# define AT_VECTOR_SIZE_ARCH 2
+# define AT_VECTOR_SIZE_ARCH 3
#else /* else it's non-compat x86-64 */
-# define AT_VECTOR_SIZE_ARCH 1
+# define AT_VECTOR_SIZE_ARCH 2
#endif
#endif /* _ASM_X86_AUXVEC_H */
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
#include <asm/uv/uv.h>
+#include <asm/sigframe.h>
#include "cpu.h"
fpu__init_system(c);
+ init_sigframe_size();
+
#ifdef CONFIG_X86_32
/*
* Regardless of whether PCID is enumerated, the SDM says
return sp;
}
+
+unsigned long fpu__get_fpstate_size(void)
+{
+ unsigned long ret = xstate_sigframe_size();
+
+ /*
+ * This space is needed on (most) 32-bit kernels, or when a 32-bit
+ * app is running on a 64-bit kernel. To keep things simple, just
+ * assume the worst case and always include space for 'freg_state',
+ * even for 64-bit apps on 64-bit kernels. This wastes a bit of
+ * space, but keeps the code simple.
+ */
+ if ((IS_ENABLED(CONFIG_IA32_EMULATION) ||
+ IS_ENABLED(CONFIG_X86_32)) && use_fxsr())
+ ret += sizeof(struct fregs_state);
+
+ return ret;
+}
+
/*
* Prepare the SW reserved portion of the fxsave memory layout, indicating
* the presence of the extended state information in the memory layout
offset = offsetof(struct xregs_state, header);
size = sizeof(hdr);
- if (__copy_from_user(&hdr, ubuf + offset, size))
+ if (copy_from_user(&hdr, ubuf + offset, size))
return -EFAULT;
if (validate_user_xstate_header(&hdr))
offset = xstate_offsets[i];
size = xstate_sizes[i];
- if (__copy_from_user(dst, ubuf + offset, size))
+ if (copy_from_user(dst, ubuf + offset, size))
return -EFAULT;
}
}
if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
offset = offsetof(struct fxregs_state, mxcsr);
size = MXCSR_AND_FLAGS_SIZE;
- if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
+ if (copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
return -EFAULT;
}
* Set up a signal frame.
*/
+/* x86 ABI requires 16-byte alignment */
+#define FRAME_ALIGNMENT 16UL
+
+#define MAX_FRAME_PADDING (FRAME_ALIGNMENT - 1)
+
/*
* Determine which stack to use..
*/
* Align the stack pointer according to the i386 ABI,
* i.e. so that on function entry ((sp + 4) & 15) == 0.
*/
- sp = ((sp + 4) & -16ul) - 4;
+ sp = ((sp + 4) & -FRAME_ALIGNMENT) - 4;
#else /* !CONFIG_X86_32 */
- sp = round_down(sp, 16) - 8;
+ sp = round_down(sp, FRAME_ALIGNMENT) - 8;
#endif
return sp;
}
void __user **fpstate)
{
/* Default to using normal stack */
+ bool nested_altstack = on_sig_stack(regs->sp);
+ bool entering_altstack = false;
unsigned long math_size = 0;
unsigned long sp = regs->sp;
unsigned long buf_fx = 0;
- int onsigstack = on_sig_stack(sp);
int ret;
/* redzone */
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
- if (sas_ss_flags(sp) == 0)
+ /*
+ * This checks nested_altstack via sas_ss_flags(). Sensible
+ * programs use SS_AUTODISARM, which disables that check, and
+ * programs that don't use SS_AUTODISARM get compatible.
+ */
+ if (sas_ss_flags(sp) == 0) {
sp = current->sas_ss_sp + current->sas_ss_size;
+ entering_altstack = true;
+ }
} else if (IS_ENABLED(CONFIG_X86_32) &&
- !onsigstack &&
+ !nested_altstack &&
regs->ss != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
ka->sa.sa_restorer) {
/* This is the legacy signal stack switching. */
sp = (unsigned long) ka->sa.sa_restorer;
+ entering_altstack = true;
}
sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
* If we are on the alternate signal stack and would overflow it, don't.
* Return an always-bogus address instead so we will die with SIGSEGV.
*/
- if (onsigstack && !likely(on_sig_stack(sp)))
+ if (unlikely((nested_altstack || entering_altstack) &&
+ !__on_sig_stack(sp))) {
+
+ if (show_unhandled_signals && printk_ratelimit())
+ pr_info("%s[%d] overflowed sigaltstack\n",
+ current->comm, task_pid_nr(current));
+
return (void __user *)-1L;
+ }
/* save i387 and extended state */
ret = copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size);
return 0;
}
+/*
+ * There are four different struct types for signal frame: sigframe_ia32,
+ * rt_sigframe_ia32, rt_sigframe_x32, and rt_sigframe. Use the worst case
+ * -- the largest size. It means the size for 64-bit apps is a bit more
+ * than needed, but this keeps the code simple.
+ */
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+# define MAX_FRAME_SIGINFO_UCTXT_SIZE sizeof(struct sigframe_ia32)
+#else
+# define MAX_FRAME_SIGINFO_UCTXT_SIZE sizeof(struct rt_sigframe)
+#endif
+
+/*
+ * The FP state frame contains an XSAVE buffer which must be 64-byte aligned.
+ * If a signal frame starts at an unaligned address, extra space is required.
+ * This is the max alignment padding, conservatively.
+ */
+#define MAX_XSAVE_PADDING 63UL
+
+/*
+ * The frame data is composed of the following areas and laid out as:
+ *
+ * -------------------------
+ * | alignment padding |
+ * -------------------------
+ * | (f)xsave frame |
+ * -------------------------
+ * | fsave header |
+ * -------------------------
+ * | alignment padding |
+ * -------------------------
+ * | siginfo + ucontext |
+ * -------------------------
+ */
+
+/* max_frame_size tells userspace the worst case signal stack size. */
+static unsigned long __ro_after_init max_frame_size;
+
+void __init init_sigframe_size(void)
+{
+ max_frame_size = MAX_FRAME_SIGINFO_UCTXT_SIZE + MAX_FRAME_PADDING;
+
+ max_frame_size += fpu__get_fpstate_size() + MAX_XSAVE_PADDING;
+
+ /* Userspace expects an aligned size. */
+ max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT);
+
+ pr_info("max sigframe size: %lu\n", max_frame_size);
+}
+
+unsigned long get_sigframe_size(void)
+{
+ return max_frame_size;
+}
+
static inline int is_ia32_compat_frame(struct ksignal *ksig)
{
return IS_ENABLED(CONFIG_IA32_EMULATION) &&
#define SEND_SIG_NOINFO ((struct kernel_siginfo *) 0)
#define SEND_SIG_PRIV ((struct kernel_siginfo *) 1)
+static inline int __on_sig_stack(unsigned long sp)
+{
+#ifdef CONFIG_STACK_GROWSUP
+ return sp >= current->sas_ss_sp &&
+ sp - current->sas_ss_sp < current->sas_ss_size;
+#else
+ return sp > current->sas_ss_sp &&
+ sp - current->sas_ss_sp <= current->sas_ss_size;
+#endif
+}
+
/*
* True if we are on the alternate signal stack.
*/
if (current->sas_ss_flags & SS_AUTODISARM)
return 0;
-#ifdef CONFIG_STACK_GROWSUP
- return sp >= current->sas_ss_sp &&
- sp - current->sas_ss_sp < current->sas_ss_size;
-#else
- return sp > current->sas_ss_sp &&
- sp - current->sas_ss_sp <= current->sas_ss_size;
-#endif
+ return __on_sig_stack(sp);
}
static inline int sas_ss_flags(unsigned long sp)
#define AT_EXECFN 31 /* filename of program */
+#ifndef AT_MINSIGSTKSZ
+#define AT_MINSIGSTKSZ 51 /* minimal stack size for signal delivery */
+#endif
#endif /* _UAPI_LINUX_AUXVEC_H */
#include <string.h>
#include <assert.h>
#include <errno.h>
+#include <sys/auxv.h>
#include "../kselftest.h"
#define SS_AUTODISARM (1U << 31)
#endif
+#ifndef AT_MINSIGSTKSZ
+#define AT_MINSIGSTKSZ 51
+#endif
+
+static unsigned int stack_size;
static void *sstack, *ustack;
static ucontext_t uc, sc;
static const char *msg = "[OK]\tStack preserved";
#endif
if (sp < (unsigned long)sstack ||
- sp >= (unsigned long)sstack + SIGSTKSZ) {
+ sp >= (unsigned long)sstack + stack_size) {
ksft_exit_fail_msg("SP is not on sigaltstack\n");
}
/* put some data on stack. other sighandler will try to overwrite it */
stack_t stk;
int err;
+ /* Make sure more than the required minimum. */
+ stack_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ;
+ ksft_print_msg("[NOTE]\tthe stack size is %lu\n", stack_size);
+
ksft_print_header();
ksft_set_plan(3);
sigaction(SIGUSR1, &act, NULL);
act.sa_sigaction = my_usr2;
sigaction(SIGUSR2, &act, NULL);
- sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ sstack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (sstack == MAP_FAILED) {
ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
}
stk.ss_sp = sstack;
- stk.ss_size = SIGSTKSZ;
+ stk.ss_size = stack_size;
stk.ss_flags = SS_ONSTACK | SS_AUTODISARM;
err = sigaltstack(&stk, NULL);
if (err) {
}
}
- ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ ustack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (ustack == MAP_FAILED) {
ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
getcontext(&uc);
uc.uc_link = NULL;
uc.uc_stack.ss_sp = ustack;
- uc.uc_stack.ss_size = SIGSTKSZ;
+ uc.uc_stack.ss_size = stack_size;
makecontext(&uc, switch_fn, 0);
raise(SIGUSR1);
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
test_vsyscall mov_ss_trap \
- syscall_arg_fault fsgsbase_restore
+ syscall_arg_fault fsgsbase_restore sigaltstack
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
+ corrupt_xstate_header
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Corrupt the XSTATE header in a signal frame
+ *
+ * Based on analysis and a test case from Thomas Gleixner.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <err.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/wait.h>
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+static inline int xsave_enabled(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ eax = 0x1;
+ ecx = 0x0;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ /* Is CR4.OSXSAVE enabled ? */
+ return ecx & (1U << 27);
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void sigusr1(int sig, siginfo_t *info, void *uc_void)
+{
+ ucontext_t *uc = uc_void;
+ uint8_t *fpstate = (uint8_t *)uc->uc_mcontext.fpregs;
+ uint64_t *xfeatures = (uint64_t *)(fpstate + 512);
+
+ printf("\tWreck XSTATE header\n");
+ /* Wreck the first reserved bytes in the header */
+ *(xfeatures + 2) = 0xfffffff;
+}
+
+static void sigsegv(int sig, siginfo_t *info, void *uc_void)
+{
+ printf("\tGot SIGSEGV\n");
+}
+
+int main(void)
+{
+ cpu_set_t set;
+
+ sethandler(SIGUSR1, sigusr1, 0);
+ sethandler(SIGSEGV, sigsegv, 0);
+
+ if (!xsave_enabled()) {
+ printf("[SKIP] CR4.OSXSAVE disabled.\n");
+ return 0;
+ }
+
+ CPU_ZERO(&set);
+ CPU_SET(0, &set);
+
+ /*
+ * Enforce that the child runs on the same CPU
+ * which in turn forces a schedule.
+ */
+ sched_setaffinity(getpid(), sizeof(set), &set);
+
+ printf("[RUN]\tSend ourselves a signal\n");
+ raise(SIGUSR1);
+
+ printf("[OK]\tBack from the signal. Now schedule.\n");
+ pid_t child = fork();
+ if (child < 0)
+ err(1, "fork");
+ if (child == 0)
+ return 0;
+ if (child)
+ waitpid(child, NULL, 0);
+ printf("[OK]\tBack in the main thread.\n");
+
+ /*
+ * We could try to confirm that extended state is still preserved
+ * when we schedule. For now, the only indication of failure is
+ * a warning in the kernel logs.
+ */
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <signal.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <setjmp.h>
+
+/* sigaltstack()-enforced minimum stack */
+#define ENFORCED_MINSIGSTKSZ 2048
+
+#ifndef AT_MINSIGSTKSZ
+# define AT_MINSIGSTKSZ 51
+#endif
+
+static int nerrs;
+
+static bool sigalrm_expected;
+
+static unsigned long at_minstack_size;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static int setup_altstack(void *start, unsigned long size)
+{
+ stack_t ss;
+
+ memset(&ss, 0, sizeof(ss));
+ ss.ss_size = size;
+ ss.ss_sp = start;
+
+ return sigaltstack(&ss, NULL);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ if (sigalrm_expected) {
+ printf("[FAIL]\tWrong signal delivered: SIGSEGV (expected SIGALRM).");
+ nerrs++;
+ } else {
+ printf("[OK]\tSIGSEGV signal delivered.\n");
+ }
+
+ siglongjmp(jmpbuf, 1);
+}
+
+static void sigalrm(int sig, siginfo_t *info, void *ctx_void)
+{
+ if (!sigalrm_expected) {
+ printf("[FAIL]\tWrong signal delivered: SIGALRM (expected SIGSEGV).");
+ nerrs++;
+ } else {
+ printf("[OK]\tSIGALRM signal delivered.\n");
+ }
+}
+
+static void test_sigaltstack(void *altstack, unsigned long size)
+{
+ if (setup_altstack(altstack, size))
+ err(1, "sigaltstack()");
+
+ sigalrm_expected = (size > at_minstack_size) ? true : false;
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ sethandler(SIGALRM, sigalrm, SA_ONSTACK);
+
+ if (!sigsetjmp(jmpbuf, 1)) {
+ printf("[RUN]\tTest an alternate signal stack of %ssufficient size.\n",
+ sigalrm_expected ? "" : "in");
+ printf("\tRaise SIGALRM. %s is expected to be delivered.\n",
+ sigalrm_expected ? "It" : "SIGSEGV");
+ raise(SIGALRM);
+ }
+
+ clearhandler(SIGALRM);
+ clearhandler(SIGSEGV);
+}
+
+int main(void)
+{
+ void *altstack;
+
+ at_minstack_size = getauxval(AT_MINSIGSTKSZ);
+
+ altstack = mmap(NULL, at_minstack_size + SIGSTKSZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (altstack == MAP_FAILED)
+ err(1, "mmap()");
+
+ if ((ENFORCED_MINSIGSTKSZ + 1) < at_minstack_size)
+ test_sigaltstack(altstack, ENFORCED_MINSIGSTKSZ + 1);
+
+ test_sigaltstack(altstack, at_minstack_size + SIGSTKSZ);
+
+ return nerrs == 0 ? 0 : 1;
+}