// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include <linux/stringify.h>
#include <asm/debugreg.h>
#include <asm/nospec-branch.h>
#include <asm/ibt.h>

#include "x86.h"
#include "tss.h"
#include "mmu.h"
#include "pmu.h"
/*
 * Operand types
 */
#define OpNone             0ull
#define OpImplicit         1ull  /* No generic decode */
#define OpReg              2ull  /* Register */
#define OpMem              3ull  /* Memory */
#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
#define OpDI               5ull  /* ES:DI/EDI/RDI */
#define OpMem64            6ull  /* Memory, 64-bit */
#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
#define OpDX               8ull  /* DX register */
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
#define OpSI              16ull  /* SI/ESI/RSI */
#define OpImmFAddr        17ull  /* Immediate far address */
#define OpMemFAddr        18ull  /* Far address in memory */
#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
#define OpES              20ull  /* ES */
#define OpCS              21ull  /* CS */
#define OpSS              22ull  /* SS */
#define OpDS              23ull  /* DS */
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
#define OpMem8            26ull  /* 8-bit zero extended memory operand */
#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */
#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */

#define OpBits             5  /* Width of operand field */
#define OpMask             ((1ull << OpBits) - 1)
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define DstShift    1
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg      (OpReg << DstShift)
#define DstMem      (OpMem << DstShift)
#define DstAcc      (OpAcc << DstShift)
#define DstDI       (OpDI << DstShift)
#define DstMem64    (OpMem64 << DstShift)
#define DstMem16    (OpMem16 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX       (OpDX << DstShift)
#define DstAccLo    (OpAccLo << DstShift)
#define DstMask     (OpMask << DstShift)
/* Source operand type. */
#define SrcShift    6
#define SrcNone     (OpNone << SrcShift)
#define SrcReg      (OpReg << SrcShift)
#define SrcMem      (OpMem << SrcShift)
#define SrcMem16    (OpMem16 << SrcShift)
#define SrcMem32    (OpMem32 << SrcShift)
#define SrcImm      (OpImm << SrcShift)
#define SrcImmByte  (OpImmByte << SrcShift)
#define SrcOne      (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU     (OpImmU << SrcShift)
#define SrcSI       (OpSI << SrcShift)
#define SrcXLat     (OpXLat << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcImm64    (OpImm64 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMem8     (OpMem8 << SrcShift)
#define SrcAccHi    (OpAccHi << SrcShift)
#define SrcMask     (OpMask << SrcShift)
#define BitOp       (1<<11)
#define MemAbs      (1<<12)     /* Memory operand is absolute displacement */
#define String      (1<<13)     /* String instruction (rep capable) */
#define Stack       (1<<14)     /* Stack instruction (push/pop) */
#define GroupMask   (7<<15)     /* Opcode uses one of the group mechanisms */
#define Group       (1<<15)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
#define Escape      (5<<15)     /* Escape to coprocessor instruction */
#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
#define ModeDual    (7<<15)     /* Different instruction for 32/64 bit */
#define Sse         (1<<18)     /* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM       (1<<19)
/* Destination is only written; never read. */
#define Mov         (1<<20)
/* Misc flags */
#define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
#define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25) /* No Such Instruction */
#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64        (1<<28)
#define PageTable   (1 << 29)   /* instruction used to write page table */
#define NotImpl     (1 << 30)   /* instruction is not implemented */
/* Source 2 operand type */
#define Src2Shift   (31)
#define Src2None    (OpNone << Src2Shift)
#define Src2Mem     (OpMem << Src2Shift)
#define Src2CL      (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One     (OpOne << Src2Shift)
#define Src2Imm     (OpImm << Src2Shift)
#define Src2ES      (OpES << Src2Shift)
#define Src2CS      (OpCS << Src2Shift)
#define Src2SS      (OpSS << Src2Shift)
#define Src2DS      (OpDS << Src2Shift)
#define Src2FS      (OpFS << Src2Shift)
#define Src2GS      (OpGS << Src2Shift)
#define Src2Mask    (OpMask << Src2Shift)
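
/*
 * Illustrative note (not from the upstream file): an opcode table entry ORs
 * these pre-shifted operand codes into a single u64 that the decoder stows
 * in ctxt->d; each operand class is recovered with its shift/mask pair, e.g.
 *
 *	op = (ctxt->d & DstMask) >> DstShift;	/\* one of the Op* values *\/
 *	op = (ctxt->d & SrcMask) >> SrcShift;
 *	op = (ctxt->d & Src2Mask) >> Src2Shift;
 */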
#define Mmx         ((u64)1 << 40)  /* MMX Vector instruction */
#define AlignMask   ((u64)7 << 41)
#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned   ((u64)2 << 41)  /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx         ((u64)3 << 41)  /* Advanced Vector Extensions */
#define Aligned16   ((u64)4 << 41)  /* Aligned to 16 byte boundary (e.g. FXSAVE) */
#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
#define NoWrite     ((u64)1 << 45)  /* No writeback */
#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
#define NoMod       ((u64)1 << 47)  /* Mod field is ignored */
#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
#define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
#define NearBranch  ((u64)1 << 52)  /* Near branches */
#define No16        ((u64)1 << 53)  /* No 16 bit operand */
#define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
#define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operands */
#define IsBranch    ((u64)1 << 56)  /* Instruction is considered a branch. */

#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)

#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)
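
/*
 * E.g. X16(x) emits sixteen copies of x; the opcode tables (outside this
 * excerpt) use these helpers to fill whole rows of entries at a time.
 */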

struct opcode {
	u64 flags;
	u8 intercept;
	u8 pad[7];
	union {
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		const struct opcode *group;
		const struct group_dual *gdual;
		const struct gprefix *gprefix;
		const struct escape *esc;
		const struct instr_dual *idual;
		const struct mode_dual *mdual;
		void (*fastop)(struct fastop *fake);
	} u;
	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};

struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};

struct gprefix {
	struct opcode pfx_no;
	struct opcode pfx_66;
	struct opcode pfx_f2;
	struct opcode pfx_f3;
};

struct escape {
	struct opcode op[8];
	struct opcode high[64];
};

struct instr_dual {
	struct opcode mod012;
	struct opcode mod3;
};

struct mode_dual {
	struct opcode mode32;
	struct opcode mode64;
};

#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a

enum x86_transfer_type {
	X86_TRANSFER_NONE,
	X86_TRANSFER_CALL_JMP,
	X86_TRANSFER_RET,
	X86_TRANSFER_TASK_SWITCH,
};

static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
	unsigned long dirty = ctxt->regs_dirty;
	int reg;

	for_each_set_bit(reg, &dirty, NR_EMULATOR_GPRS)
		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
}

static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
{
	ctxt->regs_dirty = 0;
	ctxt->regs_valid = 0;
}

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
		     X86_EFLAGS_PF|X86_EFLAGS_CF)

#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif
/*
 * fastop functions have a special calling convention:
 *
 * dst:    rax        (in/out)
 * src:    rdx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
 * ex:     rsi        (in:fastop pointer, out:zero if exception)
 *
 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 * different operand sizes can be reached by calculation, rather than a jump
 * table (which would be bigger than the code).
 *
 * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
 * and 1 for the straight line speculation INT3, leaves 7 bytes for the
 * body of the function.  Currently none is larger than 4.
 */
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);

#define FASTOP_SIZE	16
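
/*
 * Illustrative sketch, not part of the upstream flow shown here: because
 * every fastop variant is exactly FASTOP_SIZE bytes, the dispatcher can pick
 * the 8/16/32/64-bit flavour of an em_<op> block by address arithmetic
 * instead of a jump table.  The helper name is hypothetical (fastop() does
 * the equivalent adjustment internally) and assumes the fastop_t typedef
 * from kvm_emulate.h.
 */
static __always_inline fastop_t fastop_by_size(fastop_t base, unsigned int op_bytes)
{
	/* Variants are laid out b, w, l, q: op_bytes 1/2/4/8 -> slot 0/1/2/3. */
	return base + __ffs(op_bytes) * FASTOP_SIZE;
}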

#define __FOP_FUNC(name) \
	".align " __stringify(FASTOP_SIZE) " \n\t" \
	".type " name ", @function \n\t" \
	name ":\n\t" \
	ASM_ENDBR \
	IBT_NOSEAL(name)

#define FOP_FUNC(name) \
	__FOP_FUNC(#name)

#define __FOP_RET(name) \
	"11: " ASM_RET \
	".size " name ", .-" name "\n\t"

#define FOP_RET(name) \
	__FOP_RET(#name)

#define __FOP_START(op, align) \
	extern void em_##op(struct fastop *fake); \
	asm(".pushsection .text, \"ax\" \n\t" \
	    ".global em_" #op " \n\t" \
	    ".align " __stringify(align) " \n\t" \
	    "em_" #op ":\n\t"

#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)

#define FOP_END \
	    ".popsection")

#define __FOPNOP(name) \
	__FOP_FUNC(name) \
	__FOP_RET(name)

#define FOPNOP() \
	__FOPNOP(__stringify(__UNIQUE_ID(nop)))

#define FOP1E(op,  dst) \
	__FOP_FUNC(#op "_" #dst) \
	"10: " #op " %" #dst " \n\t" \
	__FOP_RET(#op "_" #dst)

#define FOP1EEX(op,  dst) \
	FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi)

#define FASTOP1(op) \
	FOP_START(op) \
	FOP1E(op##b, al) \
	FOP1E(op##w, ax) \
	FOP1E(op##l, eax) \
	ON64(FOP1E(op##q, rax))	\
	FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m) */
#define FASTOP1SRC2(op, name) \
	FOP_START(name) \
	FOP1E(op, cl) \
	FOP1E(op, cx) \
	FOP1E(op, ecx) \
	ON64(FOP1E(op, rcx)) \
	FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
#define FASTOP1SRC2EX(op, name) \
	FOP_START(name) \
	FOP1EEX(op, cl) \
	FOP1EEX(op, cx) \
	FOP1EEX(op, ecx) \
	ON64(FOP1EEX(op, rcx)) \
	FOP_END

#define FOP2E(op,  dst, src) \
	__FOP_FUNC(#op "_" #dst "_" #src) \
	#op " %" #src ", %" #dst " \n\t" \
	__FOP_RET(#op "_" #dst "_" #src)

#define FASTOP2(op) \
	FOP_START(op) \
	FOP2E(op##b, al, dl) \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, word only */
#define FASTOP2W(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, src is CL */
#define FASTOP2CL(op) \
	FOP_START(op) \
	FOP2E(op##b, al, cl) \
	FOP2E(op##w, ax, cl) \
	FOP2E(op##l, eax, cl) \
	ON64(FOP2E(op##q, rax, cl)) \
	FOP_END

/* 2 operand, src and dest are reversed */
#define FASTOP2R(op, name) \
	FOP_START(name) \
	FOP2E(op##b, dl, al) \
	FOP2E(op##w, dx, ax) \
	FOP2E(op##l, edx, eax) \
	ON64(FOP2E(op##q, rdx, rax)) \
	FOP_END

#define FOP3E(op,  dst, src, src2) \
	__FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
	#op " %" #src2 ", %" #src ", %" #dst " \n\t"\
	__FOP_RET(#op "_" #dst "_" #src "_" #src2)

/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP3E(op##w, ax, dx, cl) \
	FOP3E(op##l, eax, edx, cl) \
	ON64(FOP3E(op##q, rax, rdx, cl)) \
	FOP_END

/* Special case for SETcc - 1 instruction per cc */
#define FOP_SETCC(op) \
	FOP_FUNC(op) \
	#op " %al \n\t" \
	FOP_RET(op)

FOP_START(setcc)
FOP_SETCC(seto)
FOP_SETCC(setno)
FOP_SETCC(setc)
FOP_SETCC(setnc)
FOP_SETCC(setz)
FOP_SETCC(setnz)
FOP_SETCC(setbe)
FOP_SETCC(setnbe)
FOP_SETCC(sets)
FOP_SETCC(setns)
FOP_SETCC(setp)
FOP_SETCC(setnp)
FOP_SETCC(setl)
FOP_SETCC(setnl)
FOP_SETCC(setle)
FOP_SETCC(setnle)
FOP_END;

FOP_START(salc)
FOP_FUNC(salc)
"pushf; sbb %al, %al; popf \n\t"
FOP_RET(salc)
FOP_END;

/*
 * XXX: inoutclob user must know where the argument is being expanded.
 *      Using asm goto would allow us to remove _fault.
 */
#define asm_safe(insn, inoutclob...) \
({ \
	int _fault = 0; \
 \
	asm volatile("1:" insn "\n" \
	             "2:\n" \
		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
	             : [_fault] "+r"(_fault) inoutclob ); \
 \
	_fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
})
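
/*
 * Usage sketch: the FPU paths elsewhere in this file wrap possibly-faulting
 * instructions this way, e.g.
 *
 *	rc = asm_safe("fwait");
 *
 * so a pending x87 exception surfaces as X86EMUL_UNHANDLEABLE instead of
 * faulting in the host context.
 */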

static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
				    enum x86_intercept intercept,
				    enum x86_intercept_stage stage)
{
	struct x86_instruction_info info = {
		.intercept  = intercept,
		.rep_prefix = ctxt->rep_prefix,
		.modrm_mod  = ctxt->modrm_mod,
		.modrm_reg  = ctxt->modrm_reg,
		.modrm_rm   = ctxt->modrm_rm,
		.src_val    = ctxt->src.val64,
		.dst_val    = ctxt->dst.val64,
		.src_bytes  = ctxt->src.bytes,
		.dst_bytes  = ctxt->dst.bytes,
		.ad_bytes   = ctxt->ad_bytes,
		.next_rip   = ctxt->eip,
	};

	return ctxt->ops->intercept(ctxt, &info, stage);
}

static void assign_masked(ulong *dest, ulong src, ulong mask)
{
	*dest = (*dest & ~mask) | (src & mask);
}

static void assign_register(unsigned long *reg, u64 val, int bytes)
{
	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
	switch (bytes) {
	case 1:
		*(u8 *)reg = (u8)val;
		break;
	case 2:
		*(u16 *)reg = (u16)val;
		break;
	case 4:
		*reg = (u32)val;
		break;	/* 64b: zero-extend */
	case 8:
		*reg = val;
		break;
	}
}

static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
	return (1UL << (ctxt->ad_bytes << 3)) - 1;
}

static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
{
	u16 sel;
	struct desc_struct ss;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return ~0UL;
	ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
	return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
}

static int stack_size(struct x86_emulate_ctxt *ctxt)
{
	return (__fls(stack_mask(ctxt)) + 1) >> 3;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
	if (ctxt->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(ctxt);
}

static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, int reg)
{
	return address_mask(ctxt, reg_read(ctxt, reg));
}

static void masked_increment(ulong *reg, ulong mask, int inc)
{
	assign_masked(reg, *reg + inc, mask);
}

static void
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
{
	ulong *preg = reg_rmw(ctxt, reg);

	assign_register(preg, *preg + inc, ctxt->ad_bytes);
}

static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}

static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return ctxt->ops->get_cached_segment_base(ctxt, seg);
}

static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
			     u32 error, bool valid)
{
	if (KVM_EMULATOR_BUG_ON(vec > 0x1f, ctxt))
		return X86EMUL_UNHANDLEABLE;

	ctxt->exception.vector = vec;
	ctxt->exception.error_code = error;
	ctxt->exception.error_code_valid = valid;
	return X86EMUL_PROPAGATE_FAULT;
}

static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DB_VECTOR, 0, false);
}

static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, GP_VECTOR, err, true);
}

static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, SS_VECTOR, err, true);
}

static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DE_VECTOR, 0, false);
}

static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, NM_VECTOR, 0, false);
}

static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
	u16 selector;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
	return selector;
}

static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
				 unsigned seg)
{
	u16 dummy;
	u32 base3;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}

static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
{
	return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
}

static inline bool emul_is_noncanonical_address(u64 la,
						struct x86_emulate_ctxt *ctxt)
{
	return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
}

/*
 * x86 defines three classes of vector instructions: explicitly
 * aligned, explicitly unaligned, and the rest, which change behaviour
 * depending on whether they're AVX encoded or not.
 *
 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 * subject to the same check.  FXSAVE and FXRSTOR are checked here too as their
 * 512 bytes of data must be aligned to a 16 byte boundary.
 */
static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
{
	u64 alignment = ctxt->d & AlignMask;

	if (likely(size < 16))
		return 1;

	switch (alignment) {
	case Unaligned:
	case Avx:
		return 1;
	case Aligned16:
		return 16;
	case Aligned:
	default:
		return size;
	}
}
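
/*
 * For example, a 16-byte MOVDQA operand must be 16-byte aligned while MOVDQU
 * never is; FXSAVE's 512-byte area is only held to a 16-byte boundary
 * (Aligned16), and plain Aligned falls back to the operand size itself.
 */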

static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
				       struct segmented_address addr,
				       unsigned *max_size, unsigned size,
				       enum x86emul_mode mode, ulong *linear,
				       unsigned int flags)
{
	struct desc_struct desc;
	bool usable;
	ulong la;
	u32 lim;
	u16 sel;
	u8  va_bits;

	la = seg_base(ctxt, addr.seg) + addr.ea;
	*max_size = 0;
	switch (mode) {
	case X86EMUL_MODE_PROT64:
		*linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags);
		va_bits = ctxt_virt_addr_bits(ctxt);
		if (!__is_canonical_address(la, va_bits))
			goto bad;

		*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
		if (size > *max_size)
			goto bad;
		break;
	default:
		*linear = la = (u32)la;
		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
						addr.seg);
		if (!usable)
			goto bad;
		/* code segment in protected mode or read-only data segment */
		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) &&
		    (flags & X86EMUL_F_WRITE))
			goto bad;
		/* unreadable code segment */
		if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2))
			goto bad;
		lim = desc_limit_scaled(&desc);
		if (!(desc.type & 8) && (desc.type & 4)) {
			/* expand-down segment */
			if (addr.ea > lim)
				goto bad;
			lim = desc.d ? 0xffffffff : 0xffff;
		}
		if (addr.ea > lim)
			goto bad;
		if (lim == 0xffffffff)
			*max_size = ~0u;
		else {
			*max_size = (u64)lim + 1 - addr.ea;
			if (size > *max_size)
				goto bad;
		}
		break;
	}
	if (la & (insn_alignment(ctxt, size) - 1))
		return emulate_gp(ctxt, 0);
	return X86EMUL_CONTINUE;
bad:
	if (addr.seg == VCPU_SREG_SS)
		return emulate_ss(ctxt, 0);
	else
		return emulate_gp(ctxt, 0);
}

static int linearize(struct x86_emulate_ctxt *ctxt,
		     struct segmented_address addr,
		     unsigned size, bool write,
		     ulong *linear)
{
	unsigned max_size;
	return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear,
			   write ? X86EMUL_F_WRITE : 0);
}

static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	ulong linear;
	int rc;
	unsigned max_size;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					   .ea = dst };

	if (ctxt->op_bytes != sizeof(unsigned long))
		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
	rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear,
			 X86EMUL_F_FETCH);
	if (rc == X86EMUL_CONTINUE)
		ctxt->_eip = addr.ea;
	return rc;
}

static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
{
	u64 efer;
	struct desc_struct cs;
	u16 selector;
	u32 base3;

	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);

	if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
		/* Real mode. cpu must not have long mode active */
		if (efer & EFER_LMA)
			return X86EMUL_UNHANDLEABLE;
		ctxt->mode = X86EMUL_MODE_REAL;
		return X86EMUL_CONTINUE;
	}

	if (ctxt->eflags & X86_EFLAGS_VM) {
		/* Protected/VM86 mode. cpu must not have long mode active */
		if (efer & EFER_LMA)
			return X86EMUL_UNHANDLEABLE;
		ctxt->mode = X86EMUL_MODE_VM86;
		return X86EMUL_CONTINUE;
	}

	if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
		return X86EMUL_UNHANDLEABLE;

	if (efer & EFER_LMA) {
		if (cs.l) {
			/* Proper long mode */
			ctxt->mode = X86EMUL_MODE_PROT64;
		} else if (cs.d) {
			/* 32 bit compatibility mode */
			ctxt->mode = X86EMUL_MODE_PROT32;
		} else {
			ctxt->mode = X86EMUL_MODE_PROT16;
		}
	} else {
		/* Legacy 32 bit / 16 bit mode */
		ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
	}

	return X86EMUL_CONTINUE;
}

static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	return assign_eip(ctxt, dst);
}

static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	int rc = emulator_recalc_and_set_mode(ctxt);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	return assign_eip(ctxt, dst);
}

static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
	return assign_eip_near(ctxt, ctxt->_eip + rel);
}

static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
			      void *data, unsigned size)
{
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
}

static int linear_write_system(struct x86_emulate_ctxt *ctxt,
			       ulong linear, void *data,
			       unsigned int size)
{
	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
}

static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
			      struct segmented_address addr,
			      void *data,
			      unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
}

static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
			       struct segmented_address addr,
			       void *data,
			       unsigned int size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
}

/*
 * Prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 */
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
	int rc;
	unsigned size, max_size;
	unsigned long linear;
	int cur_size = ctxt->fetch.end - ctxt->fetch.data;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					   .ea = ctxt->eip + cur_size };

	/*
	 * We do not know exactly how many bytes will be needed, and
	 * __linearize is expensive, so fetch as much as possible.  We
	 * just have to avoid going beyond the 15 byte limit, the end
	 * of the segment, or the end of the page.
	 *
	 * __linearize is called with size 0 so that it does not do any
	 * boundary check itself.  Instead, we use max_size to check
	 * against op_size.
	 */
	rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear,
			 X86EMUL_F_FETCH);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;
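
	/*
	 * cur_size is at most 15 here, so "15UL ^ cur_size" is the same as
	 * 15 - cur_size while making it obvious the result cannot underflow:
	 * the fetch never exceeds the architectural 15-byte instruction
	 * length limit.
	 */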
	size = min_t(unsigned, 15UL ^ cur_size, max_size);
	size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));

	/*
	 * One instruction can only straddle two pages,
	 * and one has been loaded at the beginning of
	 * x86_decode_insn.  So, if not enough bytes
	 * still, we must have hit the 15-byte boundary.
	 */
	if (unlikely(size < op_size))
		return emulate_gp(ctxt, 0);

	rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
			      size, &ctxt->exception);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;
	ctxt->fetch.end += size;
	return X86EMUL_CONTINUE;
}

static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
					       unsigned size)
{
	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;

	if (unlikely(done_size < size))
		return __do_insn_fetch_bytes(ctxt, size - done_size);
	else
		return X86EMUL_CONTINUE;
}

/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _ctxt)					\
({	_type _x;							\
									\
	rc = do_insn_fetch_bytes(_ctxt, sizeof(_type));			\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	ctxt->_eip += sizeof(_type);					\
	memcpy(&_x, ctxt->fetch.ptr, sizeof(_type));			\
	ctxt->fetch.ptr += sizeof(_type);				\
	_x;								\
})

#define insn_fetch_arr(_arr, _size, _ctxt)				\
({									\
	rc = do_insn_fetch_bytes(_ctxt, _size);				\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	ctxt->_eip += (_size);						\
	memcpy(_arr, ctxt->fetch.ptr, _size);				\
	ctxt->fetch.ptr += (_size);					\
})
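
/*
 * Usage sketch: the decode loop (outside this excerpt) pulls prefixes,
 * opcodes, displacements and immediates through these macros, e.g.
 *
 *	ctxt->b = insn_fetch(u8, ctxt);
 *
 * Both macros expect a local 'rc' and a 'done' label in the enclosing
 * function, as decode_modrm() below illustrates.
 */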

/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
			     int byteop)
{
	void *p;
	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;

	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
	else
		p = reg_rmw(ctxt, modrm_reg);
	return p;
}

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
	rc = segmented_read_std(ctxt, addr, size, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	addr.ea += 2;
	rc = segmented_read_std(ctxt, addr, address, op_bytes);
	return rc;
}

FASTOP2(add);
FASTOP2(or);
FASTOP2(adc);
FASTOP2(sbb);
FASTOP2(and);
FASTOP2(sub);
FASTOP2(xor);
FASTOP2(cmp);
FASTOP2(test);

FASTOP1SRC2(mul, mul_ex);
FASTOP1SRC2(imul, imul_ex);
FASTOP1SRC2EX(div, div_ex);
FASTOP1SRC2EX(idiv, idiv_ex);

FASTOP3WCL(shld);
FASTOP3WCL(shrd);

FASTOP2W(imul);

FASTOP1(not);
FASTOP1(neg);
FASTOP1(inc);
FASTOP1(dec);

FASTOP2CL(rol);
FASTOP2CL(ror);
FASTOP2CL(rcl);
FASTOP2CL(rcr);
FASTOP2CL(shl);
FASTOP2CL(shr);
FASTOP2CL(sar);

FASTOP2W(bsf);
FASTOP2W(bsr);
FASTOP2W(bt);
FASTOP2W(bts);
FASTOP2W(btr);
FASTOP2W(btc);

FASTOP2(xadd);

FASTOP2R(cmp, cmp_r);

static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsf);
}

static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsr);
}

static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
	u8 rc;
	void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf);

	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
	asm("push %[flags]; popf; " CALL_NOSPEC
	    : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
	return rc;
}
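
/*
 * Usage sketch: conditional instructions take the condition nibble straight
 * from the opcode; Jcc short (opcodes 0x70-0x7f), for example, is emulated
 * roughly as
 *
 *	if (test_cc(ctxt->b, ctxt->eflags))
 *		rc = jmp_rel(ctxt, ctxt->src.val);
 */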

static void fetch_register_operand(struct operand *op)
{
	switch (op->bytes) {
	case 1:
		op->val = *(u8 *)op->addr.reg;
		break;
	case 2:
		op->val = *(u16 *)op->addr.reg;
		break;
	case 4:
		op->val = *(u32 *)op->addr.reg;
		break;
	case 8:
		op->val = *(u64 *)op->addr.reg;
		break;
	}
}

static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	kvm_fpu_get();
	asm volatile("fninit");
	kvm_fpu_put();
	return X86EMUL_CONTINUE;
}

static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
	u16 fcw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	kvm_fpu_get();
	asm volatile("fnstcw %0": "+m"(fcw));
	kvm_fpu_put();

	ctxt->dst.val = fcw;

	return X86EMUL_CONTINUE;
}

static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
	u16 fsw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	kvm_fpu_get();
	asm volatile("fnstsw %0": "+m"(fsw));
	kvm_fpu_put();

	ctxt->dst.val = fsw;

	return X86EMUL_CONTINUE;
}

static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
				    struct operand *op)
{
	unsigned int reg;

	if (ctxt->d & ModRM)
		reg = ctxt->modrm_reg;
	else
		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);

	if (ctxt->d & Sse) {
		op->type = OP_XMM;
		op->bytes = 16;
		op->addr.xmm = reg;
		kvm_read_sse_reg(reg, &op->vec_val);
		return;
	}
	if (ctxt->d & Mmx) {
		reg &= 7;
		op->type = OP_MM;
		op->bytes = 8;
		op->addr.mm = reg;
		return;
	}

	op->type = OP_REG;
	op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);

	fetch_register_operand(op);
	op->orig_val = op->val;
}

static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
{
	if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
		ctxt->modrm_seg = VCPU_SREG_SS;
}

static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct operand *op)
{
	u8 sib;
	int index_reg, base_reg, scale;
	int rc = X86EMUL_CONTINUE;
	ulong modrm_ea = 0;

	ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
	index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
	base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */

	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
	ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
	ctxt->modrm_seg = VCPU_SREG_DS;

	if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
				ctxt->d & ByteOp);
		if (ctxt->d & Sse) {
			op->type = OP_XMM;
			op->bytes = 16;
			op->addr.xmm = ctxt->modrm_rm;
			kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
			return rc;
		}
		if (ctxt->d & Mmx) {
			op->type = OP_MM;
			op->bytes = 8;
			op->addr.mm = ctxt->modrm_rm & 7;
			return rc;
		}
		fetch_register_operand(op);
		return rc;
	}

	op->type = OP_MEM;

	if (ctxt->ad_bytes == 2) {
		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);

		/* 16-bit ModR/M decode. */
		switch (ctxt->modrm_mod) {
		case 0:
			if (ctxt->modrm_rm == 6)
				modrm_ea += insn_fetch(u16, ctxt);
			break;
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(u16, ctxt);
			break;
		}
		switch (ctxt->modrm_rm) {
		case 0:
			modrm_ea += bx + si;
			break;
		case 1:
			modrm_ea += bx + di;
			break;
		case 2:
			modrm_ea += bp + si;
			break;
		case 3:
			modrm_ea += bp + di;
			break;
		case 4:
			modrm_ea += si;
			break;
		case 5:
			modrm_ea += di;
			break;
		case 6:
			if (ctxt->modrm_mod != 0)
				modrm_ea += bp;
			break;
		case 7:
			modrm_ea += bx;
			break;
		}
		if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
		    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
			ctxt->modrm_seg = VCPU_SREG_SS;
		modrm_ea = (u16)modrm_ea;
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((ctxt->modrm_rm & 7) == 4) {
			sib = insn_fetch(u8, ctxt);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
				modrm_ea += insn_fetch(s32, ctxt);
			else {
				modrm_ea += reg_read(ctxt, base_reg);
				adjust_modrm_seg(ctxt, base_reg);
				/* Increment ESP on POP [ESP] */
				if ((ctxt->d & IncSP) &&
				    base_reg == VCPU_REGS_RSP)
					modrm_ea += ctxt->op_bytes;
			}
			if (index_reg != 4)
				modrm_ea += reg_read(ctxt, index_reg) << scale;
		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
			modrm_ea += insn_fetch(s32, ctxt);
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				ctxt->rip_relative = 1;
		} else {
			base_reg = ctxt->modrm_rm;
			modrm_ea += reg_read(ctxt, base_reg);
			adjust_modrm_seg(ctxt, base_reg);
		}
		switch (ctxt->modrm_mod) {
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(s32, ctxt);
			break;
		}
	}
	op->addr.mem.ea = modrm_ea;
	if (ctxt->ad_bytes != 8)
		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;

done:
	return rc;
}

static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct operand *op)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_MEM;
	switch (ctxt->ad_bytes) {
	case 2:
		op->addr.mem.ea = insn_fetch(u16, ctxt);
		break;
	case 4:
		op->addr.mem.ea = insn_fetch(u32, ctxt);
		break;
	case 8:
		op->addr.mem.ea = insn_fetch(u64, ctxt);
		break;
	}
done:
	return rc;
}

static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
{
	long sv = 0, mask;

	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
		mask = ~((long)ctxt->dst.bytes * 8 - 1);

		if (ctxt->src.bytes == 2)
			sv = (s16)ctxt->src.val & (s16)mask;
		else if (ctxt->src.bytes == 4)
			sv = (s32)ctxt->src.val & (s32)mask;
		else
			sv = (s64)ctxt->src.val & (s64)mask;

		ctxt->dst.addr.mem.ea = address_mask(ctxt,
					   ctxt->dst.addr.mem.ea + (sv >> 3));
	}

	/* only subword offset */
	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
}
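
/*
 * Example: BT m32, reg can address bits beyond the operand.  With a 32-bit
 * destination and a source bit index of 100, mask is ~31, so the effective
 * address advances by (100 & ~31) / 8 = 12 bytes and bit 100 & 31 = 4 of
 * the new dword is the one tested.
 */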

static int read_emulated(struct x86_emulate_ctxt *ctxt,
			 unsigned long addr, void *dest, unsigned size)
{
	int rc;
	struct read_cache *mc = &ctxt->mem_read;

	if (mc->pos < mc->end)
		goto read_cached;

	if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
		return X86EMUL_UNHANDLEABLE;

	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
				      &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	mc->end += size;

read_cached:
	memcpy(dest, mc->data + mc->pos, size);
	mc->pos += size;
	return X86EMUL_CONTINUE;
}

static int segmented_read(struct x86_emulate_ctxt *ctxt,
			  struct segmented_address addr,
			  void *data,
			  unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return read_emulated(ctxt, linear, data, size);
}

static int segmented_write(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   const void *data,
			   unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->write_emulated(ctxt, linear, data, size,
					 &ctxt->exception);
}

static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
			     struct segmented_address addr,
			     const void *orig_data, const void *data,
			     unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
					   size, &ctxt->exception);
}

static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
			   unsigned int size, unsigned short port,
			   void *dest)
{
	struct read_cache *rc = &ctxt->io_read;

	if (rc->pos == rc->end) { /* refill pio read ahead */
		unsigned int in_page, n;
		unsigned int count = ctxt->rep_prefix ?
			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
		in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
		n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
		if (n == 0)
			n = 1;
		rc->pos = rc->end = 0;
		if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
			return 0;
		rc->end = n * size;
	}

	if (ctxt->rep_prefix && (ctxt->d & String) &&
	    !(ctxt->eflags & X86_EFLAGS_DF)) {
		ctxt->dst.data = rc->data + rc->pos;
		ctxt->dst.type = OP_MEM_STR;
		ctxt->dst.count = (rc->end - rc->pos) / size;
		rc->pos = rc->end;
	} else {
		memcpy(dest, rc->data + rc->pos, size);
		rc->pos += size;
	}
	return 1;
}
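
/*
 * Note: the refill bounds n by the room left in the destination page, the
 * internal cache capacity and the REP count, so a single userspace round
 * trip can satisfy many REP INS iterations; later iterations are then
 * served straight from rc->data.
 */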

static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
				     u16 index, struct desc_struct *desc)
{
	struct desc_ptr dt;
	ulong addr;

	ctxt->ops->get_idt(ctxt, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, index << 3 | 0x2);

	addr = dt.address + index * 8;
	return linear_read_system(ctxt, addr, desc, sizeof(*desc));
}

static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     u16 selector, struct desc_ptr *dt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	u32 base3 = 0;
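
	/* Selector bit 2 is the TI bit: set selects the LDT, clear the GDT. */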
	if (selector & 1 << 2) {
		struct desc_struct desc;
		u16 sel;

		memset(dt, 0, sizeof(*dt));
		if (!ops->get_segment(ctxt, &sel, &desc, &base3,
				      VCPU_SREG_LDTR))
			return;

		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
		dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
	} else
		ops->get_gdt(ctxt, dt);
}

static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
			      u16 selector, ulong *desc_addr_p)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	ulong addr;

	get_descriptor_table_ptr(ctxt, selector, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, selector & 0xfffc);

	addr = dt.address + index * 8;

#ifdef CONFIG_X86_64
	if (addr >> 32 != 0) {
		u64 efer;

		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
		if (!(efer & EFER_LMA))
			addr &= (u32)-1;
	}
#endif

	*desc_addr_p = addr;
	return X86EMUL_CONTINUE;
}

/* allowed just for 8 byte segments */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, struct desc_struct *desc,
				   ulong *desc_addr_p)
{
	int rc;

	rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
}

/* allowed just for 8 byte segments */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    u16 selector, struct desc_struct *desc)
{
	int rc;
	ulong addr;

	rc = get_descriptor_ptr(ctxt, selector, &addr);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return linear_write_system(ctxt, addr, desc, sizeof(*desc));
}

static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				     u16 selector, int seg, u8 cpl,
				     enum x86_transfer_type transfer,
				     struct desc_struct *desc)
{
	struct desc_struct seg_desc, old_desc;
	u8 dpl, rpl;
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
	ulong desc_addr;
	int ret;
	u16 dummy;
	u32 base3 = 0;

	memset(&seg_desc, 0, sizeof(seg_desc));

	if (ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor (keep limit etc. for
		 * unreal mode) */
		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
		set_desc_base(&seg_desc, selector << 4);
		goto load;
	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
		/* VM86 needs a clean new segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		seg_desc.dpl = 3;
		goto load;
	}

	rpl = selector & 3;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	/* NULL selector is not valid for TR, CS and (except for long mode) SS */
	if (null_selector) {
		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
			goto exception;

		if (seg == VCPU_SREG_SS) {
			if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
				goto exception;

			/*
			 * ctxt->ops->set_segment expects the CPL to be in
			 * SS.DPL, so fake an expand-up 32-bit data segment.
			 */
			seg_desc.type = 3;
			seg_desc.p = 1;
			seg_desc.s = 1;
			seg_desc.dpl = cpl;
			seg_desc.d = 1;
			seg_desc.g = 1;
		}

		/* Skip all following checks */
		goto load;
	}

	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
	err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
							   GP_VECTOR;

	/* can't load system descriptor into segment selector */
	if (seg <= VCPU_SREG_GS && !seg_desc.s) {
		if (transfer == X86_TRANSFER_CALL_JMP)
			return X86EMUL_UNHANDLEABLE;
		goto exception;
	}

	dpl = seg_desc.dpl;

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment or segment
		 * selector's RPL != CPL or DPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
		break;
	case VCPU_SREG_CS:
		/*
		 * KVM uses "none" when loading CS as part of emulating Real
		 * Mode exceptions and IRET (handled above).  In all other
		 * cases, loading CS without a control transfer is a KVM bug.
		 */
		if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
			goto exception;

		if (!(seg_desc.type & 8))
			goto exception;

		if (transfer == X86_TRANSFER_RET) {
			/* RET can never return to an inner privilege level. */
			if (rpl < cpl)
				goto exception;
			/* Outer-privilege level return is not implemented */
			if (rpl > cpl)
				return X86EMUL_UNHANDLEABLE;
		}
		if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) {
			if (seg_desc.type & 4) {
				/* conforming */
				if (dpl > rpl)
					goto exception;
			} else {
				/* nonconforming */
				if (dpl != rpl)
					goto exception;
			}
		} else { /* X86_TRANSFER_CALL_JMP */
			if (seg_desc.type & 4) {
				/* conforming */
				if (dpl > cpl)
					goto exception;
			} else {
				/* nonconforming */
				if (rpl > cpl || dpl != cpl)
					goto exception;
			}
		}
		/* in long-mode d/b must be clear if l is set */
		if (seg_desc.d && seg_desc.l) {
			u64 efer = 0;

			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
			if (efer & EFER_LMA)
				goto exception;
		}

		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
		break;
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /* DS, ES, FS, or GS */
		/*
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and ((RPL > DPL) or (CPL > DPL)))
		 */
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl || cpl > dpl)))
			goto exception;
		break;
	}

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		if (!(seg_desc.type & 1)) {
			seg_desc.type |= 1;
			ret = write_segment_descriptor(ctxt, selector,
						       &seg_desc);
			if (ret != X86EMUL_CONTINUE)
				return ret;
		}
	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
		ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
		if (ret != X86EMUL_CONTINUE)
			return ret;
		if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
						 ((u64)base3 << 32), ctxt))
			return emulate_gp(ctxt, err_code);
	}

	if (seg == VCPU_SREG_TR) {
		old_desc = seg_desc;
		seg_desc.type |= 2; /* busy */
		ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
						  sizeof(seg_desc), &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			return ret;
	}
load:
	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
	if (desc)
		*desc = seg_desc;
	return X86EMUL_CONTINUE;
exception:
	return emulate_exception(ctxt, err_vec, err_code, true);
}

static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, int seg)
{
	u8 cpl = ctxt->ops->cpl(ctxt);

	/*
	 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
	 * they can load it at CPL<3 (Intel's manual says only LSS can,
	 * but it's wrong).
	 *
	 * However, the Intel manual says that putting IST=1/DPL=3 in
	 * an interrupt gate will result in SS=3 (the AMD manual instead
	 * says it doesn't), so allow SS=3 in __load_segment_descriptor()
	 * and only forbid it here.
	 */
	if (seg == VCPU_SREG_SS && selector == 3 &&
	    ctxt->mode == X86EMUL_MODE_PROT64)
		return emulate_exception(ctxt, GP_VECTOR, 0, true);

	return __load_segment_descriptor(ctxt, selector, seg, cpl,
					 X86_TRANSFER_NONE, NULL);
}

static void write_register_operand(struct operand *op)
{
	return assign_register(op->addr.reg, op->val, op->bytes);
}

static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
{
	switch (op->type) {
	case OP_REG:
		write_register_operand(op);
		break;
	case OP_MEM:
		if (ctxt->lock_prefix)
			return segmented_cmpxchg(ctxt,
						 op->addr.mem,
						 &op->orig_val,
						 &op->val,
						 op->bytes);
		else
			return segmented_write(ctxt,
					       op->addr.mem,
					       &op->val,
					       op->bytes);
	case OP_MEM_STR:
		return segmented_write(ctxt,
				       op->addr.mem,
				       op->data,
				       op->bytes * op->count);
	case OP_XMM:
		kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
		break;
	case OP_MM:
		kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
		break;
	case OP_NONE:
		/* no writeback */
		break;
	default:
		break;
	}
	return X86EMUL_CONTINUE;
}

static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
{
	struct segmented_address addr;

	rsp_increment(ctxt, -bytes);
	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;

	return segmented_write(ctxt, addr, data, bytes);
}

static int em_push(struct x86_emulate_ctxt *ctxt)
{
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
}

static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       void *dest, int len)
{
	int rc;
	struct segmented_address addr;

	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;
	rc = segmented_read(ctxt, addr, dest, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rsp_increment(ctxt, len);
	return rc;
}

static int em_pop(struct x86_emulate_ctxt *ctxt)
{
	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}

static int emulate_popf(struct x86_emulate_ctxt *ctxt,
			void *dest, int len)
{
	int rc;
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
	int cpl = ctxt->ops->cpl(ctxt);

	rc = emulate_pop(ctxt, &val, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
		      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
		      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
		      X86_EFLAGS_AC | X86_EFLAGS_ID;

	switch(ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= X86_EFLAGS_IOPL;
		if (cpl <= iopl)
			change_mask |= X86_EFLAGS_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3)
			return emulate_gp(ctxt, 0);
		change_mask |= X86_EFLAGS_IF;
		break;
	default: /* real mode */
		change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
		break;
	}

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
}

static int em_popf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.type = OP_REG;
	ctxt->dst.addr.reg = &ctxt->eflags;
	ctxt->dst.bytes = ctxt->op_bytes;
	return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}

static int em_enter(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned frame_size = ctxt->src.val;
	unsigned nesting_level = ctxt->src2.val & 31;
	ulong rbp;

	if (nesting_level)
		return X86EMUL_UNHANDLEABLE;

	rbp = reg_read(ctxt, VCPU_REGS_RBP);
	rc = push(ctxt, &rbp, stack_size(ctxt));
	if (rc != X86EMUL_CONTINUE)
		return rc;
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
		      stack_mask(ctxt));
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
		      stack_mask(ctxt));
	return X86EMUL_CONTINUE;
}

static int em_leave(struct x86_emulate_ctxt *ctxt)
{
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
		      stack_mask(ctxt));
	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
}

static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;

	ctxt->src.val = get_segment_selector(ctxt, seg);
	if (ctxt->op_bytes == 4) {
		rsp_increment(ctxt, -2);
		ctxt->op_bytes = 2;
	}

	return em_push(ctxt);
}

static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned long selector;
	int rc;

	rc = emulate_pop(ctxt, &selector, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (seg == VCPU_SREG_SS)
		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
	if (ctxt->op_bytes > 2)
		rsp_increment(ctxt, ctxt->op_bytes - 2);

	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
	return rc;
}

static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RAX;

	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));

		rc = em_push(ctxt);
		if (rc != X86EMUL_CONTINUE)
			return rc;

		++reg;
	}

	return rc;
}

static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
	return em_push(ctxt);
}

static int em_popa(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;
	u32 val = 0;

	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			rsp_increment(ctxt, ctxt->op_bytes);
			--reg;
			continue;
		}

		rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			break;
		assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
		--reg;
	}
	return rc;
}

static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc;
	struct desc_ptr dt;
	gva_t cs_addr;
	gva_t eip_addr;
	u16 cs, eip;

	/* TODO: Add limit checks */
	ctxt->src.val = ctxt->eflags;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);

	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->src.val = ctxt->_eip;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ops->get_idt(ctxt, &dt);

	eip_addr = dt.address + (irq << 2);
	cs_addr = dt.address + (irq << 2) + 2;
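
	/*
	 * Each real-mode IVT entry is four bytes: the offset (IP) at byte 0
	 * and the segment (CS) at byte 2, hence vector irq lives at 4 * irq.
	 */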
	rc = linear_read_system(ctxt, cs_addr, &cs, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = linear_read_system(ctxt, eip_addr, &eip, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = eip;

	return rc;
}

int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	int rc;

	invalidate_registers(ctxt);
	rc = __emulate_int_real(ctxt, irq);
	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);
	return rc;
}

static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return __emulate_int_real(ctxt, irq);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* Protected mode interrupts unimplemented yet */
		return X86EMUL_UNHANDLEABLE;
	}
}

static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;
	unsigned long temp_eip = 0;
	unsigned long temp_eflags = 0;
	unsigned long cs = 0;
	unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			     X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
			     X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
			     X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
			     X86_EFLAGS_AC | X86_EFLAGS_ID |
			     X86_EFLAGS_FIXED;
	unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
				  X86_EFLAGS_VIP;

	/* TODO: Add stack limit check */

	rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (temp_eip & ~0xffff)
		return emulate_gp(ctxt, 0);

	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = temp_eip;

	if (ctxt->op_bytes == 4)
		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
	else if (ctxt->op_bytes == 2) {
		ctxt->eflags &= ~0xffff;
		ctxt->eflags |= temp_eflags;
	}

	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
	ctxt->eflags |= X86_EFLAGS_FIXED;
	ctxt->ops->set_nmi_mask(ctxt, false);

	return rc;
}

static int em_iret(struct x86_emulate_ctxt *ctxt)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return emulate_iret_real(ctxt);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* iret from protected mode unimplemented yet */
		return X86EMUL_UNHANDLEABLE;
	}
}

static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned short sel;
	struct desc_struct new_desc;
	u8 cpl = ctxt->ops->cpl(ctxt);

	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_CALL_JMP,
				       &new_desc);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = assign_eip_far(ctxt, ctxt->src.val);
	/* Error handling is not implemented. */
	if (rc != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

	return rc;
}

static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
{
	return assign_eip_near(ctxt, ctxt->src.val);
}

static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	long int old_eip;

	old_eip = ctxt->_eip;
	rc = assign_eip_near(ctxt, ctxt->src.val);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	ctxt->src.val = old_eip;
	rc = em_push(ctxt);
	return rc;
}

static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
{
	u64 old = ctxt->dst.orig_val64;

	if (ctxt->dst.bytes == 16)
		return X86EMUL_UNHANDLEABLE;

	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
		ctxt->eflags &= ~X86_EFLAGS_ZF;
	} else {
		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
			(u32) reg_read(ctxt, VCPU_REGS_RBX);

		ctxt->eflags |= X86_EFLAGS_ZF;
	}
	return X86EMUL_CONTINUE;
}

static int em_ret(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long eip;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return assign_eip_near(ctxt, eip);
}

static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long eip, cs;
	int cpl = ctxt->ops->cpl(ctxt);
	struct desc_struct new_desc;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_RET,
				       &new_desc);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = assign_eip_far(ctxt, eip);
	/* Error handling is not implemented. */
	if (rc != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

	return rc;
}

static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	rc = em_ret_far(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rsp_increment(ctxt, ctxt->src.val);
	return X86EMUL_CONTINUE;
}

static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
	/* Save real source value, then compare EAX against destination. */
	ctxt->dst.orig_val = ctxt->dst.val;
	ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
	ctxt->src.orig_val = ctxt->src.val;
	ctxt->src.val = ctxt->dst.orig_val;
	fastop(ctxt, em_cmp);

	if (ctxt->eflags & X86_EFLAGS_ZF) {
		/* Success: write back to memory; no update of EAX */
		ctxt->src.type = OP_NONE;
		ctxt->dst.val = ctxt->src.orig_val;
	} else {
		/* Failure: write the value we saw to EAX. */
		ctxt->src.type = OP_REG;
		ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		ctxt->src.val = ctxt->dst.orig_val;
		/* Create write-cycle to dest by writing the same value */
		ctxt->dst.val = ctxt->dst.orig_val;
	}
	return X86EMUL_CONTINUE;
}

static int em_lseg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned short sel;
	int rc;

	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

	rc = load_segment_descriptor(ctxt, sel, seg);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->dst.val = ctxt->src.val;
	return rc;
}

static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
	if (!ctxt->ops->is_smm(ctxt))
		return emulate_ud(ctxt);

	if (ctxt->ops->leave_smm(ctxt))
		ctxt->ops->triple_fault(ctxt);

	return emulator_recalc_and_set_mode(ctxt);
}

static void
setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
{
	cs->l = 0;		/* will be adjusted later */
	set_desc_base(cs, 0);	/* flat segment */
	cs->g = 1;		/* 4kb granularity */
	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
	cs->p = 1;
	cs->d = 1;
	cs->avl = 0;

	set_desc_base(ss, 0);	/* flat segment */
	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
	ss->d = 1;		/* 32bit stack segment */
	ss->dpl = 0;
	ss->p = 1;
	ss->l = 0;
	ss->avl = 0;
}

static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;

	eax = ecx = 0;
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
	return is_guest_vendor_intel(ebx, ecx, edx);
}

static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	u32 eax, ebx, ecx, edx;

	/*
	 * syscall should always be enabled in longmode - so only become
	 * vendor specific (cpuid) if other modes are active...
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return true;

	eax = 0x00000000;
	ecx = 0x00000000;
	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
	/*
	 * remark: Intel CPUs only support "syscall" in 64bit longmode. Also a
	 * 64bit guest with a 32bit compat-app running will #UD !! While this
	 * behaviour can be fixed (by emulating) into AMD response - CPUs of
	 * AMD can't behave like Intel.
	 */
	if (is_guest_vendor_intel(ebx, ecx, edx))
		return false;

	if (is_guest_vendor_amd(ebx, ecx, edx) ||
	    is_guest_vendor_hygon(ebx, ecx, edx))
		return true;

	/*
	 * default: (not Intel, not AMD, not Hygon), apply Intel's
	 * stricter rules...
	 */
	return false;
}

static int em_syscall(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	/* syscall is not available in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_ud(ctxt);

	if (!(em_syscall_is_enabled(ctxt)))
		return emulate_ud(ctxt);

	ops->get_msr(ctxt, MSR_EFER, &efer);
	if (!(efer & EFER_SCE))
		return emulate_ud(ctxt);

	setup_syscalls_segments(&cs, &ss);
	ops->get_msr(ctxt, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs_sel = (u16)(msr_data & 0xfffc);
	ss_sel = (u16)(msr_data + 8);
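
	/*
	 * After the shift, the low word of msr_data holds STAR[47:32], the
	 * SYSCALL CS selector; architecturally, SS is that selector + 8.
	 */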
	if (efer & EFER_LMA) {
		cs.d = 0;
		cs.l = 1;
	}
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
	if (efer & EFER_LMA) {
#ifdef CONFIG_X86_64
		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;

		ops->get_msr(ctxt,
			     ctxt->mode == X86EMUL_MODE_PROT64 ?
			     MSR_LSTAR : MSR_CSTAR, &msr_data);
		ctxt->_eip = msr_data;

		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~msr_data;
		ctxt->eflags |= X86_EFLAGS_FIXED;
#endif
	} else {
		/* legacy mode */
		ops->get_msr(ctxt, MSR_STAR, &msr_data);
		ctxt->_eip = (u32)msr_data;

		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
	}

	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
	return X86EMUL_CONTINUE;
}

static int em_sysenter(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	ops->get_msr(ctxt, MSR_EFER, &efer);
	/* inject #GP if in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return emulate_gp(ctxt, 0);

	/*
	 * Not recognized on AMD in compat mode (but is recognized in legacy
	 * mode).
	 */
	if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
	    && !vendor_intel(ctxt))
		return emulate_ud(ctxt);

	/* sysenter/sysexit have not been tested in 64bit mode. */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return X86EMUL_UNHANDLEABLE;

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
	if ((msr_data & 0xfffc) == 0x0)
		return emulate_gp(ctxt, 0);

	setup_syscalls_segments(&cs, &ss);
	ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
	cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
	ss_sel = cs_sel + 8;
	if (efer & EFER_LMA) {
		cs.d = 0;
		cs.l = 1;
	}

	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
	ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
	*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
							      (u32)msr_data;
	if (efer & EFER_LMA)
		ctxt->mode = X86EMUL_MODE_PROT64;

	return X86EMUL_CONTINUE;
}
2511 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2513 const struct x86_emulate_ops *ops = ctxt->ops;
2514 struct desc_struct cs, ss;
2515 u64 msr_data, rcx, rdx;
2517 u16 cs_sel = 0, ss_sel = 0;
2519 /* inject #GP if in real mode or Virtual 8086 mode */
2520 if (ctxt->mode == X86EMUL_MODE_REAL ||
2521 ctxt->mode == X86EMUL_MODE_VM86)
2522 return emulate_gp(ctxt, 0);
2524 setup_syscalls_segments(&cs, &ss);
2526 if ((ctxt->rex_prefix & 0x8) != 0x0)
2527 usermode = X86EMUL_MODE_PROT64;
2529 usermode = X86EMUL_MODE_PROT32;
2531 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2532 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2536 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2538 case X86EMUL_MODE_PROT32:
2539 cs_sel = (u16)(msr_data + 16);
2540 if ((msr_data & 0xfffc) == 0x0)
2541 return emulate_gp(ctxt, 0);
2542 ss_sel = (u16)(msr_data + 24);
2546 case X86EMUL_MODE_PROT64:
2547 cs_sel = (u16)(msr_data + 32);
2548 if (msr_data == 0x0)
2549 return emulate_gp(ctxt, 0);
2550 ss_sel = cs_sel + 8;
2553 if (emul_is_noncanonical_address(rcx, ctxt) ||
2554 emul_is_noncanonical_address(rdx, ctxt))
2555 return emulate_gp(ctxt, 0);
2558 cs_sel |= SEGMENT_RPL_MASK;
2559 ss_sel |= SEGMENT_RPL_MASK;
2561 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2562 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2565 ctxt->mode = usermode;
2566 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2568 return X86EMUL_CONTINUE;
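/*
 * Worked example of the IA32_SYSENTER_CS-derived selector layout used by
 * em_sysenter()/em_sysexit() above (illustrative MSR value): with
 * IA32_SYSENTER_CS = 0x10, SYSENTER loads CS = 0x10 and SS = 0x18 (CPL 0);
 * a 32-bit SYSEXIT loads CS = 0x20 | 3 = 0x23 and SS = 0x28 | 3 = 0x2b,
 * and a 64-bit SYSEXIT loads CS = 0x30 | 3 = 0x33 and SS = 0x38 | 3 = 0x3b,
 * since SEGMENT_RPL_MASK is ORed in for the return to CPL 3.
 */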
2571 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2574 if (ctxt->mode == X86EMUL_MODE_REAL)
2576 if (ctxt->mode == X86EMUL_MODE_VM86)
2578 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2579 return ctxt->ops->cpl(ctxt) > iopl;
2582 #define VMWARE_PORT_VMPORT (0x5658)
2583 #define VMWARE_PORT_VMRPC (0x5659)
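/*
 * The TSS I/O permission bitmap check below follows the architectural
 * rules: the 16-bit value at offset 102 of the TSS gives the offset of
 * the I/O bitmap, and an access to 'port' of 'len' bytes is allowed only
 * if bits port..port+len-1 of the bitmap are all clear.  Worked example:
 * port 0x3f8, len 1 -> the bitmap byte at offset 0x3f8 / 8 = 0x7f, bit 0.
 */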
2585 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2588 const struct x86_emulate_ops *ops = ctxt->ops;
2589 struct desc_struct tr_seg;
2592 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2593 unsigned mask = (1 << len) - 1;
2597 * VMware allows access to these ports even if denied
2598 * by TSS I/O permission bitmap. Mimic behavior.
2600 if (enable_vmware_backdoor &&
2601 ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
2604 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2607 if (desc_limit_scaled(&tr_seg) < 103)
2609 base = get_desc_base(&tr_seg);
2610 #ifdef CONFIG_X86_64
2611 base |= ((u64)base3) << 32;
2613 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
2614 if (r != X86EMUL_CONTINUE)
2616 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2618 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
2619 if (r != X86EMUL_CONTINUE)
2621 if ((perm >> bit_idx) & mask)
2626 static bool emulator_io_permitted(struct x86_emulate_ctxt *ctxt,
2632 if (emulator_bad_iopl(ctxt))
2633 if (!emulator_io_port_access_allowed(ctxt, port, len))
2636 ctxt->perm_ok = true;
2641 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2644 * Intel CPUs mask the counter and pointers in quite a strange
2645 * manner when ECX is zero due to REP-string optimizations.
2647 #ifdef CONFIG_X86_64
2648 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2651 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2654 case 0xa4: /* movsb */
2655 case 0xa5: /* movsd/w */
2656 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2658 case 0xaa: /* stosb */
2659 case 0xab: /* stosd/w */
2660 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2665 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2666 struct tss_segment_16 *tss)
2668 tss->ip = ctxt->_eip;
2669 tss->flag = ctxt->eflags;
2670 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2671 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2672 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2673 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2674 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2675 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2676 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2677 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2679 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2680 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2681 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2682 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2683 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2686 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2687 struct tss_segment_16 *tss)
2692 ctxt->_eip = tss->ip;
2693 ctxt->eflags = tss->flag | 2;
2694 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2695 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2696 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2697 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2698 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2699 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2700 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2701 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2704 * SDM says that segment selectors are loaded before segment
 * descriptors.
2707 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2708 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2709 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2710 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2711 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2716 * Now load segment descriptors. If a fault happens at this stage,
2717 * it is handled in the context of the new task.
2719 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2720 X86_TRANSFER_TASK_SWITCH, NULL);
2721 if (ret != X86EMUL_CONTINUE)
2723 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2724 X86_TRANSFER_TASK_SWITCH, NULL);
2725 if (ret != X86EMUL_CONTINUE)
2727 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2728 X86_TRANSFER_TASK_SWITCH, NULL);
2729 if (ret != X86EMUL_CONTINUE)
2731 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2732 X86_TRANSFER_TASK_SWITCH, NULL);
2733 if (ret != X86EMUL_CONTINUE)
2735 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2736 X86_TRANSFER_TASK_SWITCH, NULL);
2737 if (ret != X86EMUL_CONTINUE)
2740 return X86EMUL_CONTINUE;
2743 static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2744 ulong old_tss_base, struct desc_struct *new_desc)
2746 struct tss_segment_16 tss_seg;
2748 u32 new_tss_base = get_desc_base(new_desc);
2750 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2751 if (ret != X86EMUL_CONTINUE)
2754 save_state_to_tss16(ctxt, &tss_seg);
2756 ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2757 if (ret != X86EMUL_CONTINUE)
2760 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2761 if (ret != X86EMUL_CONTINUE)
2764 if (old_tss_sel != 0xffff) {
2765 tss_seg.prev_task_link = old_tss_sel;
2767 ret = linear_write_system(ctxt, new_tss_base,
2768 &tss_seg.prev_task_link,
2769 sizeof(tss_seg.prev_task_link));
2770 if (ret != X86EMUL_CONTINUE)
2774 return load_state_from_tss16(ctxt, &tss_seg);
2777 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2778 struct tss_segment_32 *tss)
2780 /* CR3 and the LDT selector are intentionally not saved */
2781 tss->eip = ctxt->_eip;
2782 tss->eflags = ctxt->eflags;
2783 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
2784 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
2785 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
2786 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
2787 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
2788 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
2789 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
2790 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
2792 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2793 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2794 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2795 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2796 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
2797 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
2800 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2801 struct tss_segment_32 *tss)
2806 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
2807 return emulate_gp(ctxt, 0);
2808 ctxt->_eip = tss->eip;
2809 ctxt->eflags = tss->eflags | 2;
2811 /* General purpose registers */
2812 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
2813 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
2814 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
2815 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
2816 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
2817 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
2818 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
2819 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
2822 * SDM says that segment selectors are loaded before segment
2823 * descriptors. This is important because CPL checks will
 * use CS.RPL.
2826 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
2827 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2828 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2829 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2830 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2831 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
2832 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2835 * If we're switching between Protected Mode and VM86, we need to make
2836 * sure to update the mode before loading the segment descriptors so
2837 * that the selectors are interpreted correctly.
2839 if (ctxt->eflags & X86_EFLAGS_VM) {
2840 ctxt->mode = X86EMUL_MODE_VM86;
2843 ctxt->mode = X86EMUL_MODE_PROT32;
2848 * Now load segment descriptors. If a fault happens at this stage,
2849 * it is handled in the context of the new task.
2851 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
2852 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
2853 if (ret != X86EMUL_CONTINUE)
2855 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2856 X86_TRANSFER_TASK_SWITCH, NULL);
2857 if (ret != X86EMUL_CONTINUE)
2859 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2860 X86_TRANSFER_TASK_SWITCH, NULL);
2861 if (ret != X86EMUL_CONTINUE)
2863 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2864 X86_TRANSFER_TASK_SWITCH, NULL);
2865 if (ret != X86EMUL_CONTINUE)
2867 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2868 X86_TRANSFER_TASK_SWITCH, NULL);
2869 if (ret != X86EMUL_CONTINUE)
2871 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
2872 X86_TRANSFER_TASK_SWITCH, NULL);
2873 if (ret != X86EMUL_CONTINUE)
2875 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
2876 X86_TRANSFER_TASK_SWITCH, NULL);
2881 static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2882 ulong old_tss_base, struct desc_struct *new_desc)
2884 struct tss_segment_32 tss_seg;
2886 u32 new_tss_base = get_desc_base(new_desc);
2887 u32 eip_offset = offsetof(struct tss_segment_32, eip);
2888 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
2890 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2891 if (ret != X86EMUL_CONTINUE)
2894 save_state_to_tss32(ctxt, &tss_seg);
2896 /* Only GP registers and segment selectors are saved */
2897 ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
2898 ldt_sel_offset - eip_offset);
2899 if (ret != X86EMUL_CONTINUE)
2902 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2903 if (ret != X86EMUL_CONTINUE)
2906 if (old_tss_sel != 0xffff) {
2907 tss_seg.prev_task_link = old_tss_sel;
2909 ret = linear_write_system(ctxt, new_tss_base,
2910 &tss_seg.prev_task_link,
2911 sizeof(tss_seg.prev_task_link));
2912 if (ret != X86EMUL_CONTINUE)
2916 return load_state_from_tss32(ctxt, &tss_seg);
2919 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2920 u16 tss_selector, int idt_index, int reason,
2921 bool has_error_code, u32 error_code)
2923 const struct x86_emulate_ops *ops = ctxt->ops;
2924 struct desc_struct curr_tss_desc, next_tss_desc;
2926 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
2927 ulong old_tss_base =
2928 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
2930 ulong desc_addr, dr7;
2932 /* FIXME: old_tss_base == ~0 ? */
2934 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
2935 if (ret != X86EMUL_CONTINUE)
2937 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
2938 if (ret != X86EMUL_CONTINUE)
2941 /* FIXME: check that next_tss_desc is tss */
2944 * Check privileges. The three cases are task switch caused by...
2946 * 1. jmp/call/int to task gate: Check against DPL of the task gate
2947 * 2. Exception/IRQ/iret: No check is performed
2948 * 3. jmp/call to TSS/task-gate: No check is performed since the
2949 * hardware checks it before exiting.
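 *
 * For case 1, the #GP error code used below encodes the IDT vector:
 * bits 15:3 hold the vector and bit 1 (the IDT flag) is set, so e.g.
 * vector 0x80 yields (0x80 << 3) | 0x2 = 0x402.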
2951 if (reason == TASK_SWITCH_GATE) {
2952 if (idt_index != -1) {
2953 /* Software interrupts */
2954 struct desc_struct task_gate_desc;
2957 ret = read_interrupt_descriptor(ctxt, idt_index,
2959 if (ret != X86EMUL_CONTINUE)
2962 dpl = task_gate_desc.dpl;
2963 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2964 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
2968 desc_limit = desc_limit_scaled(&next_tss_desc);
2969 if (!next_tss_desc.p ||
2970 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2971 desc_limit < 0x2b)) {
2972 return emulate_ts(ctxt, tss_selector & 0xfffc);
2975 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2976 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2977 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
2980 if (reason == TASK_SWITCH_IRET)
2981 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2983 /* Set the back link to the previous task only if the NT bit is set in
2984 EFLAGS; note that old_tss_sel is not used after this point. */
2985 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2986 old_tss_sel = 0xffff;
2988 if (next_tss_desc.type & 8)
2989 ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc);
2991 ret = task_switch_16(ctxt, old_tss_sel,
2992 old_tss_base, &next_tss_desc);
2993 if (ret != X86EMUL_CONTINUE)
2996 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2997 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2999 if (reason != TASK_SWITCH_IRET) {
3000 next_tss_desc.type |= (1 << 1); /* set busy flag */
3001 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3004 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3005 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3007 if (has_error_code) {
3008 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3009 ctxt->lock_prefix = 0;
3010 ctxt->src.val = (unsigned long) error_code;
3011 ret = em_push(ctxt);
3014 ops->get_dr(ctxt, 7, &dr7);
3015 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3020 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3021 u16 tss_selector, int idt_index, int reason,
3022 bool has_error_code, u32 error_code)
3026 invalidate_registers(ctxt);
3027 ctxt->_eip = ctxt->eip;
3028 ctxt->dst.type = OP_NONE;
3030 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3031 has_error_code, error_code);
3033 if (rc == X86EMUL_CONTINUE) {
3034 ctxt->eip = ctxt->_eip;
3035 writeback_registers(ctxt);
3038 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3041 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3044 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3046 register_address_increment(ctxt, reg, df * op->bytes);
3047 op->addr.mem.ea = register_address(ctxt, reg);
3050 static int em_das(struct x86_emulate_ctxt *ctxt)
3053 bool af, cf, old_cf;
3055 cf = ctxt->eflags & X86_EFLAGS_CF;
3061 af = ctxt->eflags & X86_EFLAGS_AF;
3062 if ((al & 0x0f) > 9 || af) {
3064 cf = old_cf | (al >= 250);
3069 if (old_al > 0x99 || old_cf) {
3075 /* Set PF, ZF, SF */
3076 ctxt->src.type = OP_IMM;
3078 ctxt->src.bytes = 1;
3079 fastop(ctxt, em_or);
3080 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3082 ctxt->eflags |= X86_EFLAGS_CF;
3084 ctxt->eflags |= X86_EFLAGS_AF;
3085 return X86EMUL_CONTINUE;
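/*
 * Worked DAS example: after the packed-BCD subtraction 0x72 - 0x27 the CPU
 * leaves AL = 0x4b with AF = 1 and CF = 0.  The low nibble 0xb > 9 (and AF
 * is set), so 6 is subtracted giving AL = 0x45; 0x4b is not above 0x99 and
 * CF was clear, so no 0x60 adjustment is made.  0x45 is the correct BCD
 * result of 72 - 27.
 */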
3088 static int em_aam(struct x86_emulate_ctxt *ctxt)
3092 if (ctxt->src.val == 0)
3093 return emulate_de(ctxt);
3095 al = ctxt->dst.val & 0xff;
3096 ah = al / ctxt->src.val;
3097 al %= ctxt->src.val;
3099 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3101 /* Set PF, ZF, SF */
3102 ctxt->src.type = OP_IMM;
3104 ctxt->src.bytes = 1;
3105 fastop(ctxt, em_or);
3107 return X86EMUL_CONTINUE;
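/*
 * Worked AAM example (default base 10, i.e. src.val == 10): AL = 0x4b (75)
 * becomes AH = 75 / 10 = 7 and AL = 75 % 10 = 5, so AX = 0x0705.
 */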
3110 static int em_aad(struct x86_emulate_ctxt *ctxt)
3112 u8 al = ctxt->dst.val & 0xff;
3113 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3115 al = (al + (ah * ctxt->src.val)) & 0xff;
3117 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3119 /* Set PF, ZF, SF */
3120 ctxt->src.type = OP_IMM;
3122 ctxt->src.bytes = 1;
3123 fastop(ctxt, em_or);
3125 return X86EMUL_CONTINUE;
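/*
 * Worked AAD example (default base 10): AX = 0x0705 becomes
 * AL = 5 + 7 * 10 = 75 = 0x4b and AH = 0, so AX = 0x004b.  Clearing the
 * low 16 bits of dst.val before ORing in 'al' is what zeroes AH.
 */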
3128 static int em_call(struct x86_emulate_ctxt *ctxt)
3131 long rel = ctxt->src.val;
3133 ctxt->src.val = (unsigned long)ctxt->_eip;
3134 rc = jmp_rel(ctxt, rel);
3135 if (rc != X86EMUL_CONTINUE)
3137 return em_push(ctxt);
3140 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3145 struct desc_struct old_desc, new_desc;
3146 const struct x86_emulate_ops *ops = ctxt->ops;
3147 int cpl = ctxt->ops->cpl(ctxt);
3148 enum x86emul_mode prev_mode = ctxt->mode;
3150 old_eip = ctxt->_eip;
3151 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3153 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3154 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3155 X86_TRANSFER_CALL_JMP, &new_desc);
3156 if (rc != X86EMUL_CONTINUE)
3159 rc = assign_eip_far(ctxt, ctxt->src.val);
3160 if (rc != X86EMUL_CONTINUE)
3163 ctxt->src.val = old_cs;
3165 if (rc != X86EMUL_CONTINUE)
3168 ctxt->src.val = old_eip;
3170 /* If we failed, we tainted the memory, but at the very least we should
 restore CS. */
3172 if (rc != X86EMUL_CONTINUE) {
3173 pr_warn_once("faulting far call emulation tainted memory\n");
3178 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3179 ctxt->mode = prev_mode;
3184 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3189 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3190 if (rc != X86EMUL_CONTINUE)
3192 rc = assign_eip_near(ctxt, eip);
3193 if (rc != X86EMUL_CONTINUE)
3195 rsp_increment(ctxt, ctxt->src.val);
3196 return X86EMUL_CONTINUE;
3199 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3201 /* Write back the register source. */
3202 ctxt->src.val = ctxt->dst.val;
3203 write_register_operand(&ctxt->src);
3205 /* Write back the memory destination with implicit LOCK prefix. */
3206 ctxt->dst.val = ctxt->src.orig_val;
3207 ctxt->lock_prefix = 1;
3208 return X86EMUL_CONTINUE;
3211 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3213 ctxt->dst.val = ctxt->src2.val;
3214 return fastop(ctxt, em_imul);
3217 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3219 ctxt->dst.type = OP_REG;
3220 ctxt->dst.bytes = ctxt->src.bytes;
3221 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
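/*
 * Sign-replication trick: shifting the sign bit of the source down to bit 0
 * gives 0 or 1; subtracting 1 and complementing turns that into all-ones or
 * all-zeroes.  E.g. for a 16-bit source 0x8000: (0x8000 >> 15) = 1,
 * ~(1 - 1) = ~0, so DX = 0xffff; for 0x7fff the result is 0.
 */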
3222 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3224 return X86EMUL_CONTINUE;
3227 static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3231 if (!ctxt->ops->guest_has_rdpid(ctxt))
3232 return emulate_ud(ctxt);
3234 ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
3235 ctxt->dst.val = tsc_aux;
3236 return X86EMUL_CONTINUE;
3239 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3243 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3244 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3245 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3246 return X86EMUL_CONTINUE;
3249 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3253 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3254 return emulate_gp(ctxt, 0);
3255 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3256 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3257 return X86EMUL_CONTINUE;
3260 static int em_mov(struct x86_emulate_ctxt *ctxt)
3262 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3263 return X86EMUL_CONTINUE;
3266 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3270 if (!ctxt->ops->guest_has_movbe(ctxt))
3271 return emulate_ud(ctxt);
3273 switch (ctxt->op_bytes) {
3276 * From MOVBE definition: "...When the operand size is 16 bits,
3277 * the upper word of the destination register remains unchanged
3280 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3281 * rules, so we have to do the operation almost by hand.
3283 tmp = (u16)ctxt->src.val;
3284 ctxt->dst.val &= ~0xffffUL;
3285 ctxt->dst.val |= (unsigned long)swab16(tmp);
3288 ctxt->dst.val = swab32((u32)ctxt->src.val);
3291 ctxt->dst.val = swab64(ctxt->src.val);
3296 return X86EMUL_CONTINUE;
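/*
 * For the 16-bit case above: a source value of 0x1234 is stored as 0x3412
 * in the low word of the destination, while bits 63:16 of the destination
 * register are preserved, matching the MOVBE definition quoted above.
 */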
3299 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3301 int cr_num = ctxt->modrm_reg;
3304 if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
3305 return emulate_gp(ctxt, 0);
3307 /* Disable writeback. */
3308 ctxt->dst.type = OP_NONE;
3312 * CR0 write might have updated CR0.PE and/or CR0.PG
3313 * which can affect the cpu's execution mode.
3315 r = emulator_recalc_and_set_mode(ctxt);
3316 if (r != X86EMUL_CONTINUE)
3320 return X86EMUL_CONTINUE;
3323 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3327 if (ctxt->mode == X86EMUL_MODE_PROT64)
3328 val = ctxt->src.val & ~0ULL;
3330 val = ctxt->src.val & ~0U;
3332 /* #UD condition is already handled. */
3333 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3334 return emulate_gp(ctxt, 0);
3336 /* Disable writeback. */
3337 ctxt->dst.type = OP_NONE;
3338 return X86EMUL_CONTINUE;
3341 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3343 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3347 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3348 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3349 r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data);
3351 if (r == X86EMUL_PROPAGATE_FAULT)
3352 return emulate_gp(ctxt, 0);
3357 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3359 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3363 r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data);
3365 if (r == X86EMUL_PROPAGATE_FAULT)
3366 return emulate_gp(ctxt, 0);
3368 if (r == X86EMUL_CONTINUE) {
3369 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3370 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3375 static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3377 if (segment > VCPU_SREG_GS &&
3378 (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3379 ctxt->ops->cpl(ctxt) > 0)
3380 return emulate_gp(ctxt, 0);
3382 ctxt->dst.val = get_segment_selector(ctxt, segment);
3383 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3384 ctxt->dst.bytes = 2;
3385 return X86EMUL_CONTINUE;
3388 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3390 if (ctxt->modrm_reg > VCPU_SREG_GS)
3391 return emulate_ud(ctxt);
3393 return em_store_sreg(ctxt, ctxt->modrm_reg);
3396 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3398 u16 sel = ctxt->src.val;
3400 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3401 return emulate_ud(ctxt);
3403 if (ctxt->modrm_reg == VCPU_SREG_SS)
3404 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3406 /* Disable writeback. */
3407 ctxt->dst.type = OP_NONE;
3408 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3411 static int em_sldt(struct x86_emulate_ctxt *ctxt)
3413 return em_store_sreg(ctxt, VCPU_SREG_LDTR);
3416 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3418 u16 sel = ctxt->src.val;
3420 /* Disable writeback. */
3421 ctxt->dst.type = OP_NONE;
3422 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3425 static int em_str(struct x86_emulate_ctxt *ctxt)
3427 return em_store_sreg(ctxt, VCPU_SREG_TR);
3430 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3432 u16 sel = ctxt->src.val;
3434 /* Disable writeback. */
3435 ctxt->dst.type = OP_NONE;
3436 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3439 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3443 unsigned int max_size;
3445 rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode,
3446 &linear, X86EMUL_F_INVLPG);
3447 if (rc == X86EMUL_CONTINUE)
3448 ctxt->ops->invlpg(ctxt, linear);
3449 /* Disable writeback. */
3450 ctxt->dst.type = OP_NONE;
3451 return X86EMUL_CONTINUE;
3454 static int em_clts(struct x86_emulate_ctxt *ctxt)
3458 cr0 = ctxt->ops->get_cr(ctxt, 0);
3460 ctxt->ops->set_cr(ctxt, 0, cr0);
3461 return X86EMUL_CONTINUE;
3464 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3466 int rc = ctxt->ops->fix_hypercall(ctxt);
3468 if (rc != X86EMUL_CONTINUE)
3471 /* Let the processor re-execute the fixed hypercall */
3472 ctxt->_eip = ctxt->eip;
3473 /* Disable writeback. */
3474 ctxt->dst.type = OP_NONE;
3475 return X86EMUL_CONTINUE;
3478 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3479 void (*get)(struct x86_emulate_ctxt *ctxt,
3480 struct desc_ptr *ptr))
3482 struct desc_ptr desc_ptr;
3484 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3485 ctxt->ops->cpl(ctxt) > 0)
3486 return emulate_gp(ctxt, 0);
3488 if (ctxt->mode == X86EMUL_MODE_PROT64)
3490 get(ctxt, &desc_ptr);
3491 if (ctxt->op_bytes == 2) {
3493 desc_ptr.address &= 0x00ffffff;
3495 /* Disable writeback. */
3496 ctxt->dst.type = OP_NONE;
3497 return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3498 &desc_ptr, 2 + ctxt->op_bytes);
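/*
 * SGDT/SIDT store the 2-byte limit followed by the table base address.
 * With a 16-bit operand size the base is truncated to 24 bits before it
 * is written out, as done above.
 */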
3501 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3503 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3506 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3508 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3511 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3513 struct desc_ptr desc_ptr;
3516 if (ctxt->mode == X86EMUL_MODE_PROT64)
3518 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3519 &desc_ptr.size, &desc_ptr.address,
3521 if (rc != X86EMUL_CONTINUE)
3523 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3524 emul_is_noncanonical_address(desc_ptr.address, ctxt))
3525 return emulate_gp(ctxt, 0);
3527 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3529 ctxt->ops->set_idt(ctxt, &desc_ptr);
3530 /* Disable writeback. */
3531 ctxt->dst.type = OP_NONE;
3532 return X86EMUL_CONTINUE;
3535 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3537 return em_lgdt_lidt(ctxt, true);
3540 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3542 return em_lgdt_lidt(ctxt, false);
3545 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3547 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3548 ctxt->ops->cpl(ctxt) > 0)
3549 return emulate_gp(ctxt, 0);
3551 if (ctxt->dst.type == OP_MEM)
3552 ctxt->dst.bytes = 2;
3553 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3554 return X86EMUL_CONTINUE;
3557 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3559 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3560 | (ctxt->src.val & 0x0f));
3561 ctxt->dst.type = OP_NONE;
3562 return X86EMUL_CONTINUE;
3565 static int em_loop(struct x86_emulate_ctxt *ctxt)
3567 int rc = X86EMUL_CONTINUE;
3569 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3570 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3571 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3572 rc = jmp_rel(ctxt, ctxt->src.val);
3577 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3579 int rc = X86EMUL_CONTINUE;
3581 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3582 rc = jmp_rel(ctxt, ctxt->src.val);
3587 static int em_in(struct x86_emulate_ctxt *ctxt)
3589 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3591 return X86EMUL_IO_NEEDED;
3593 return X86EMUL_CONTINUE;
3596 static int em_out(struct x86_emulate_ctxt *ctxt)
3598 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3600 /* Disable writeback. */
3601 ctxt->dst.type = OP_NONE;
3602 return X86EMUL_CONTINUE;
3605 static int em_cli(struct x86_emulate_ctxt *ctxt)
3607 if (emulator_bad_iopl(ctxt))
3608 return emulate_gp(ctxt, 0);
3610 ctxt->eflags &= ~X86_EFLAGS_IF;
3611 return X86EMUL_CONTINUE;
3614 static int em_sti(struct x86_emulate_ctxt *ctxt)
3616 if (emulator_bad_iopl(ctxt))
3617 return emulate_gp(ctxt, 0);
3619 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3620 ctxt->eflags |= X86_EFLAGS_IF;
3621 return X86EMUL_CONTINUE;
3624 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3626 u32 eax, ebx, ecx, edx;
3629 ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
3630 if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3631 ctxt->ops->cpl(ctxt)) {
3632 return emulate_gp(ctxt, 0);
3635 eax = reg_read(ctxt, VCPU_REGS_RAX);
3636 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3637 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3638 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3639 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3640 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3641 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3642 return X86EMUL_CONTINUE;
3645 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3649 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3651 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3653 ctxt->eflags &= ~0xffUL;
3654 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3655 return X86EMUL_CONTINUE;
3658 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3660 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3661 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3662 return X86EMUL_CONTINUE;
3665 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3667 switch (ctxt->op_bytes) {
3668 #ifdef CONFIG_X86_64
3670 asm("bswap %0" : "+r"(ctxt->dst.val));
3674 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3677 return X86EMUL_CONTINUE;
3680 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3682 /* emulating clflush regardless of cpuid */
3683 return X86EMUL_CONTINUE;
3686 static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
3688 /* emulating clflushopt regardless of cpuid */
3689 return X86EMUL_CONTINUE;
3692 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3694 ctxt->dst.val = (s32) ctxt->src.val;
3695 return X86EMUL_CONTINUE;
3698 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3700 if (!ctxt->ops->guest_has_fxsr(ctxt))
3701 return emulate_ud(ctxt);
3703 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3704 return emulate_nm(ctxt);
3707 * Rather than work around the lack of fxsave64/fxrstor64 on old
3708 * compilers, simply don't emulate a case that should never be hit.
3710 if (ctxt->mode >= X86EMUL_MODE_PROT64)
3711 return X86EMUL_UNHANDLEABLE;
3713 return X86EMUL_CONTINUE;
3717 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
3718 * and restore MXCSR.
3720 static size_t __fxstate_size(int nregs)
3722 return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
3725 static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
3728 if (ctxt->mode == X86EMUL_MODE_PROT64)
3729 return __fxstate_size(16);
3731 cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
3732 return __fxstate_size(cr4_osfxsr ? 8 : 0);
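/*
 * Assuming struct fxregs_state mirrors the architectural FXSAVE image
 * (the XMM registers start at byte offset 160), this works out to
 * __fxstate_size(0) = 160, __fxstate_size(8) = 288 and
 * __fxstate_size(16) = 416 bytes.
 */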
3736 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
 * 1) 16 bit mode
 * 2) 32 bit mode
3739 * - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs
3740 * preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
 * FPU state.
3742 * 3) 64-bit mode with REX.W prefix
3743 * - like (2), but XMM 8-15 are being saved and restored
3744 * 4) 64-bit mode without REX.W prefix
3745 * - like (3), but FIP and FDP are 64 bit
3747 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
3748 * desired result. (4) is not emulated.
3750 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
3751 * and FPU DS) should match.
3753 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
3755 struct fxregs_state fx_state;
3758 rc = check_fxsr(ctxt);
3759 if (rc != X86EMUL_CONTINUE)
3764 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
3768 if (rc != X86EMUL_CONTINUE)
3771 return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
3772 fxstate_size(ctxt));
3776 * FXRSTOR might restore XMM registers not provided by the guest. Fill
3777 * in the host registers (via FXSAVE) instead, so they won't be modified.
3778 * (preemption has to stay disabled until FXRSTOR).
3780 * Use noinline to keep the stack for other functions called by callers small.
3782 static noinline int fxregs_fixup(struct fxregs_state *fx_state,
3783 const size_t used_size)
3785 struct fxregs_state fx_tmp;
3788 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
3789 memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
3790 __fxstate_size(16) - used_size);
3795 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
3797 struct fxregs_state fx_state;
3801 rc = check_fxsr(ctxt);
3802 if (rc != X86EMUL_CONTINUE)
3805 size = fxstate_size(ctxt);
3806 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
3807 if (rc != X86EMUL_CONTINUE)
3812 if (size < __fxstate_size(16)) {
3813 rc = fxregs_fixup(&fx_state, size);
3814 if (rc != X86EMUL_CONTINUE)
3818 if (fx_state.mxcsr >> 16) {
3819 rc = emulate_gp(ctxt, 0);
3823 if (rc == X86EMUL_CONTINUE)
3824 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
3832 static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
3836 if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE))
3837 return emulate_ud(ctxt);
3839 eax = reg_read(ctxt, VCPU_REGS_RAX);
3840 edx = reg_read(ctxt, VCPU_REGS_RDX);
3841 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3843 if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
3844 return emulate_gp(ctxt, 0);
3846 return X86EMUL_CONTINUE;
3849 static bool valid_cr(int nr)
3861 static int check_cr_access(struct x86_emulate_ctxt *ctxt)
3863 if (!valid_cr(ctxt->modrm_reg))
3864 return emulate_ud(ctxt);
3866 return X86EMUL_CONTINUE;
3869 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
3873 ctxt->ops->get_dr(ctxt, 7, &dr7);
3875 return dr7 & DR7_GD;
3878 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3880 int dr = ctxt->modrm_reg;
3884 return emulate_ud(ctxt);
3886 cr4 = ctxt->ops->get_cr(ctxt, 4);
3887 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3888 return emulate_ud(ctxt);
3890 if (check_dr7_gd(ctxt)) {
3893 ctxt->ops->get_dr(ctxt, 6, &dr6);
3894 dr6 &= ~DR_TRAP_BITS;
3895 dr6 |= DR6_BD | DR6_ACTIVE_LOW;
3896 ctxt->ops->set_dr(ctxt, 6, dr6);
3897 return emulate_db(ctxt);
3900 return X86EMUL_CONTINUE;
3903 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3905 u64 new_val = ctxt->src.val64;
3906 int dr = ctxt->modrm_reg;
3908 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3909 return emulate_gp(ctxt, 0);
3911 return check_dr_read(ctxt);
3914 static int check_svme(struct x86_emulate_ctxt *ctxt)
3918 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3920 if (!(efer & EFER_SVME))
3921 return emulate_ud(ctxt);
3923 return X86EMUL_CONTINUE;
3926 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
3928 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3930 /* Valid physical address? */
3931 if (rax & 0xffff000000000000ULL)
3932 return emulate_gp(ctxt, 0);
3934 return check_svme(ctxt);
3937 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
3939 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3941 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
3942 return emulate_gp(ctxt, 0);
3944 return X86EMUL_CONTINUE;
3947 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
3949 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3950 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3953 * VMware allows access to these Pseudo-PMCs even when read via RDPMC
3954 * in Ring3 when CR4.PCE=0.
3956 if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
3957 return X86EMUL_CONTINUE;
3960 * If CR4.PCE is clear, the SDM requires CPL=0 or CR0.PE=0. The CR0.PE
3961 * check however is unnecessary because CPL is always 0 outside
 * protected mode.
3964 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3965 ctxt->ops->check_pmc(ctxt, rcx))
3966 return emulate_gp(ctxt, 0);
3968 return X86EMUL_CONTINUE;
3971 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
3973 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
3974 if (!emulator_io_permitted(ctxt, ctxt->src.val, ctxt->dst.bytes))
3975 return emulate_gp(ctxt, 0);
3977 return X86EMUL_CONTINUE;
3980 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3982 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
3983 if (!emulator_io_permitted(ctxt, ctxt->dst.val, ctxt->src.bytes))
3984 return emulate_gp(ctxt, 0);
3986 return X86EMUL_CONTINUE;
3989 #define D(_y) { .flags = (_y) }
3990 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
3991 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
3992 .intercept = x86_intercept_##_i, .check_perm = (_p) }
3993 #define N D(NotImpl)
3994 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3995 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3996 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
3997 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
3998 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
3999 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4000 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4001 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4002 #define II(_f, _e, _i) \
4003 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4004 #define IIP(_f, _e, _i, _p) \
4005 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4006 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4007 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4009 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4010 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4011 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4012 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4013 #define I2bvIP(_f, _e, _i, _p) \
4014 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4016 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4017 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4018 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
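/*
 * F6ALU() expands to the six table entries that every classic ALU opcode
 * row uses, e.g. ADD 0x00-0x05: r/m8,r8; r/m,r; r8,r/m8; r,r/m; AL,imm8;
 * eAX,imm.  The Lock flag is dropped for the forms that cannot take a
 * LOCK prefix (register and accumulator destinations).
 */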
4020 static const struct opcode group7_rm0[] = {
4022 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4026 static const struct opcode group7_rm1[] = {
4027 DI(SrcNone | Priv, monitor),
4028 DI(SrcNone | Priv, mwait),
4032 static const struct opcode group7_rm2[] = {
4034 II(ImplicitOps | Priv, em_xsetbv, xsetbv),
4038 static const struct opcode group7_rm3[] = {
4039 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4040 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4041 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4042 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4043 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4044 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4045 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4046 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4049 static const struct opcode group7_rm7[] = {
4051 DIP(SrcNone, rdtscp, check_rdtsc),
4055 static const struct opcode group1[] = {
4057 F(Lock | PageTable, em_or),
4060 F(Lock | PageTable, em_and),
4066 static const struct opcode group1A[] = {
4067 I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4070 static const struct opcode group2[] = {
4071 F(DstMem | ModRM, em_rol),
4072 F(DstMem | ModRM, em_ror),
4073 F(DstMem | ModRM, em_rcl),
4074 F(DstMem | ModRM, em_rcr),
4075 F(DstMem | ModRM, em_shl),
4076 F(DstMem | ModRM, em_shr),
4077 F(DstMem | ModRM, em_shl),
4078 F(DstMem | ModRM, em_sar),
4081 static const struct opcode group3[] = {
4082 F(DstMem | SrcImm | NoWrite, em_test),
4083 F(DstMem | SrcImm | NoWrite, em_test),
4084 F(DstMem | SrcNone | Lock, em_not),
4085 F(DstMem | SrcNone | Lock, em_neg),
4086 F(DstXacc | Src2Mem, em_mul_ex),
4087 F(DstXacc | Src2Mem, em_imul_ex),
4088 F(DstXacc | Src2Mem, em_div_ex),
4089 F(DstXacc | Src2Mem, em_idiv_ex),
4092 static const struct opcode group4[] = {
4093 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4094 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4098 static const struct opcode group5[] = {
4099 F(DstMem | SrcNone | Lock, em_inc),
4100 F(DstMem | SrcNone | Lock, em_dec),
4101 I(SrcMem | NearBranch | IsBranch, em_call_near_abs),
4102 I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
4103 I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
4104 I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
4105 I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
4108 static const struct opcode group6[] = {
4109 II(Prot | DstMem, em_sldt, sldt),
4110 II(Prot | DstMem, em_str, str),
4111 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4112 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4116 static const struct group_dual group7 = { {
4117 II(Mov | DstMem, em_sgdt, sgdt),
4118 II(Mov | DstMem, em_sidt, sidt),
4119 II(SrcMem | Priv, em_lgdt, lgdt),
4120 II(SrcMem | Priv, em_lidt, lidt),
4121 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4122 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4123 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4129 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4130 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4134 static const struct opcode group8[] = {
4136 F(DstMem | SrcImmByte | NoWrite, em_bt),
4137 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4138 F(DstMem | SrcImmByte | Lock, em_btr),
4139 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4143 * The "memory" destination is actually always a register, since we come
4144 * from the register case of group9.
4146 static const struct gprefix pfx_0f_c7_7 = {
4147 N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
4151 static const struct group_dual group9 = { {
4152 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4154 N, N, N, N, N, N, N,
4155 GP(0, &pfx_0f_c7_7),
4158 static const struct opcode group11[] = {
4159 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4163 static const struct gprefix pfx_0f_ae_7 = {
4164 I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
4167 static const struct group_dual group15 = { {
4168 I(ModRM | Aligned16, em_fxsave),
4169 I(ModRM | Aligned16, em_fxrstor),
4170 N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4172 N, N, N, N, N, N, N, N,
4175 static const struct gprefix pfx_0f_6f_0f_7f = {
4176 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4179 static const struct instr_dual instr_dual_0f_2b = {
4183 static const struct gprefix pfx_0f_2b = {
4184 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4187 static const struct gprefix pfx_0f_10_0f_11 = {
4188 I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
4191 static const struct gprefix pfx_0f_28_0f_29 = {
4192 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4195 static const struct gprefix pfx_0f_e7 = {
4196 N, I(Sse, em_mov), N, N,
4199 static const struct escape escape_d9 = { {
4200 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4203 N, N, N, N, N, N, N, N,
4205 N, N, N, N, N, N, N, N,
4207 N, N, N, N, N, N, N, N,
4209 N, N, N, N, N, N, N, N,
4211 N, N, N, N, N, N, N, N,
4213 N, N, N, N, N, N, N, N,
4215 N, N, N, N, N, N, N, N,
4217 N, N, N, N, N, N, N, N,
4220 static const struct escape escape_db = { {
4221 N, N, N, N, N, N, N, N,
4224 N, N, N, N, N, N, N, N,
4226 N, N, N, N, N, N, N, N,
4228 N, N, N, N, N, N, N, N,
4230 N, N, N, N, N, N, N, N,
4232 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4234 N, N, N, N, N, N, N, N,
4236 N, N, N, N, N, N, N, N,
4238 N, N, N, N, N, N, N, N,
4241 static const struct escape escape_dd = { {
4242 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4245 N, N, N, N, N, N, N, N,
4247 N, N, N, N, N, N, N, N,
4249 N, N, N, N, N, N, N, N,
4251 N, N, N, N, N, N, N, N,
4253 N, N, N, N, N, N, N, N,
4255 N, N, N, N, N, N, N, N,
4257 N, N, N, N, N, N, N, N,
4259 N, N, N, N, N, N, N, N,
4262 static const struct instr_dual instr_dual_0f_c3 = {
4263 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4266 static const struct mode_dual mode_dual_63 = {
4267 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4270 static const struct instr_dual instr_dual_8d = {
4271 D(DstReg | SrcMem | ModRM | NoAccess), N
4274 static const struct opcode opcode_table[256] = {
4276 F6ALU(Lock, em_add),
4277 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4278 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4280 F6ALU(Lock | PageTable, em_or),
4281 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4284 F6ALU(Lock, em_adc),
4285 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4286 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4288 F6ALU(Lock, em_sbb),
4289 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4290 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4292 F6ALU(Lock | PageTable, em_and), N, N,
4294 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4296 F6ALU(Lock, em_xor), N, N,
4298 F6ALU(NoWrite, em_cmp), N, N,
4300 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4302 X8(I(SrcReg | Stack, em_push)),
4304 X8(I(DstReg | Stack, em_pop)),
4306 I(ImplicitOps | Stack | No64, em_pusha),
4307 I(ImplicitOps | Stack | No64, em_popa),
4308 N, MD(ModRM, &mode_dual_63),
4311 I(SrcImm | Mov | Stack, em_push),
4312 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4313 I(SrcImmByte | Mov | Stack, em_push),
4314 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4315 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4316 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4318 X16(D(SrcImmByte | NearBranch | IsBranch)),
4320 G(ByteOp | DstMem | SrcImm, group1),
4321 G(DstMem | SrcImm, group1),
4322 G(ByteOp | DstMem | SrcImm | No64, group1),
4323 G(DstMem | SrcImmByte, group1),
4324 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4325 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4327 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4328 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4329 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4330 ID(0, &instr_dual_8d),
4331 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4334 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4336 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4337 I(SrcImmFAddr | No64 | IsBranch, em_call_far), N,
4338 II(ImplicitOps | Stack, em_pushf, pushf),
4339 II(ImplicitOps | Stack, em_popf, popf),
4340 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4342 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4343 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4344 I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
4345 F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4347 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4348 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4349 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4350 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4352 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4354 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4356 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4357 I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm),
4358 I(ImplicitOps | NearBranch | IsBranch, em_ret),
4359 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4360 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4361 G(ByteOp, group11), G(0, group11),
4363 I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
4364 I(Stack | IsBranch, em_leave),
4365 I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
4366 I(ImplicitOps | IsBranch, em_ret_far),
4367 D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
4368 D(ImplicitOps | No64 | IsBranch),
4369 II(ImplicitOps | IsBranch, em_iret, iret),
4371 G(Src2One | ByteOp, group2), G(Src2One, group2),
4372 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4373 I(DstAcc | SrcImmUByte | No64, em_aam),
4374 I(DstAcc | SrcImmUByte | No64, em_aad),
4375 F(DstAcc | ByteOp | No64, em_salc),
4376 I(DstAcc | SrcXLat | ByteOp, em_mov),
4378 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4380 X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
4381 I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
4382 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4383 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4385 I(SrcImm | NearBranch | IsBranch, em_call),
4386 D(SrcImm | ImplicitOps | NearBranch | IsBranch),
4387 I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
4388 D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
4389 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4390 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4392 N, DI(ImplicitOps, icebp), N, N,
4393 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4394 G(ByteOp, group3), G(0, group3),
4396 D(ImplicitOps), D(ImplicitOps),
4397 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4398 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4401 static const struct opcode twobyte_table[256] = {
4403 G(0, group6), GD(0, &group7), N, N,
4404 N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
4405 II(ImplicitOps | Priv, em_clts, clts), N,
4406 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4407 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4409 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
4410 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
4412 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
4413 D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4414 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4415 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4416 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4417 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
4419 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
4420 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4421 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4423 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4426 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4427 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4428 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4431 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4432 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4433 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4434 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4435 I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
4436 I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
4438 N, N, N, N, N, N, N, N,
4440 X16(D(DstReg | SrcMem | ModRM)),
4442 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4447 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4452 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4454 X16(D(SrcImm | NearBranch | IsBranch)),
4456 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4458 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4459 II(ImplicitOps, em_cpuid, cpuid),
4460 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4461 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4462 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4464 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4465 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4466 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4467 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4468 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4469 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4471 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4472 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4473 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4474 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4475 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4476 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4480 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4481 I(DstReg | SrcMem | ModRM, em_bsf_c),
4482 I(DstReg | SrcMem | ModRM, em_bsr_c),
4483 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4485 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4486 N, ID(0, &instr_dual_0f_c3),
4487 N, N, N, GD(0, &group9),
4489 X8(I(DstReg, em_bswap)),
4491 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4493 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4494 N, N, N, N, N, N, N, N,
4496 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4499 static const struct instr_dual instr_dual_0f_38_f0 = {
4500 I(DstReg | SrcMem | Mov, em_movbe), N
4503 static const struct instr_dual instr_dual_0f_38_f1 = {
4504 I(DstMem | SrcReg | Mov, em_movbe), N
4507 static const struct gprefix three_byte_0f_38_f0 = {
4508 ID(0, &instr_dual_0f_38_f0), N, N, N
4511 static const struct gprefix three_byte_0f_38_f1 = {
4512 ID(0, &instr_dual_0f_38_f1), N, N, N
4516 * Insns below are selected by the prefix, which is indexed by the third
 * opcode byte.
4519 static const struct opcode opcode_map_0f_38[256] = {
4521 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4523 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4525 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4526 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4547 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4551 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4557 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4558 unsigned size, bool sign_extension)
4560 int rc = X86EMUL_CONTINUE;
4564 op->addr.mem.ea = ctxt->_eip;
4565 /* NB. Immediates are sign-extended as necessary. */
4566 switch (op->bytes) {
4568 op->val = insn_fetch(s8, ctxt);
4571 op->val = insn_fetch(s16, ctxt);
4574 op->val = insn_fetch(s32, ctxt);
4577 op->val = insn_fetch(s64, ctxt);
4580 if (!sign_extension) {
4581 switch (op->bytes) {
4589 op->val &= 0xffffffff;
4597 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4600 int rc = X86EMUL_CONTINUE;
4604 decode_register_operand(ctxt, op);
4607 rc = decode_imm(ctxt, op, 1, false);
4610 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4614 if (ctxt->d & BitOp)
4615 fetch_bit_operand(ctxt);
4616 op->orig_val = op->val;
4619 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4623 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4624 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4625 fetch_register_operand(op);
4626 op->orig_val = op->val;
4630 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4631 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4632 fetch_register_operand(op);
4633 op->orig_val = op->val;
4636 if (ctxt->d & ByteOp) {
4641 op->bytes = ctxt->op_bytes;
4642 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4643 fetch_register_operand(op);
4644 op->orig_val = op->val;
4648 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4650 register_address(ctxt, VCPU_REGS_RDI);
4651 op->addr.mem.seg = VCPU_SREG_ES;
4658 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4659 fetch_register_operand(op);
4664 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4667 rc = decode_imm(ctxt, op, 1, true);
4675 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4678 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4681 ctxt->memop.bytes = 1;
4682 if (ctxt->memop.type == OP_REG) {
4683 ctxt->memop.addr.reg = decode_register(ctxt,
4684 ctxt->modrm_rm, true);
4685 fetch_register_operand(&ctxt->memop);
4689 ctxt->memop.bytes = 2;
4692 ctxt->memop.bytes = 4;
4695 rc = decode_imm(ctxt, op, 2, false);
4698 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4702 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4704 register_address(ctxt, VCPU_REGS_RSI);
4705 op->addr.mem.seg = ctxt->seg_override;
4711 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4714 reg_read(ctxt, VCPU_REGS_RBX) +
4715 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4716 op->addr.mem.seg = ctxt->seg_override;
4721 op->addr.mem.ea = ctxt->_eip;
4722 op->bytes = ctxt->op_bytes + 2;
4723 insn_fetch_arr(op->valptr, op->bytes, ctxt);
4726 ctxt->memop.bytes = ctxt->op_bytes + 2;
4730 op->val = VCPU_SREG_ES;
4734 op->val = VCPU_SREG_CS;
4738 op->val = VCPU_SREG_SS;
4742 op->val = VCPU_SREG_DS;
4746 op->val = VCPU_SREG_FS;
4750 op->val = VCPU_SREG_GS;
4753 /* Special instructions do their own operand decoding. */
4755 op->type = OP_NONE; /* Disable writeback. */
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
{
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
	bool op_prefix = false;
	bool has_seg_override = false;
	struct opcode opcode;
	u16 dummy;
	struct desc_struct desc;

	ctxt->memop.type = OP_NONE;
	ctxt->memopp = NULL;
	ctxt->_eip = ctxt->eip;
	ctxt->fetch.ptr = ctxt->fetch.data;
	ctxt->fetch.end = ctxt->fetch.data + insn_len;
	ctxt->opcode_len = 1;
	ctxt->intercept = x86_intercept_none;
	if (insn_len > 0)
		memcpy(ctxt->fetch.data, insn, insn_len);
	else {
		rc = __do_insn_fetch_bytes(ctxt, 1);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
		def_op_bytes = def_ad_bytes = 2;
		ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
		if (desc.d)
			def_op_bytes = def_ad_bytes = 4;
		break;
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
		return EMULATION_FAILED;
	}

	ctxt->op_bytes = def_op_bytes;
	ctxt->ad_bytes = def_ad_bytes;
	/* Legacy prefixes. */
	for (;;) {
		switch (ctxt->b = insn_fetch(u8, ctxt)) {
		case 0x66:	/* operand-size override */
			op_prefix = true;
			/* switch between 2/4 bytes */
			ctxt->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 6;
			break;
		case 0x26:	/* ES override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_ES;
			break;
		case 0x2e:	/* CS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_CS;
			break;
		case 0x36:	/* SS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_SS;
			break;
		case 0x3e:	/* DS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_DS;
			break;
		case 0x64:	/* FS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_FS;
			break;
		case 0x65:	/* GS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_GS;
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			ctxt->rex_prefix = ctxt->b;
			continue;
		case 0xf0:	/* LOCK */
			ctxt->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
		case 0xf3:	/* REP/REPE/REPZ */
			ctxt->rep_prefix = ctxt->b;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

		ctxt->rex_prefix = 0;
	}

done_prefixes:

	/* REX prefix. */
	if (ctxt->rex_prefix & 8)
		ctxt->op_bytes = 8;	/* REX.W */
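
	/*
	 * Illustrative note (added commentary, not in the original file): the
	 * XOR trick above flips between the two legal sizes.  With a 32-bit
	 * default (def_op_bytes == 4), a 0x66 prefix yields 4 ^ 6 == 2, and
	 * with a 16-bit default it yields 2 ^ 6 == 4.  Likewise a 0x67 prefix
	 * in long mode toggles the 8-byte address-size default down to
	 * 8 ^ 12 == 4, and elsewhere toggles between 2 and 4.
	 */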
	/* Opcode byte(s). */
	opcode = opcode_table[ctxt->b];
	/* Two-byte opcode? */
	if (ctxt->b == 0x0f) {
		ctxt->opcode_len = 2;
		ctxt->b = insn_fetch(u8, ctxt);
		opcode = twobyte_table[ctxt->b];

		/* 0F_38 opcode map */
		if (ctxt->b == 0x38) {
			ctxt->opcode_len = 3;
			ctxt->b = insn_fetch(u8, ctxt);
			opcode = opcode_map_0f_38[ctxt->b];
		}
	}
	ctxt->d = opcode.flags;

	if (ctxt->d & ModRM)
		ctxt->modrm = insn_fetch(u8, ctxt);

	/* vex-prefix instructions are not implemented */
	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
		ctxt->d = NotImpl;
	}
	while (ctxt->d & GroupMask) {
		switch (ctxt->d & GroupMask) {
		case Group:
			goffset = (ctxt->modrm >> 3) & 7;
			opcode = opcode.u.group[goffset];
			break;
		case GroupDual:
			goffset = (ctxt->modrm >> 3) & 7;
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.gdual->mod3[goffset];
			else
				opcode = opcode.u.gdual->mod012[goffset];
			break;
		case RMExt:
			goffset = ctxt->modrm & 7;
			opcode = opcode.u.group[goffset];
			break;
		case Prefix:
			if (ctxt->rep_prefix && op_prefix)
				return EMULATION_FAILED;
			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
			switch (simd_prefix) {
			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
			}
			break;
		case Escape:
			if (ctxt->modrm > 0xbf) {
				size_t size = ARRAY_SIZE(opcode.u.esc->high);
				u32 index = array_index_nospec(
					ctxt->modrm - 0xc0, size);

				opcode = opcode.u.esc->high[index];
			} else {
				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
			}
			break;
		case InstrDual:
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.idual->mod3;
			else
				opcode = opcode.u.idual->mod012;
			break;
		case ModeDual:
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				opcode = opcode.u.mdual->mode64;
			else
				opcode = opcode.u.mdual->mode32;
			break;
		default:
			return EMULATION_FAILED;
		}

		ctxt->d &= ~(u64)GroupMask;
		ctxt->d |= opcode.flags;
	}
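
	/*
	 * Illustrative note (added commentary, not in the original file): each
	 * pass of the loop above replaces the table entry with a more specific
	 * one selected by the ModRM byte (or by a mandatory 0x66/0xF2/0xF3
	 * prefix), then merges the refined flags into ctxt->d.  For example,
	 * the Group 5 opcode 0xff uses ModRM bits 5:3 to pick between its
	 * inc, dec, call, jmp and push variants.
	 */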
	ctxt->is_branch = opcode.flags & IsBranch;

	/* Unrecognised? */
	if (ctxt->d == 0)
		return EMULATION_FAILED;

	ctxt->execute = opcode.u.execute;

	if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
	    likely(!(ctxt->d & EmulateOnUD)))
		return EMULATION_FAILED;

	if (unlikely(ctxt->d &
	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
	     No16))) {
		/*
		 * These are copied unconditionally here, and checked unconditionally
		 * in x86_emulate_insn.
		 */
		ctxt->check_perm = opcode.check_perm;
		ctxt->intercept = opcode.intercept;

		if (ctxt->d & NotImpl)
			return EMULATION_FAILED;

		if (mode == X86EMUL_MODE_PROT64) {
			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
				ctxt->op_bytes = 8;
			else if (ctxt->d & NearBranch)
				ctxt->op_bytes = 8;
		}

		if (ctxt->d & Op3264) {
			if (mode == X86EMUL_MODE_PROT64)
				ctxt->op_bytes = 8;
			else
				ctxt->op_bytes = 4;
		}

		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
			ctxt->op_bytes = 4;

		if (ctxt->d & Sse)
			ctxt->op_bytes = 16;
		else if (ctxt->d & Mmx)
			ctxt->op_bytes = 8;
	}
	/* ModRM and SIB bytes. */
	if (ctxt->d & ModRM) {
		rc = decode_modrm(ctxt, &ctxt->memop);
		if (!has_seg_override) {
			has_seg_override = true;
			ctxt->seg_override = ctxt->modrm_seg;
		}
	} else if (ctxt->d & MemAbs)
		rc = decode_abs(ctxt, &ctxt->memop);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!has_seg_override)
		ctxt->seg_override = VCPU_SREG_DS;

	ctxt->memop.addr.mem.seg = ctxt->seg_override;

	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/* Decode and fetch the destination operand: register or memory. */
	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

	if (ctxt->rip_relative && likely(ctxt->memopp))
		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
					ctxt->memopp->addr.mem.ea + ctxt->_eip);

done:
	if (rc == X86EMUL_PROPAGATE_FAULT)
		ctxt->have_exception = true;
	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}
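
/*
 * Illustrative note (added commentary, not in the original file): callers in
 * KVM's emulation path typically invoke x86_decode_insn() once per
 * instruction and, when it returns EMULATION_OK, follow up with
 * x86_emulate_insn(), re-invoking the latter while it keeps asking for
 * EMULATION_RESTART (e.g. a string instruction that exhausted its pio
 * read-ahead buffer).
 */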
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
{
	return ctxt->d & PageTable;
}
static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
	/* The second termination condition only applies for REPE
	 * and REPNE. Test if the repeat string operation prefix is
	 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
	 * corresponding termination condition according to:
	 * - if REPE/REPZ and ZF = 0 then done
	 * - if REPNE/REPNZ and ZF = 1 then done
	 */
	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
		return true;

	return false;
}
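
/*
 * Illustrative note (added commentary, not in the original file): only the
 * compare-style string opcodes consult ZF here -- 0xa6/0xa7 are cmps and
 * 0xae/0xaf are scas.  A "repe cmpsb" therefore stops either when the count
 * register reaches zero (checked elsewhere) or when the first mismatch
 * clears ZF, while a "repne scasb" stops on the first match, which sets ZF.
 */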
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	rc = asm_safe("fwait");

	if (unlikely(rc != X86EMUL_CONTINUE))
		return emulate_exception(ctxt, MF_VECTOR, 0, false);

	return X86EMUL_CONTINUE;
}
static void fetch_possible_mmx_operand(struct operand *op)
{
	if (op->type == OP_MM)
		kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
}
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
{
	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

	if (!(ctxt->d & ByteOp))
		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;

	asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
	      [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
	    : "c"(ctxt->src2.val));

	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	if (!fop) /* exception is returned in fop variable */
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}
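
/*
 * Illustrative note (added commentary, not in the original file): the fastop
 * thunks are tiny stubs laid out at FASTOP_SIZE intervals, one per operand
 * width, so adding __ffs(ctxt->dst.bytes) * FASTOP_SIZE picks the 1-, 2-, 4-
 * or 8-byte variant.  The inline asm loads the guest's arithmetic flags,
 * calls the stub with dst in rax, src in rdx and src2 in rcx, then captures
 * the resulting flags back into ctxt->eflags.
 */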
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
	/* Clear fields that are set conditionally but read without a guard. */
	ctxt->rip_relative = false;
	ctxt->rex_prefix = 0;
	ctxt->lock_prefix = 0;
	ctxt->rep_prefix = 0;
	ctxt->regs_valid = 0;
	ctxt->regs_dirty = 0;

	ctxt->io_read.pos = 0;
	ctxt->io_read.end = 0;
	ctxt->mem_read.end = 0;
}
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = ctxt->dst.type;
	bool is_guest_mode = ctxt->ops->is_guest_mode(ctxt);

	ctxt->mem_read.pos = 0;

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if (unlikely(ctxt->d &
		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
		    (ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(&ctxt->src);
			fetch_possible_mmx_operand(&ctxt->src2);
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(&ctxt->dst);
		}
		if (unlikely(is_guest_mode) && ctxt->intercept) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				string_registers_quirk(ctxt);
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~X86_EFLAGS_RF;
				goto done;
			}
		}
	}
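
	/*
	 * Illustrative note (added commentary, not in the original file): a
	 * REP-prefixed string instruction with a (masked) count of zero is a
	 * nop, e.g. "rep movsb" with ecx == 0 retires immediately: RIP is
	 * advanced past the instruction, RF is cleared and no memory access
	 * is emulated.
	 */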
	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				    &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE) {
			if (!(ctxt->d & NoWrite) &&
			    rc == X86EMUL_PROPAGATE_FAULT &&
			    ctxt->exception.vector == PF_VECTOR)
				ctxt->exception.error_code |= PFERR_WRITE_MASK;
			goto done;
		}
	}
	/* Copy full 64-bit value for CMPXCHG8B. */
	ctxt->dst.orig_val64 = ctxt->dst.val64;

special_insn:

	if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= X86_EFLAGS_RF;
	else
		ctxt->eflags &= ~X86_EFLAGS_RF;

	if (ctxt->execute) {
		if (ctxt->d & Fastop)
			rc = fastop(ctxt, ctxt->fop);
		else
			rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;

	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
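	/*
	 * Illustrative note (added commentary, not in the original file): with
	 * ctxt->op_bytes == 4 the 0x98 opcode is cwde, so e.g. AX == 0x8000
	 * sign-extends to EAX == 0xffff8000; with op_bytes == 2 it is cbw and
	 * only AL is widened into AX.
	 */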
	case 0xcc: /* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd: /* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce: /* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4: /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5: /* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}
	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * restore dst type in case the decoding will be reused
	 * (happens for string instruction )
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);

	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;
		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter guest when pio read ahead buffer is empty
			 * or, if it is not used, after each 1024 iteration.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since instruction is restarted
				 * we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}

	ctxt->eip = ctxt->_eip;
	if (ctxt->mode != X86EMUL_MODE_PROT64)
		ctxt->eip = (u32)ctxt->_eip;
done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		if (KVM_EMULATOR_BUG_ON(ctxt->exception.vector > 0x1f, ctxt))
			return EMULATION_FAILED;
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
twobyte_insn:
	switch (ctxt->b) {
	case 0x09: /* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08: /* invd */
	case 0x0d: /* GrpP (prefetch) */
	case 0x18: /* Grp16 (prefetch/nop) */
	case 0x1f: /* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
		break;
	case 0x40 ... 0x4f: /* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
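	/*
	 * Illustrative note (added commentary, not in the original file): when
	 * the condition is false, a 32-bit cmov still writes the destination
	 * (zero-extending the unchanged value into the upper half of the
	 * 64-bit register), which is why writeback is only suppressed for
	 * op_bytes != 4.
	 */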
	case 0x80 ... 0x8f: /* jnz rel, etc*/
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f: /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7: /* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf: /* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							 (s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}
void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}

bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->rep_prefix && (ctxt->d & String))
		return false;

	if (ctxt->d & TwoMemOp)