kvm: x86: Allow userspace to handle emulation errors
author Aaron Lewis <aaronlewis@google.com>
Mon, 10 May 2021 14:48:33 +0000 (07:48 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
Thu, 24 Jun 2021 22:00:48 +0000 (18:00 -0400)
Add a fallback mechanism to the in-kernel instruction emulator that
gives userspace the opportunity to process an instruction the emulator
was unable to handle.  When the in-kernel instruction emulator fails to
process an instruction it will either inject a #UD into the guest or exit
to userspace with exit reason KVM_EXIT_INTERNAL_ERROR.  This is because it
does not know how to proceed in an appropriate manner.  This feature lets
userspace get involved to see if it can figure out a better path
forward.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Reviewed-by: David Edmondson <david.edmondson@oracle.com>
Message-Id: <20210510144834.658457-2-aaronlewis@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Documentation/virt/kvm/api.rst
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/x86.c
include/uapi/linux/kvm.h

index 5d8db49..3b6e3b1 100644 (file)
@@ -6546,6 +6546,7 @@ KVM_RUN_BUS_LOCK flag is used to distinguish between them.
 This capability can be used to check / enable 2nd DAWR feature provided
 by POWER10 processor.
 
+
 7.24 KVM_CAP_VM_COPY_ENC_CONTEXT_FROM
 -------------------------------------
 
@@ -6603,6 +6604,25 @@ present in the "ibm,hypertas-functions" device-tree property.
 This capability is enabled for hypervisors on platforms like POWER9
 that support radix MMU.
 
+7.27 KVM_CAP_EXIT_ON_EMULATION_FAILURE
+--------------------------------------
+
+:Architectures: x86
+:Parameters: args[0] whether the feature should be enabled or not
+
+When this capability is enabled, an emulation failure will result in an exit
+to userspace with KVM_EXIT_INTERNAL_ERROR and suberror
+KVM_INTERNAL_ERROR_EMULATION (except when the emulator was invoked to handle
+a VMware backdoor instruction). Furthermore, KVM will now provide up to 15
+instruction bytes for any exit to userspace resulting from an emulation
+failure.  When these exits to userspace occur, use the emulation_failure
+struct instead of the internal struct.  They both have the same layout, but
+the emulation_failure struct matches the content better.  It also explicitly
+defines the 'flags' field, which describes which fields in the struct are
+valid (i.e. if KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES is set in
+the 'flags' field then both 'insn_size' and 'insn_bytes' have valid data).
+
+
 8. Other capabilities.
 ======================
 
index 3cd496c..c9ec5c7 100644 (file)
@@ -1114,6 +1114,12 @@ struct kvm_arch {
        bool exception_payload_enabled;
 
        bool bus_lock_detection_enabled;
+       /*
+        * If exit_on_emulation_error is set, and the in-kernel instruction
+        * emulator fails to emulate an instruction, allow userspace
+        * the opportunity to look at it.
+        */
+       bool exit_on_emulation_error;
 
        /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
        u32 user_space_msr_mask;
index a7c7b2b..17468d9 100644 (file)
@@ -4010,6 +4010,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 #endif
        case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
        case KVM_CAP_SREGS2:
+       case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
                r = 1;
                break;
        case KVM_CAP_EXIT_HYPERCALL:
@@ -5649,6 +5650,13 @@ split_irqchip_unlock:
                kvm->arch.hypercall_exit_enabled = cap->args[0];
                r = 0;
                break;
+       case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
+               r = -EINVAL;
+               if (cap->args[0] & ~1)
+                       break;
+               kvm->arch.exit_on_emulation_error = cap->args[0];
+               r = 0;
+               break;
        default:
                r = -EINVAL;
                break;
@@ -7444,8 +7452,33 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 }
 EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
 
+static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
+{ /* Fill vcpu->run so the exit reports an emulation failure to userspace. */
+       struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
+       u32 insn_size = ctxt->fetch.end - ctxt->fetch.data; /* presumably the number of fetched instruction bytes - confirm */
+       struct kvm_run *run = vcpu->run;
+
+       run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION;
+       run->emulation_failure.ndata = 0; /* stays 0 (no payload) when insn_size is 0 */
+       run->emulation_failure.flags = 0;
+
+       if (insn_size) {
+               run->emulation_failure.ndata = 3; /* in u64 units: flags (1) + insn_size/insn_bytes[15] (2) */
+               run->emulation_failure.flags |=
+                       KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
+               run->emulation_failure.insn_size = insn_size;
+               memset(run->emulation_failure.insn_bytes, 0x90,
+                      sizeof(run->emulation_failure.insn_bytes)); /* pre-fill so bytes past insn_size hold 0x90 */
+               memcpy(run->emulation_failure.insn_bytes,
+                      ctxt->fetch.data, insn_size); /* NOTE(review): assumes insn_size <= sizeof(insn_bytes) - confirm */
+       }
+}
+
 static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
 {
+       struct kvm *kvm = vcpu->kvm;
+
        ++vcpu->stat.insn_emulation_fail;
        trace_kvm_emulate_insn_failed(vcpu);
 
@@ -7454,10 +7487,9 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
                return 1;
        }
 
-       if (emulation_type & EMULTYPE_SKIP) {
-               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-               vcpu->run->internal.ndata = 0;
+       if (kvm->arch.exit_on_emulation_error ||
+           (emulation_type & EMULTYPE_SKIP)) {
+               prepare_emulation_failure_exit(vcpu);
                return 0;
        }
 
index f1ba602..68c9e6d 100644 (file)
@@ -280,6 +280,9 @@ struct kvm_xen_exit {
 /* Encounter unexpected vm-exit reason */
 #define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON      4
 
+/* Flags that describe what fields in emulation_failure hold valid data. */
+#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
        /* in */
@@ -383,6 +386,25 @@ struct kvm_run {
                        __u32 ndata;
                        __u64 data[16];
                } internal;
+               /*
+                * KVM_INTERNAL_ERROR_EMULATION
+                *
+                * "struct emulation_failure" is an overlay of "struct internal"
+                * that is used for the KVM_INTERNAL_ERROR_EMULATION sub-type of
+                * KVM_EXIT_INTERNAL_ERROR.  Note, unlike other internal error
+                * sub-types, this struct is ABI!  It also needs to be backwards
+                * compatible with "struct internal".  Take special care that
+                * "ndata" is correct, that new fields are enumerated in "flags",
+                * and that each flag enumerates fields that are 64-bit aligned
+                * and sized (so that ndata+internal.data[] is valid/accurate).
+                */
+               struct {
+                       __u32 suberror;
+                       __u32 ndata;
+                       __u64 flags;
+                       __u8  insn_size;
+                       __u8  insn_bytes[15];
+               } emulation_failure;
                /* KVM_EXIT_OSI */
                struct {
                        __u64 gprs[32];
@@ -1088,6 +1110,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_EXIT_HYPERCALL 201
 #define KVM_CAP_PPC_RPT_INVALIDATE 202
 #define KVM_CAP_BINARY_STATS_FD 203
+#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
 
 #ifdef KVM_CAP_IRQ_ROUTING