Merge branch 'kvm-fix-svm-races' into HEAD
author    Paolo Bonzini <pbonzini@redhat.com>
Wed, 31 Mar 2021 11:35:52 +0000 (07:35 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Thu, 1 Apr 2021 09:19:48 +0000 (05:19 -0400)
arch/x86/kvm/svm/nested.c

@@@ -247,60 -225,50 +247,59 @@@ static bool nested_vmcb_check_controls(
        return true;
  }
  
 -static bool nested_vmcb_check_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
 +static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
 +                                    struct vmcb_save_area *save)
  {
 -      bool vmcb12_lma;
 +      /*
 +       * These checks are also performed by KVM_SET_SREGS,
 +       * except that EFER.LMA is not checked by SVM against
 +       * CR0.PG && EFER.LME.
 +       */
 +      if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
 +              if (CC(!(save->cr4 & X86_CR4_PAE)) ||
 +                  CC(!(save->cr0 & X86_CR0_PE)) ||
 +                  CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
 +                      return false;
 +      }
 +
 +      if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
 +              return false;
  
 -      if ((vmcb12->save.efer & EFER_SVME) == 0)
 +      return true;
 +}
 +
 +/* Common checks that apply to both L1 and L2 state.  */
 +static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
 +                                  struct vmcb_save_area *save)
 +{
+       /*
+        * FIXME: these should be done after copying the fields,
+        * to avoid TOC/TOU races.  For these save area checks
+        * the possible damage is limited since kvm_set_cr0 and
+        * kvm_set_cr4 handle failure; EFER_SVME is an exception
+        * so it is force-set later in nested_prepare_vmcb_save.
+        */
 +      if (CC(!(save->efer & EFER_SVME)))
                return false;
  
 -      if (((vmcb12->save.cr0 & X86_CR0_CD) == 0) && (vmcb12->save.cr0 & X86_CR0_NW))
 +      if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
 +          CC(save->cr0 & ~0xffffffffULL))
                return false;
  
 -      if (!kvm_dr6_valid(vmcb12->save.dr6) || !kvm_dr7_valid(vmcb12->save.dr7))
 +      if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
                return false;
  
 -      vmcb12_lma = (vmcb12->save.efer & EFER_LME) && (vmcb12->save.cr0 & X86_CR0_PG);
 +      if (!nested_vmcb_check_cr3_cr4(vcpu, save))
 +              return false;
  
 -      if (!vmcb12_lma) {
 -              if (vmcb12->save.cr4 & X86_CR4_PAE) {
 -                      if (vmcb12->save.cr3 & MSR_CR3_LEGACY_PAE_RESERVED_MASK)
 -                              return false;
 -              } else {
 -                      if (vmcb12->save.cr3 & MSR_CR3_LEGACY_RESERVED_MASK)
 -                              return false;
 -              }
 -      } else {
 -              if (!(vmcb12->save.cr4 & X86_CR4_PAE) ||
 -                  !(vmcb12->save.cr0 & X86_CR0_PE) ||
 -                  (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK))
 -                      return false;
 -      }
 -      if (kvm_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
 +      if (CC(!kvm_valid_efer(vcpu, save->efer)))
                return false;
  
        return true;
  }
  
- static bool nested_vmcb_checks(struct kvm_vcpu *vcpu, struct vmcb *vmcb12)
- {
-       if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save))
-               return false;
-       return nested_vmcb_check_controls(&vmcb12->control);
- }
 -static void load_nested_vmcb_control(struct vcpu_svm *svm,
 -                                   struct vmcb_control_area *control)
 +static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
 +                                          struct vmcb_control_area *control)
  {
        copy_vmcb_control_area(&svm->nested.ctl, control);
  
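The FIXME in nested_vmcb_valid_sregs() above notes that the save-area checks run before the fields are copied out of guest memory, leaving a time-of-check/time-of-use window. As a rough illustration of the safer ordering the comment asks for (snapshot first, then validate the private copy), here is a minimal standalone C sketch; the names (save_snapshot, snapshot_is_valid, enter_nested, MY_CR0_*) are hypothetical and are not KVM APIs:

#include <stdbool.h>
#include <string.h>

#define MY_CR0_NW (1UL << 29)	/* CR0.NW: Not Write-through */
#define MY_CR0_CD (1UL << 30)	/* CR0.CD: Cache Disable */

struct save_snapshot {
	unsigned long efer;
	unsigned long cr0;
	unsigned long cr4;
};

static bool snapshot_is_valid(const struct save_snapshot *s)
{
	/* Same spirit as the checks above: CD clear with NW set is illegal. */
	if (!(s->cr0 & MY_CR0_CD) && (s->cr0 & MY_CR0_NW))
		return false;
	return true;
}

static int enter_nested(struct save_snapshot *dst,
			const struct save_snapshot *guest_mapped)
{
	memcpy(dst, guest_mapped, sizeof(*dst));	/* copy once ... */
	if (!snapshot_is_valid(dst))			/* ... then check the copy */
		return -1;
	/* From here on, consume only *dst; later guest writes cannot matter. */
	return 0;
}
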
@@@ -411,50 -379,27 +410,57 @@@ static int nested_svm_load_cr3(struct k
        return 0;
  }
  
 -static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
 +void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
 +{
 +      if (!svm->nested.vmcb02.ptr)
 +              return;
 +
 +      /* FIXME: merge g_pat from vmcb01 and vmcb12.  */
 +      svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
 +}
 +
 +static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
  {
 +      bool new_vmcb12 = false;
 +
 +      nested_vmcb02_compute_g_pat(svm);
 +
        /* Load the nested guest state */
 -      svm->vmcb->save.es = vmcb12->save.es;
 -      svm->vmcb->save.cs = vmcb12->save.cs;
 -      svm->vmcb->save.ss = vmcb12->save.ss;
 -      svm->vmcb->save.ds = vmcb12->save.ds;
 -      svm->vmcb->save.gdtr = vmcb12->save.gdtr;
 -      svm->vmcb->save.idtr = vmcb12->save.idtr;
 -      kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags);
 +
 +      if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
 +              new_vmcb12 = true;
 +              svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa;
 +      }
 +
 +      if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
 +              svm->vmcb->save.es = vmcb12->save.es;
 +              svm->vmcb->save.cs = vmcb12->save.cs;
 +              svm->vmcb->save.ss = vmcb12->save.ss;
 +              svm->vmcb->save.ds = vmcb12->save.ds;
 +              svm->vmcb->save.cpl = vmcb12->save.cpl;
 +              vmcb_mark_dirty(svm->vmcb, VMCB_SEG);
 +      }
 +
 +      if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
 +              svm->vmcb->save.gdtr = vmcb12->save.gdtr;
 +              svm->vmcb->save.idtr = vmcb12->save.idtr;
 +              vmcb_mark_dirty(svm->vmcb, VMCB_DT);
 +      }
 +
 +      kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
-       svm_set_efer(&svm->vcpu, vmcb12->save.efer);
+       /*
+        * Force-set EFER_SVME even though it is checked earlier on the
+        * VMCB12, because the guest can flip the bit between the check
+        * and now.  Clearing EFER_SVME would call svm_free_nested.
+        */
+       svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME);
        svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
        svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
 -      svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = vmcb12->save.cr2;
 +
 +      svm->vcpu.arch.cr2 = vmcb12->save.cr2;
 +
        kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
        kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
        kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
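The new_vmcb12 / vmcb_is_dirty() logic above copies segment and descriptor-table state into the active VMCB only when a different vmcb12 is being entered or L1 has marked the corresponding clean bit dirty. A simplified standalone sketch of that caching pattern follows; shadow_state, sync_from_source and the DIRTY_* flags are hypothetical stand-ins for the KVM structures:

enum { DIRTY_SEG = 1u << 0, DIRTY_DT = 1u << 1 };

struct shadow_state {
	unsigned long long last_src_id;	/* identity of the last source (GPA) */
	unsigned int seg;		/* stands in for es/cs/ss/ds/cpl */
	unsigned int dt;		/* stands in for gdtr/idtr */
};

static void sync_from_source(struct shadow_state *shadow,
			     unsigned long long src_id,
			     unsigned int src_seg, unsigned int src_dt,
			     unsigned int dirty_bits)
{
	int new_source = (src_id != shadow->last_src_id);

	shadow->last_src_id = src_id;

	/* Copy a field group only when it may have changed. */
	if (new_source || (dirty_bits & DIRTY_SEG))
		shadow->seg = src_seg;
	if (new_source || (dirty_bits & DIRTY_DT))
		shadow->dt = src_dt;
}

The benefit is the same as in the patch: repeated VMRUNs of an unchanged vmcb12 skip most of the state copying.
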
@@@ -521,54 -440,22 +527,53 @@@ static void nested_vmcb02_prepare_contr
        enter_guest_mode(&svm->vcpu);
  
        /*
 -       * Merge guest and host intercepts - must be called  with vcpu in
 -       * guest-mode to take affect here
 +       * Merge guest and host intercepts - must be called with vcpu in
 +       * guest-mode to take effect.
         */
        recalc_intercepts(svm);
 +}
  
 -      vmcb_mark_all_dirty(svm->vmcb);
 +static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 +{
 +      /*
 +       * Some VMCB state is shared between L1 and L2 and thus has to be
 +       * moved at the time of nested vmrun and vmexit.
 +       *
 +       * VMLOAD/VMSAVE state would also belong in this category, but KVM
 +       * always performs VMLOAD and VMSAVE from the VMCB01.
 +       */
 +      to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
  }
  
 -int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb12_gpa,
 +int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
                         struct vmcb *vmcb12)
  {
 +      struct vcpu_svm *svm = to_svm(vcpu);
        int ret;
  
 +      trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
 +                             vmcb12->save.rip,
 +                             vmcb12->control.int_ctl,
 +                             vmcb12->control.event_inj,
 +                             vmcb12->control.nested_ctl);
 +
 +      trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
 +                                  vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
 +                                  vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
 +                                  vmcb12->control.intercepts[INTERCEPT_WORD3],
 +                                  vmcb12->control.intercepts[INTERCEPT_WORD4],
 +                                  vmcb12->control.intercepts[INTERCEPT_WORD5]);
 +
 +
        svm->nested.vmcb12_gpa = vmcb12_gpa;
 -      nested_prepare_vmcb_save(svm, vmcb12);
 -      nested_prepare_vmcb_control(svm);
 +
 +      WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);
 +
 +      nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
-       nested_load_control_from_vmcb12(svm, &vmcb12->control);
 +
 +      svm_switch_vmcb(svm, &svm->nested.vmcb02);
 +      nested_vmcb02_prepare_control(svm);
 +      nested_vmcb02_prepare_save(svm, vmcb12);
  
        ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
                                  nested_npt_enabled(svm));
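enter_svm_guest_mode() above now carries shared state from vmcb01 into vmcb02 and switches the active VMCB before preparing the control and save areas. A simplified sketch of that two-VMCB arrangement; my_vmcb, my_vcpu and the functions are hypothetical names, and only spec_ctrl is shown as shared state, mirroring nested_svm_copy_common_state():

struct my_vmcb {
	unsigned long long spec_ctrl;	/* shared between L1 and L2 */
	/* ... per-level control and save state ... */
};

struct my_vcpu {
	struct my_vmcb vmcb01;		/* L1 state */
	struct my_vmcb vmcb02;		/* what actually runs while L2 is active */
	struct my_vmcb *active;		/* VMCB used by the next VMRUN */
};

static void my_enter_guest_mode(struct my_vcpu *v)
{
	/* Shared state moves at nested VMRUN ... */
	v->vmcb02.spec_ctrl = v->vmcb01.spec_ctrl;
	v->active = &v->vmcb02;
}

static void my_leave_guest_mode(struct my_vcpu *v)
{
	/* ... and moves back at nested VMEXIT. */
	v->vmcb01.spec_ctrl = v->vmcb02.spec_ctrl;
	v->active = &v->vmcb01;
}
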
@@@ -614,7 -497,10 +619,10 @@@ int nested_svm_vmrun(struct kvm_vcpu *v
        if (WARN_ON_ONCE(!svm->nested.initialized))
                return -EINVAL;
  
-       if (!nested_vmcb_checks(vcpu, vmcb12)) {
 -      load_nested_vmcb_control(svm, &vmcb12->control);
++      nested_load_control_from_vmcb12(svm, &vmcb12->control);
 -      if (!nested_vmcb_check_save(svm, vmcb12) ||
++      if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
+           !nested_vmcb_check_controls(&svm->nested.ctl)) {
                vmcb12->control.exit_code    = SVM_EXIT_ERR;
                vmcb12->control.exit_code_hi = 0;
                vmcb12->control.exit_info_1  = 0;
@@@ -1251,7 -1189,7 +1259,7 @@@ static int svm_set_nested_state(struct 
  
        /*
         * Processor state contains L2 state.  Check that it is
--       * valid for guest mode (see nested_vmcb_checks).
++       * valid for guest mode (see nested_vmcb_valid_sregs).
         */
        cr0 = kvm_read_cr0(vcpu);
          if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))