Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus...
author     Paolo Bonzini <pbonzini@redhat.com>  Thu, 2 Nov 2017 17:21:26 +0000 (18:21 +0100)
committer  Paolo Bonzini <pbonzini@redhat.com>  Thu, 2 Nov 2017 17:21:26 +0000 (18:21 +0100)

Apart from various bugfixes and code cleanups, the major new feature
is the ability to run guests using the hashed page table (HPT) MMU
mode on a host that is using the radix MMU mode.  Because of limitations
in the current POWER9 chip (all SMT threads in each core must use the
same MMU mode, HPT or radix), this requires the host to be configured
similarly to POWER8: the host runs in single-threaded mode (only
thread 0 of each core online), and KVM wakes up the other threads of a
core when a guest is to be run, using them to run guest VCPUs.  A new
module parameter, "indep_threads_mode", is normally Y on POWER9 but
must be set to N before any HPT guests can be run on a radix host:

    # echo N >/sys/module/kvm_hv/parameters/indep_threads_mode
    # ppc64_cpu --smt=off
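
To confirm the configuration before starting an HPT guest (an
illustrative check only; the output shown assumes the settings above
took effect):

    # cat /sys/module/kvm_hv/parameters/indep_threads_mode
    N
    # ppc64_cpu --smt
    SMT is off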

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8700b84..7233445 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1065,6 +1065,7 @@ struct kvm_x86_ops {
        int (*smi_allowed)(struct kvm_vcpu *vcpu);
        int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
        int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
+       int (*enable_smi_window)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
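
The hunks that follow implement the new hook for SVM (arming STGI
interception when VGIF is available), stub it out for VMX, and consume
it from the SMI injection path in x86.c.  The stand-alone model below
only illustrates the callback's contract; it is not kernel code, and
every name in it is local to the example:

    /* Model of the enable_smi_window contract: return nonzero after
     * arming an intercept that will exit when SMIs become injectable,
     * return zero if an SMI can be injected right away. */
    #include <stdbool.h>
    #include <stdio.h>

    struct model_vcpu {
        bool smi_pending;
        bool in_smm;
        bool gif_set;        /* SVM-like global interrupt flag */
        bool stgi_intercept; /* models set_intercept(INTERCEPT_STGI) */
    };

    static int model_enable_smi_window(struct model_vcpu *v)
    {
        if (!v->gif_set) {
            v->stgi_intercept = true; /* STGI will cause a VM exit */
            return 1;
        }
        return 0;
    }

    /* Mirrors the vcpu_enter_guest() change in the x86.c hunk below. */
    static bool model_needs_immediate_exit(struct model_vcpu *v)
    {
        if (v->smi_pending && !v->in_smm)
            return !model_enable_smi_window(v);
        return false;
    }

    int main(void)
    {
        struct model_vcpu v = { .smi_pending = true, .gif_set = false };
        printf("immediate exit: %d, STGI intercepted: %d\n",
               model_needs_immediate_exit(&v), v.stgi_intercept);
        return 0;
    }
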
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ff94552..b71daed 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3187,7 +3187,7 @@ static int stgi_interception(struct vcpu_svm *svm)
 
        /*
         * If VGIF is enabled, the STGI intercept is only added to
-        * detect the opening of the NMI window; remove it now.
+        * detect the opening of the SMI/NMI window; remove it now.
         */
        if (vgif_enabled(svm))
                clr_intercept(svm, INTERCEPT_STGI);
@@ -5476,6 +5476,19 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
        return ret;
 }
 
+static int enable_smi_window(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       if (!gif_set(svm)) {
+               if (vgif_enabled(svm))
+                       set_intercept(svm, INTERCEPT_STGI);
+               /* STGI will cause a vm exit */
+               return 1;
+       }
+       return 0;
+}
+
 static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
        .cpu_has_kvm_support = has_svm,
        .disabled_by_bios = is_disabled,
@@ -5590,6 +5603,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
        .smi_allowed = svm_smi_allowed,
        .pre_enter_smm = svm_pre_enter_smm,
        .pre_leave_smm = svm_pre_leave_smm,
+       .enable_smi_window = enable_smi_window,
 };
 
 static int __init svm_init(void)
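
The vmx.c hunks below only advertise EPTP switching to nested guests
when EPT is enabled, and read the EPT/VPID capability MSR with
rdmsr_safe() before checking the secondary execution controls, so that
stray capability bits can be cleared (with a one-time warning) when the
matching control is not supported.  A stand-alone sketch of that sanity
check (illustrative bit positions and values, not kernel code):

    /* Sketch: drop capability bits whose VM-execution control cannot
     * be enabled, mirroring the setup_vmcs_config() change below. */
    #include <stdint.h>
    #include <stdio.h>

    #define CTRL_ENABLE_EPT  (1u << 1)   /* illustrative bit positions */
    #define CTRL_ENABLE_VPID (1u << 5)

    static void sanitize_caps(uint32_t ctrl, uint32_t *ept, uint32_t *vpid)
    {
        if (!(ctrl & CTRL_ENABLE_EPT) && *ept) {
            *ept = 0;
            fprintf(stderr, "EPT capability without EPT control, ignored\n");
        }
        if (!(ctrl & CTRL_ENABLE_VPID) && *vpid) {
            *vpid = 0;
            fprintf(stderr, "VPID capability without VPID control, ignored\n");
        }
    }

    int main(void)
    {
        uint32_t ept = 0x14141, vpid = 0xf01;  /* arbitrary example values */
        sanitize_caps(CTRL_ENABLE_VPID, &ept, &vpid); /* EPT control missing */
        printf("ept=%#x vpid=%#x\n", (unsigned)ept, (unsigned)vpid);
        return 0;
    }
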
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c460b0b..e6c8ffa 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2842,8 +2842,9 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                 * Advertise EPTP switching unconditionally
                 * since we emulate it
                 */
-               vmx->nested.nested_vmx_vmfunc_controls =
-                       VMX_VMFUNC_EPTP_SWITCHING;
+               if (enable_ept)
+                       vmx->nested.nested_vmx_vmfunc_controls =
+                               VMX_VMFUNC_EPTP_SWITCHING;
        }
 
        /*
@@ -3680,14 +3681,25 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                                SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
                                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 
+       rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
+               &vmx_capability.ept, &vmx_capability.vpid);
+
        if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
                /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
                   enabled */
                _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
                                             CPU_BASED_CR3_STORE_EXITING |
                                             CPU_BASED_INVLPG_EXITING);
-               rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
-                     vmx_capability.ept, vmx_capability.vpid);
+       } else if (vmx_capability.ept) {
+               vmx_capability.ept = 0;
+               pr_warn_once("EPT CAP should not exist if not support "
+                               "1-setting enable EPT VM-execution control\n");
+       }
+       if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
+               vmx_capability.vpid) {
+               vmx_capability.vpid = 0;
+               pr_warn_once("VPID CAP should not exist if not support "
+                               "1-setting enable VPID VM-execution control\n");
        }
 
        min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
@@ -11973,6 +11985,11 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
        return 0;
 }
 
+static int enable_smi_window(struct kvm_vcpu *vcpu)
+{
+       return 0;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .cpu_has_kvm_support = cpu_has_kvm_support,
        .disabled_by_bios = vmx_disabled_by_bios,
@@ -12102,6 +12119,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .smi_allowed = vmx_smi_allowed,
        .pre_enter_smm = vmx_pre_enter_smm,
        .pre_leave_smm = vmx_pre_leave_smm,
+       .enable_smi_window = enable_smi_window,
 };
 
 static int __init vmx_init(void)
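
The x86.c hunks below pass the full msr_data into set_msr_mce() so the
MCE bank code can tell host-initiated writes (e.g. userspace restoring
state via KVM_SET_MSRS) from guest writes, and reject nonzero guest
writes to the MCi_STATUS registers.  A stand-alone sketch of that check
(the helper and its names are part of the example, not the kernel
sources):

    /* Sketch: MCE banks are laid out as CTL, STATUS, ADDR, MISC, so
     * (offset & 0x3) == 1 selects a bank's STATUS register.  Guests
     * may only clear STATUS; host-initiated writes are unrestricted. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static int mce_status_write(unsigned offset, uint64_t data,
                                bool host_initiated)
    {
        if (!host_initiated && (offset & 0x3) == 1 && data != 0)
            return -1;    /* mirrors the new check in set_msr_mce() */
        return 0;
    }

    int main(void)
    {
        /* offset 1 == MC0_STATUS: the guest write is refused, but the
         * same value is accepted when written by the host (userspace). */
        printf("guest: %d, host: %d\n",
               mce_status_write(1, 0xd00dULL, false),
               mce_status_write(1, 0xd00dULL, true));
        return 0;
    }
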
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5669af0..34c85aa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2006,10 +2006,12 @@ static void kvmclock_sync_fn(struct work_struct *work)
                                        KVMCLOCK_SYNC_PERIOD);
 }
 
-static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
        u64 mcg_cap = vcpu->arch.mcg_cap;
        unsigned bank_num = mcg_cap & 0xff;
+       u32 msr = msr_info->index;
+       u64 data = msr_info->data;
 
        switch (msr) {
        case MSR_IA32_MCG_STATUS:
@@ -2034,6 +2036,9 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                        if ((offset & 0x3) == 0 &&
                            data != 0 && (data | (1 << 10)) != ~(u64)0)
                                return -1;
+                       if (!msr_info->host_initiated &&
+                               (offset & 0x3) == 1 && data != 0)
+                               return -1;
                        vcpu->arch.mce_banks[offset] = data;
                        break;
                }
@@ -2283,7 +2288,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_IA32_MCG_CTL:
        case MSR_IA32_MCG_STATUS:
        case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
-               return set_msr_mce(vcpu, msr, data);
+               return set_msr_mce(vcpu, msr_info);
 
        case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
        case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
@@ -6892,17 +6897,23 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                if (inject_pending_event(vcpu, req_int_win) != 0)
                        req_immediate_exit = true;
                else {
-                       /* Enable NMI/IRQ window open exits if needed.
+                       /* Enable SMI/NMI/IRQ window open exits if needed.
                         *
-                        * SMIs have two cases: 1) they can be nested, and
-                        * then there is nothing to do here because RSM will
-                        * cause a vmexit anyway; 2) or the SMI can be pending
-                        * because inject_pending_event has completed the
-                        * injection of an IRQ or NMI from the previous vmexit,
-                        * and then we request an immediate exit to inject the SMI.
+                        * SMIs have three cases:
+                        * 1) They can be nested, and then there is nothing to
+                        *    do here because RSM will cause a vmexit anyway.
+                        * 2) There is an ISA-specific reason why SMI cannot be
+                        *    injected, and the moment when this changes can be
+                        *    intercepted.
+                        * 3) Or the SMI can be pending because
+                        *    inject_pending_event has completed the injection
+                        *    of an IRQ or NMI from the previous vmexit, and
+                        *    then we request an immediate exit to inject the
+                        *    SMI.
                         */
                        if (vcpu->arch.smi_pending && !is_smm(vcpu))
-                               req_immediate_exit = true;
+                               if (!kvm_x86_ops->enable_smi_window(vcpu))
+                                       req_immediate_exit = true;
                        if (vcpu->arch.nmi_pending)
                                kvm_x86_ops->enable_nmi_window(vcpu);
                        if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)