Merge branch 'kvm-lapic-fix-and-cleanup' into HEAD
[linux-2.6-microblaze.git] / arch/x86/kvm/vmx/vmx.c
index ad2ac66..c788aa3 100644
@@ -12,6 +12,7 @@
  *   Avi Kivity   <avi@qumranet.com>
  *   Yaniv Kamay  <yaniv@qumranet.com>
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/highmem.h>
 #include <linux/hrtimer.h>
@@ -444,36 +445,36 @@ void vmread_error(unsigned long field, bool fault)
        if (fault)
                kvm_spurious_fault();
        else
-               vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
+               vmx_insn_failed("vmread failed: field=%lx\n", field);
 }
 
 noinline void vmwrite_error(unsigned long field, unsigned long value)
 {
-       vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%u\n",
+       vmx_insn_failed("vmwrite failed: field=%lx val=%lx err=%u\n",
                        field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
 }
 
 noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
 {
-       vmx_insn_failed("kvm: vmclear failed: %p/%llx err=%u\n",
+       vmx_insn_failed("vmclear failed: %p/%llx err=%u\n",
                        vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
 }
 
 noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
 {
-       vmx_insn_failed("kvm: vmptrld failed: %p/%llx err=%u\n",
+       vmx_insn_failed("vmptrld failed: %p/%llx err=%u\n",
                        vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
 }
 
 noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
 {
-       vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
+       vmx_insn_failed("invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
                        ext, vpid, gva);
 }
 
 noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
 {
-       vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
+       vmx_insn_failed("invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
                        ext, eptp, gpa);
 }
 
@@ -488,8 +489,8 @@ static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
 
-struct vmcs_config vmcs_config;
-struct vmx_capability vmx_capability;
+struct vmcs_config vmcs_config __ro_after_init;
+struct vmx_capability vmx_capability __ro_after_init;
 
 #define VMX_SEGMENT_FIELD(seg)                                 \
        [VCPU_SREG_##seg] = {                                   \
@@ -523,6 +524,8 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
 static unsigned long host_idt_base;
 
 #if IS_ENABLED(CONFIG_HYPERV)
+static struct kvm_x86_ops vmx_x86_ops __initdata;
+
 static bool __read_mostly enlightened_vmcs = true;
 module_param(enlightened_vmcs, bool, 0444);
 
@@ -551,6 +554,71 @@ static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static __init void hv_init_evmcs(void)
+{
+       int cpu;
+
+       if (!enlightened_vmcs)
+               return;
+
+       /*
+        * Enlightened VMCS usage should be recommended by Hyper-V and the
+        * host needs to support eVMCS v1 or above.
+        */
+       if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
+           (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
+            KVM_EVMCS_VERSION) {
+
+               /* Check that we have assist pages on all online CPUs */
+               for_each_online_cpu(cpu) {
+                       if (!hv_get_vp_assist_page(cpu)) {
+                               enlightened_vmcs = false;
+                               break;
+                       }
+               }
+
+               if (enlightened_vmcs) {
+                       pr_info("Using Hyper-V Enlightened VMCS\n");
+                       static_branch_enable(&enable_evmcs);
+               }
+
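+               /*
+                * Hyper-V supports direct virtual flush hypercalls; wire up
+                * the hook so L2 TLB flushes can be offloaded to Hyper-V.
+                */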
+               if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
+                       vmx_x86_ops.enable_l2_tlb_flush
+                               = hv_enable_l2_tlb_flush;
+
+       } else {
+               enlightened_vmcs = false;
+       }
+}
+
+static void hv_reset_evmcs(void)
+{
+       struct hv_vp_assist_page *vp_ap;
+
+       if (!static_branch_unlikely(&enable_evmcs))
+               return;
+
+       /*
+        * KVM should enable eVMCS if and only if all CPUs have a VP assist
+        * page, and should reject onlining a CPU if eVMCS is enabled but
+        * the CPU doesn't have a VP assist page allocated.
+        */
+       vp_ap = hv_get_vp_assist_page(smp_processor_id());
+       if (WARN_ON_ONCE(!vp_ap))
+               return;
+
+       /*
+        * Reset everything to support using non-enlightened VMCS access later
+        * (e.g. when we reload the module with enlightened_vmcs=0)
+        */
+       vp_ap->nested_control.features.directhypercall = 0;
+       vp_ap->current_nested_vmcs = 0;
+       vp_ap->enlighten_vmentry = 0;
+}
+
+#else /* IS_ENABLED(CONFIG_HYPERV) */
+static void hv_init_evmcs(void) {}
+static void hv_reset_evmcs(void) {}
 #endif /* IS_ENABLED(CONFIG_HYPERV) */
 
 /*
@@ -1613,8 +1681,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
                if (!instr_len)
                        goto rip_updated;
 
-               WARN(exit_reason.enclave_mode,
-                    "KVM: skipping instruction after SGX enclave VM-Exit");
+               WARN_ONCE(exit_reason.enclave_mode,
+                         "skipping instruction after SGX enclave VM-Exit");
 
                orig_rip = kvm_rip_read(vcpu);
                rip = orig_rip + instr_len;
@@ -2448,88 +2516,6 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
        }
 }
 
-static __init int cpu_has_kvm_support(void)
-{
-       return cpu_has_vmx();
-}
-
-static __init int vmx_disabled_by_bios(void)
-{
-       return !boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
-              !boot_cpu_has(X86_FEATURE_VMX);
-}
-
-static int kvm_cpu_vmxon(u64 vmxon_pointer)
-{
-       u64 msr;
-
-       cr4_set_bits(X86_CR4_VMXE);
-
-       asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
-                         _ASM_EXTABLE(1b, %l[fault])
-                         : : [vmxon_pointer] "m"(vmxon_pointer)
-                         : : fault);
-       return 0;
-
-fault:
-       WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
-                 rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
-       cr4_clear_bits(X86_CR4_VMXE);
-
-       return -EFAULT;
-}
-
-static int vmx_hardware_enable(void)
-{
-       int cpu = raw_smp_processor_id();
-       u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
-       int r;
-
-       if (cr4_read_shadow() & X86_CR4_VMXE)
-               return -EBUSY;
-
-       /*
-        * This can happen if we hot-added a CPU but failed to allocate
-        * VP assist page for it.
-        */
-       if (static_branch_unlikely(&enable_evmcs) &&
-           !hv_get_vp_assist_page(cpu))
-               return -EFAULT;
-
-       intel_pt_handle_vmx(1);
-
-       r = kvm_cpu_vmxon(phys_addr);
-       if (r) {
-               intel_pt_handle_vmx(0);
-               return r;
-       }
-
-       if (enable_ept)
-               ept_sync_global();
-
-       return 0;
-}
-
-static void vmclear_local_loaded_vmcss(void)
-{
-       int cpu = raw_smp_processor_id();
-       struct loaded_vmcs *v, *n;
-
-       list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
-                                loaded_vmcss_on_cpu_link)
-               __loaded_vmcs_clear(v);
-}
-
-static void vmx_hardware_disable(void)
-{
-       vmclear_local_loaded_vmcss();
-
-       if (cpu_vmxoff())
-               kvm_spurious_fault();
-
-       intel_pt_handle_vmx(0);
-}
-
 /*
  * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID
  * directly instead of going through cpu_has(), to ensure KVM is trapping
@@ -2565,8 +2551,7 @@ static bool cpu_has_perf_global_ctrl_bug(void)
        return false;
 }
 
-static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
-                                     u32 msr, u32 *result)
+static int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result)
 {
        u32 vmx_msr_low, vmx_msr_high;
        u32 ctl = ctl_min | ctl_opt;
@@ -2584,7 +2569,7 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
        return 0;
 }
 
-static __init u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
+static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
 {
        u64 allowed;
 
@@ -2593,8 +2578,8 @@ static __init u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
        return  ctl_opt & allowed;
 }
 
-static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
-                                   struct vmx_capability *vmx_cap)
+static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
+                            struct vmx_capability *vmx_cap)
 {
        u32 vmx_msr_low, vmx_msr_high;
        u32 _pin_based_exec_control = 0;
@@ -2760,6 +2745,119 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
        return 0;
 }
 
+static bool kvm_is_vmx_supported(void)
+{
+       int cpu = raw_smp_processor_id();
+
+       if (!cpu_has_vmx()) {
+               pr_err("VMX not supported by CPU %d\n", cpu);
+               return false;
+       }
+
+       if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
+           !this_cpu_has(X86_FEATURE_VMX)) {
+               pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU %d\n", cpu);
+               return false;
+       }
+
+       return true;
+}
+
+static int vmx_check_processor_compat(void)
+{
+       int cpu = raw_smp_processor_id();
+       struct vmcs_config vmcs_conf;
+       struct vmx_capability vmx_cap;
+
+       if (!kvm_is_vmx_supported())
+               return -EIO;
+
+       if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) {
+               pr_err("Failed to setup VMCS config on CPU %d\n", cpu);
+               return -EIO;
+       }
+       if (nested)
+               nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept);
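+       /*
+        * The config computed on this CPU must match the global vmcs_config
+        * captured during hardware setup, otherwise this CPU's VMX feature
+        * set differs from the one KVM was configured with.
+        */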
+       if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config))) {
+               pr_err("Inconsistent VMCS config on CPU %d\n", cpu);
+               return -EIO;
+       }
+       return 0;
+}
+
+static int kvm_cpu_vmxon(u64 vmxon_pointer)
+{
+       u64 msr;
+
+       cr4_set_bits(X86_CR4_VMXE);
+
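+       /*
+        * Execute VMXON; if it faults (e.g. VMX disabled or locked out via
+        * MSR_IA32_FEAT_CTL), the exception table entry redirects execution
+        * to the 'fault' label instead of letting the fault go unhandled.
+        */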
+       asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
+                         _ASM_EXTABLE(1b, %l[fault])
+                         : : [vmxon_pointer] "m"(vmxon_pointer)
+                         : : fault);
+       return 0;
+
+fault:
+       WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
+                 rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
+       cr4_clear_bits(X86_CR4_VMXE);
+
+       return -EFAULT;
+}
+
+static int vmx_hardware_enable(void)
+{
+       int cpu = raw_smp_processor_id();
+       u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
+       int r;
+
+       if (cr4_read_shadow() & X86_CR4_VMXE)
+               return -EBUSY;
+
+       /*
+        * This can happen if we hot-added a CPU but failed to allocate
+        * a VP assist page for it.
+        */
+       if (static_branch_unlikely(&enable_evmcs) &&
+           !hv_get_vp_assist_page(cpu))
+               return -EFAULT;
+
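+       /* Notify the Intel PT driver that VMX operation is starting. */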
+       intel_pt_handle_vmx(1);
+
+       r = kvm_cpu_vmxon(phys_addr);
+       if (r) {
+               intel_pt_handle_vmx(0);
+               return r;
+       }
+
+       if (enable_ept)
+               ept_sync_global();
+
+       return 0;
+}
+
+static void vmclear_local_loaded_vmcss(void)
+{
+       int cpu = raw_smp_processor_id();
+       struct loaded_vmcs *v, *n;
+
+       list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
+                                loaded_vmcss_on_cpu_link)
+               __loaded_vmcs_clear(v);
+}
+
+static void vmx_hardware_disable(void)
+{
+       vmclear_local_loaded_vmcss();
+
+       if (cpu_vmxoff())
+               kvm_spurious_fault();
+
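+       /* Reset any Hyper-V eVMCS state in this CPU's VP assist page. */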
+       hv_reset_evmcs();
+
+       intel_pt_handle_vmx(0);
+}
+
 struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
 {
        int node = cpu_to_node(cpu);
@@ -2955,9 +3053,8 @@ static void fix_rmode_seg(int seg, struct kvm_segment *save)
                var.type = 0x3;
                var.avl = 0;
                if (save->base & 0xf)
-                       printk_once(KERN_WARNING "kvm: segment base is not "
-                                       "paragraph aligned when entering "
-                                       "protected mode (seg=%d)", seg);
+                       pr_warn_once("segment base is not paragraph aligned "
+                                    "when entering protected mode (seg=%d)", seg);
        }
 
        vmcs_write16(sf->selector, var.selector);
@@ -2987,8 +3084,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
         * vcpu. Warn the user that an update is overdue.
         */
        if (!kvm_vmx->tss_addr)
-               printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
-                            "called before entering vcpu\n");
+               pr_warn_once("KVM_SET_TSS_ADDR needs to be called before running vCPU\n");
 
        vmx_segment_cache_clear(vmx);
 
@@ -6823,7 +6919,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
        gate_desc *desc = (gate_desc *)host_idt_base + vector;
 
        if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
-           "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
+           "unexpected VM-Exit interrupt info: 0x%x", intr_info))
                return;
 
        handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
@@ -7421,29 +7517,6 @@ static int vmx_vm_init(struct kvm *kvm)
        return 0;
 }
 
-static int __init vmx_check_processor_compat(void)
-{
-       struct vmcs_config vmcs_conf;
-       struct vmx_capability vmx_cap;
-
-       if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
-           !this_cpu_has(X86_FEATURE_VMX)) {
-               pr_err("kvm: VMX is disabled on CPU %d\n", smp_processor_id());
-               return -EIO;
-       }
-
-       if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
-               return -EIO;
-       if (nested)
-               nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept);
-       if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
-               printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
-                               smp_processor_id());
-               return -EIO;
-       }
-       return 0;
-}
-
 static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
        u8 cache;
@@ -8042,7 +8115,9 @@ static void vmx_vm_destroy(struct kvm *kvm)
 }
 
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
-       .name = "kvm_intel",
+       .name = KBUILD_MODNAME,
+
+       .check_processor_compatibility = vmx_check_processor_compat,
 
        .hardware_unsetup = vmx_hardware_unsetup,
 
@@ -8262,7 +8337,7 @@ static __init int hardware_setup(void)
                return -EIO;
 
        if (cpu_has_perf_global_ctrl_bug())
-               pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
+               pr_warn_once("VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
                             "does not work properly. Using workaround\n");
 
        if (boot_cpu_has(X86_FEATURE_NX))
@@ -8270,7 +8345,7 @@ static __init int hardware_setup(void)
 
        if (boot_cpu_has(X86_FEATURE_MPX)) {
                rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
-               WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
+               WARN_ONCE(host_bndcfgs, "BNDCFGS in host will be lost");
        }
 
        if (!cpu_has_vmx_mpx())
@@ -8289,7 +8364,7 @@ static __init int hardware_setup(void)
 
        /* NX support is required for shadow paging. */
        if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) {
-               pr_err_ratelimited("kvm: NX (Execute Disable) not supported\n");
+               pr_err_ratelimited("NX (Execute Disable) not supported\n");
                return -EOPNOTSUPP;
        }
 
@@ -8441,9 +8516,6 @@ static __init int hardware_setup(void)
 }
 
 static struct kvm_x86_init_ops vmx_init_ops __initdata = {
-       .cpu_has_kvm_support = cpu_has_kvm_support,
-       .disabled_by_bios = vmx_disabled_by_bios,
-       .check_processor_compatibility = vmx_check_processor_compat,
        .hardware_setup = hardware_setup,
        .handle_intel_pt_intr = NULL,
 
@@ -8461,41 +8533,23 @@ static void vmx_cleanup_l1d_flush(void)
        l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
 }
 
-static void vmx_exit(void)
+static void __vmx_exit(void)
 {
+       allow_smaller_maxphyaddr = false;
+
 #ifdef CONFIG_KEXEC_CORE
        RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
        synchronize_rcu();
 #endif
+       vmx_cleanup_l1d_flush();
+}
 
+static void vmx_exit(void)
+{
        kvm_exit();
+       kvm_x86_vendor_exit();
 
-#if IS_ENABLED(CONFIG_HYPERV)
-       if (static_branch_unlikely(&enable_evmcs)) {
-               int cpu;
-               struct hv_vp_assist_page *vp_ap;
-               /*
-                * Reset everything to support using non-enlightened VMCS
-                * access later (e.g. when we reload the module with
-                * enlightened_vmcs=0)
-                */
-               for_each_online_cpu(cpu) {
-                       vp_ap = hv_get_vp_assist_page(cpu);
-
-                       if (!vp_ap)
-                               continue;
-
-                       vp_ap->nested_control.features.directhypercall = 0;
-                       vp_ap->current_nested_vmcs = 0;
-                       vp_ap->enlighten_vmentry = 0;
-               }
-
-               static_branch_disable(&enable_evmcs);
-       }
-#endif
-       vmx_cleanup_l1d_flush();
-
-       allow_smaller_maxphyaddr = false;
+       __vmx_exit();
 }
 module_exit(vmx_exit);
 
@@ -8503,56 +8557,29 @@ static int __init vmx_init(void)
 {
        int r, cpu;
 
-#if IS_ENABLED(CONFIG_HYPERV)
+       if (!kvm_is_vmx_supported())
+               return -EOPNOTSUPP;
+
        /*
-        * Enlightened VMCS usage should be recommended and the host needs
-        * to support eVMCS v1 or above. We can also disable eVMCS support
-        * with module parameter.
+        * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
+        * to unwind if a later step fails.
         */
-       if (enlightened_vmcs &&
-           ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
-           (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
-           KVM_EVMCS_VERSION) {
+       hv_init_evmcs();
 
-               /* Check that we have assist pages on all online CPUs */
-               for_each_online_cpu(cpu) {
-                       if (!hv_get_vp_assist_page(cpu)) {
-                               enlightened_vmcs = false;
-                               break;
-                       }
-               }
-
-               if (enlightened_vmcs) {
-                       pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
-                       static_branch_enable(&enable_evmcs);
-               }
-
-               if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
-                       vmx_x86_ops.enable_l2_tlb_flush
-                               = hv_enable_l2_tlb_flush;
-
-       } else {
-               enlightened_vmcs = false;
-       }
-#endif
-
-       r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
-                    __alignof__(struct vcpu_vmx), THIS_MODULE);
+       r = kvm_x86_vendor_init(&vmx_init_ops);
        if (r)
                return r;
 
        /*
-        * Must be called after kvm_init() so enable_ept is properly set
+        * Must be called after common x86 init so enable_ept is properly set
         * up. Hand in the mitigation parameter value that was stored by
         * the pre-module-init parser. If no parameter was given, it will
         * contain 'auto' which will be turned into the default 'cond'
         * mitigation mode.
         */
        r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
-       if (r) {
-               vmx_exit();
-               return r;
-       }
+       if (r)
+               goto err_l1d_flush;
 
        vmx_setup_fb_clear_ctrl();
 
@@ -8576,6 +8603,21 @@ static int __init vmx_init(void)
        if (!enable_ept)
                allow_smaller_maxphyaddr = true;
 
+       /*
+        * Common KVM initialization _must_ come last; after this, /dev/kvm is
+        * exposed to userspace!
+        */
+       r = kvm_init(sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx),
+                    THIS_MODULE);
+       if (r)
+               goto err_kvm_init;
+
        return 0;
+
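+       /* Error paths unwind in reverse order of initialization. */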
+err_kvm_init:
+       __vmx_exit();
+err_l1d_flush:
+       kvm_x86_vendor_exit();
+       return r;
 }
 module_init(vmx_init);