KVM: nVMX: Don't leak L1 MMIO regions to L2

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 92b6690..ff395f8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
 static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 #endif
 
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
+#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
 
 #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
                                     KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
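
The stat macros become variadic via the GNU ", ## __VA_ARGS__" extension, which drops the trailing comma when no extra arguments are supplied, so existing VM_STAT(x)/VCPU_STAT(x) users expand exactly as before while new callers can append designated initializers. A minimal standalone sketch of the mechanics (all names illustrative, not from x86.c):

	#include <stdio.h>

	struct item { int offset; int kind; int mode; };

	#define KIND_VM 0
	/* ", ## __VA_ARGS__" swallows the comma when no extra args are given */
	#define STAT(off, ...) { (off), KIND_VM, ## __VA_ARGS__ }

	int main(void)
	{
		struct item a = STAT(4);               /* { 4, KIND_VM }, .mode stays 0 */
		struct item b = STAT(8, .mode = 0444); /* appends an initializer */

		printf("%o %o\n", a.mode, b.mode);     /* prints: 0 444 */
		return 0;
	}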
@@ -212,7 +212,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
        { "mmu_unsync", VM_STAT(mmu_unsync) },
        { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
-       { "largepages", VM_STAT(lpages) },
+       { "largepages", VM_STAT(lpages, .mode = 0444) },
        { "max_mmu_page_hash_collisions",
                VM_STAT(max_mmu_page_hash_collisions) },
        { NULL }
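
The extra macro argument lands as a designated-initializer tail in the debugfs table entry, letting "largepages" pin its file mode to 0444 (read-only) while every other entry is left alone — presumably so userspace cannot clear a counter the kernel itself still depends on. Conceptually the expansion is:

	/*
	 * { "largepages", VM_STAT(lpages, .mode = 0444) } becomes roughly:
	 *
	 *	{ "largepages", offsetof(struct kvm, stat.lpages),
	 *	  KVM_STAT_VM, .mode = 0444 },
	 *
	 * Entries without an override leave .mode zero, which the debugfs
	 * setup code can treat as "use the default mode".
	 */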
@@ -360,7 +360,7 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 asmlinkage __visible void kvm_spurious_fault(void)
 {
        /* Fault while not rebooting.  We want the trace. */
-       BUG();
+       BUG_ON(!kvm_rebooting);
 }
 EXPORT_SYMBOL_GPL(kvm_spurious_fault);
 
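kvm_spurious_fault() is the landing pad for VMX/SVM instructions that fault unexpectedly; once the machine is rebooting (say, after an emergency vmxoff) such faults are expected and no longer worth a panic. Assuming the usual BUG_ON() semantics from include/asm-generic/bug.h, the change amounts to:

	/* before: any spurious fault panics, even mid-reboot */
	BUG();

	/* after: equivalent to */
	if (!kvm_rebooting)
		BUG();	/* fault while not rebooting -- we want the trace */
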
@@ -884,34 +884,42 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 }
 EXPORT_SYMBOL_GPL(kvm_set_xcr);
 
-int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       unsigned long old_cr4 = kvm_read_cr4(vcpu);
-       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
-                                  X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
-
        if (cr4 & CR4_RESERVED_BITS)
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
-               return 1;
+               return -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
+               return -EINVAL;
+
+       return 0;
+}
+
+int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+       unsigned long old_cr4 = kvm_read_cr4(vcpu);
+       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+                                  X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
+
+       if (kvm_valid_cr4(vcpu, cr4))
                return 1;
 
        if (is_long_mode(vcpu)) {
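
Hoisting the reserved-bit and CPUID checks into kvm_valid_cr4() lets one validator back two callers with different error conventions: kvm_set_cr4() keeps returning 1 so its caller injects #GP into the guest, while kvm_valid_sregs() (further down) can return a plain -EINVAL to the KVM_SET_SREGS ioctl. A standalone sketch of the pattern, under illustrative names:

	#include <errno.h>
	#include <stdbool.h>

	/* illustrative stand-ins, not the kernel's types */
	struct vcpu { unsigned long cr4; bool has_smep; };

	#define CR4_SMEP_BIT (1ul << 20)

	/* shared validator, kernel-style error code */
	static int valid_cr4(const struct vcpu *v, unsigned long cr4)
	{
		if (!v->has_smep && (cr4 & CR4_SMEP_BIT))
			return -EINVAL;
		return 0;
	}

	/* guest-write path: non-zero means "inject #GP" */
	static int set_cr4(struct vcpu *v, unsigned long cr4)
	{
		if (valid_cr4(v, cr4))
			return 1;
		v->cr4 = cr4;
		return 0;
	}

	/* userspace-ioctl path: propagate -EINVAL directly */
	static int set_sregs_cr4(struct vcpu *v, unsigned long cr4)
	{
		int r = valid_cr4(v, cr4);

		if (r)
			return r;
		v->cr4 = cr4;
		return 0;
	}

	int main(void)
	{
		struct vcpu v = { .cr4 = 0, .has_smep = false };

		return (set_cr4(&v, CR4_SMEP_BIT) == 1 &&
			set_sregs_cr4(&v, CR4_SMEP_BIT) == -EINVAL) ? 0 : 1;
	}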
@@ -1145,6 +1153,8 @@ static u32 msrs_to_save[] = {
        MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
        MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
        MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
+       MSR_IA32_UMWAIT_CONTROL,
+
        MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
        MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
        MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
@@ -1158,13 +1168,6 @@ static u32 msrs_to_save[] = {
        MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
        MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
        MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
-       MSR_ARCH_PERFMON_PERFCTR0 + 18, MSR_ARCH_PERFMON_PERFCTR0 + 19,
-       MSR_ARCH_PERFMON_PERFCTR0 + 20, MSR_ARCH_PERFMON_PERFCTR0 + 21,
-       MSR_ARCH_PERFMON_PERFCTR0 + 22, MSR_ARCH_PERFMON_PERFCTR0 + 23,
-       MSR_ARCH_PERFMON_PERFCTR0 + 24, MSR_ARCH_PERFMON_PERFCTR0 + 25,
-       MSR_ARCH_PERFMON_PERFCTR0 + 26, MSR_ARCH_PERFMON_PERFCTR0 + 27,
-       MSR_ARCH_PERFMON_PERFCTR0 + 28, MSR_ARCH_PERFMON_PERFCTR0 + 29,
-       MSR_ARCH_PERFMON_PERFCTR0 + 30, MSR_ARCH_PERFMON_PERFCTR0 + 31,
        MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
        MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
        MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
@@ -1174,13 +1177,6 @@ static u32 msrs_to_save[] = {
        MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
        MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
        MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
-       MSR_ARCH_PERFMON_EVENTSEL0 + 18, MSR_ARCH_PERFMON_EVENTSEL0 + 19,
-       MSR_ARCH_PERFMON_EVENTSEL0 + 20, MSR_ARCH_PERFMON_EVENTSEL0 + 21,
-       MSR_ARCH_PERFMON_EVENTSEL0 + 22, MSR_ARCH_PERFMON_EVENTSEL0 + 23,
-       MSR_ARCH_PERFMON_EVENTSEL0 + 24, MSR_ARCH_PERFMON_EVENTSEL0 + 25,
-       MSR_ARCH_PERFMON_EVENTSEL0 + 26, MSR_ARCH_PERFMON_EVENTSEL0 + 27,
-       MSR_ARCH_PERFMON_EVENTSEL0 + 28, MSR_ARCH_PERFMON_EVENTSEL0 + 29,
-       MSR_ARCH_PERFMON_EVENTSEL0 + 30, MSR_ARCH_PERFMON_EVENTSEL0 + 31,
 };
 
 static unsigned num_msrs_to_save;
@@ -2540,6 +2536,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 static void kvmclock_reset(struct kvm_vcpu *vcpu)
 {
        vcpu->arch.pv_time_enabled = false;
+       vcpu->arch.time = 0;
 }
 
 static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
@@ -2705,8 +2702,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_KVM_SYSTEM_TIME: {
                struct kvm_arch *ka = &vcpu->kvm->arch;
 
-               kvmclock_reset(vcpu);
-
                if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
                        bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
 
@@ -2720,14 +2715,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
 
                /* we verify if the enable bit is set... */
+               vcpu->arch.pv_time_enabled = false;
                if (!(data & 1))
                        break;
 
-               if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+               if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
                     &vcpu->arch.pv_time, data & ~1ULL,
                     sizeof(struct pvclock_vcpu_time_info)))
-                       vcpu->arch.pv_time_enabled = false;
-               else
                        vcpu->arch.pv_time_enabled = true;
 
                break;
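
Taken together, the three kvmclock hunks make MSR_KVM_SYSTEM_TIME writes self-contained: vcpu->arch.time is now cleared only by kvmclock_reset(), and pv_time_enabled is recomputed from scratch on every write — start disabled, bail if the enable bit is clear, enable only once the gfn-to-hva cache initializes. A toy model of the resulting logic (stand-in types; the real case body also stores data into vcpu->arch.time, in context not shown above):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	struct toy_vcpu {
		uint64_t time;		/* raw MSR value, cleared only on reset */
		bool pv_time_enabled;
	};

	/* stand-in for kvm_gfn_to_hva_cache_init(): 0 on success */
	static int map_pvclock_page(uint64_t gpa) { return gpa ? 0 : -1; }

	static void write_system_time_msr(struct toy_vcpu *v, uint64_t data)
	{
		v->time = data;			/* kept even while disabled */
		v->pv_time_enabled = false;	/* recomputed on every write */
		if (!(data & 1))
			return;			/* enable bit clear */
		if (!map_pvclock_page(data & ~1ULL))
			v->pv_time_enabled = true;
	}

	static void toy_kvmclock_reset(struct toy_vcpu *v)
	{
		v->pv_time_enabled = false;
		v->time = 0;		/* new: forget the MSR value too */
	}

	int main(void)
	{
		struct toy_vcpu v = { 0 };

		write_system_time_msr(&v, 0x1000 | 1);
		printf("enabled=%d time=%#llx\n", v.pv_time_enabled,
		       (unsigned long long)v.time);
		toy_kvmclock_reset(&v);
		printf("enabled=%d time=%#llx\n", v.pv_time_enabled,
		       (unsigned long long)v.time);
		return 0;
	}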
@@ -5094,13 +5088,14 @@ out:
 
 static void kvm_init_msr_list(void)
 {
+       struct x86_pmu_capability x86_pmu;
        u32 dummy[2];
        unsigned i, j;
 
        BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
                         "Please update the fixed PMCs in msrs_to_save[]");
-       BUILD_BUG_ON_MSG(INTEL_PMC_MAX_GENERIC != 32,
-                        "Please update the generic perfctr/eventsel MSRs in msrs_to_save[]");
+
+       perf_get_x86_pmu_capability(&x86_pmu);
 
        for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
@@ -5142,6 +5137,15 @@ static void kvm_init_msr_list(void)
                                intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
                                continue;
                        break;
+               case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
+                       if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
+                           min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
+                               continue;
+                       break;
+               case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
+                       if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
+                           min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
+                               continue;
                }
                default:
                        break;
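
msrs_to_save now stops at the 18 architectural counter/eventsel pairs (hence the PERFCTR0/EVENTSEL0 deletions further up), and kvm_init_msr_list() prunes the survivors at runtime: an index is kept only if it is below min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp), i.e. below what the host PMU actually implements. A standalone model of the pruning (constants illustrative):

	#include <stdio.h>

	/* INTEL_PMC_MAX_GENERIC is the compile-time cap; num_counters_gp is
	 * what the host PMU reports at runtime. Values here are examples. */
	#define PMC_MAX_GENERIC	32
	#define MSR_PERFCTR0	0xc1

	#define MIN(a, b) ((a) < (b) ? (a) : (b))

	int main(void)
	{
		unsigned int num_counters_gp = 4;	/* e.g. 4 GP counters */
		unsigned int limit = MIN(PMC_MAX_GENERIC, num_counters_gp);

		for (unsigned int msr = MSR_PERFCTR0; msr <= MSR_PERFCTR0 + 17; msr++) {
			if (msr - MSR_PERFCTR0 >= limit)
				continue;	/* pruned: host lacks this counter */
			printf("keep MSR %#x\n", msr);
		}
		return 0;
	}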
@@ -5451,7 +5455,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
                emul_type = EMULTYPE_TRAP_UD_FORCED;
        }
 
-       return kvm_emulate_instruction(vcpu, emul_type) != EMULATE_USER_EXIT;
+       return kvm_emulate_instruction(vcpu, emul_type);
 }
 EXPORT_SYMBOL_GPL(handle_ud);
 
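This one-liner is the visible tip of the conversion running through the rest of the hunks: the EMULATE_DONE/EMULATE_USER_EXIT/EMULATE_FAIL values are retired and the emulation helpers adopt the return convention KVM's exit handlers already use, so results pass through untranslated:

	/*
	 * Return convention adopted from the exit handlers (sketch):
	 *
	 *	r > 0  : resume the guest                      (was EMULATE_DONE)
	 *	r == 0 : exit to userspace, vcpu->run is valid (was EMULATE_USER_EXIT)
	 *	r < 0  : internal error
	 *
	 * which is why handle_ud() can now return kvm_emulate_instruction()
	 * directly instead of comparing against EMULATE_USER_EXIT.
	 */
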
@@ -6302,14 +6306,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
 
        if (emulation_type & EMULTYPE_VMWARE_GP) {
                kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
-               return EMULATE_DONE;
+               return 1;
        }
 
        if (emulation_type & EMULTYPE_SKIP) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
-               return EMULATE_USER_EXIT;
+               return 0;
        }
 
        kvm_queue_exception(vcpu, UD_VECTOR);
@@ -6318,10 +6322,10 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
-               return EMULATE_USER_EXIT;
+               return 0;
        }
 
-       return EMULATE_DONE;
+       return 1;
 }
 
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
@@ -6485,10 +6489,10 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
                kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
                kvm_run->debug.arch.exception = DB_VECTOR;
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
-               return EMULATE_USER_EXIT;
+               return 0;
        }
        kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
-       return EMULATE_DONE;
+       return 1;
 }
 
 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
@@ -6497,7 +6501,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
        int r;
 
        r = kvm_x86_ops->skip_emulated_instruction(vcpu);
-       if (unlikely(r != EMULATE_DONE))
+       if (unlikely(!r))
                return 0;
 
        /*
@@ -6510,7 +6514,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
         */
        if (unlikely(rflags & X86_EFLAGS_TF))
                r = kvm_vcpu_do_singlestep(vcpu);
-       return r == EMULATE_DONE;
+       return r;
 }
 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
 
@@ -6529,7 +6533,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
                        kvm_run->debug.arch.pc = eip;
                        kvm_run->debug.arch.exception = DB_VECTOR;
                        kvm_run->exit_reason = KVM_EXIT_DEBUG;
-                       *r = EMULATE_USER_EXIT;
+                       *r = 0;
                        return true;
                }
        }
@@ -6545,7 +6549,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
                        vcpu->arch.dr6 &= ~DR_TRAP_BITS;
                        vcpu->arch.dr6 |= dr6 | DR6_RTM;
                        kvm_queue_exception(vcpu, DB_VECTOR);
-                       *r = EMULATE_DONE;
+                       *r = 1;
                        return true;
                }
        }
@@ -6632,11 +6636,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                        if ((emulation_type & EMULTYPE_TRAP_UD) ||
                            (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
                                kvm_queue_exception(vcpu, UD_VECTOR);
-                               return EMULATE_DONE;
+                               return 1;
                        }
                        if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                                emulation_type))
-                               return EMULATE_DONE;
+                               return 1;
                        if (ctxt->have_exception) {
                                /*
                                 * #UD should result in just EMULATION_FAILED, and trap-like
@@ -6645,7 +6649,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                                WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
                                             exception_type(ctxt->exception.vector) == EXCPT_TRAP);
                                inject_emulated_exception(vcpu);
-                               return EMULATE_DONE;
+                               return 1;
                        }
                        return handle_emulation_failure(vcpu, emulation_type);
                }
@@ -6654,19 +6658,23 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
        if ((emulation_type & EMULTYPE_VMWARE_GP) &&
            !is_vmware_backdoor_opcode(ctxt)) {
                kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
-               return EMULATE_DONE;
+               return 1;
        }
 
+       /*
+        * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
+        * for kvm_skip_emulated_instruction().  The caller is responsible for
+        * updating interruptibility state and injecting single-step #DBs.
+        */
        if (emulation_type & EMULTYPE_SKIP) {
                kvm_rip_write(vcpu, ctxt->_eip);
                if (ctxt->eflags & X86_EFLAGS_RF)
                        kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
-               kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
-               return EMULATE_DONE;
+               return 1;
        }
 
        if (retry_instruction(ctxt, cr2, emulation_type))
-               return EMULATE_DONE;
+               return 1;
 
        /* this is needed for vmware backdoor interface to work since it
           changes registers values  during IO operation */
@@ -6682,18 +6690,18 @@ restart:
        r = x86_emulate_insn(ctxt);
 
        if (r == EMULATION_INTERCEPTED)
-               return EMULATE_DONE;
+               return 1;
 
        if (r == EMULATION_FAILED) {
                if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                        emulation_type))
-                       return EMULATE_DONE;
+                       return 1;
 
                return handle_emulation_failure(vcpu, emulation_type);
        }
 
        if (ctxt->have_exception) {
-               r = EMULATE_DONE;
+               r = 1;
                if (inject_emulated_exception(vcpu))
                        return r;
        } else if (vcpu->arch.pio.count) {
@@ -6704,18 +6712,18 @@ restart:
                        writeback = false;
                        vcpu->arch.complete_userspace_io = complete_emulated_pio;
                }
-               r = EMULATE_USER_EXIT;
+               r = 0;
        } else if (vcpu->mmio_needed) {
                ++vcpu->stat.mmio_exits;
 
                if (!vcpu->mmio_is_write)
                        writeback = false;
-               r = EMULATE_USER_EXIT;
+               r = 0;
                vcpu->arch.complete_userspace_io = complete_emulated_mmio;
        } else if (r == EMULATION_RESTART)
                goto restart;
        else
-               r = EMULATE_DONE;
+               r = 1;
 
        if (writeback) {
                unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
@@ -6724,7 +6732,7 @@ restart:
                if (!ctxt->have_exception ||
                    exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
                        kvm_rip_write(vcpu, ctxt->eip);
-                       if (r == EMULATE_DONE && ctxt->tf)
+                       if (r && ctxt->tf)
                                r = kvm_vcpu_do_singlestep(vcpu);
                        __kvm_set_rflags(vcpu, ctxt->eflags);
                }
@@ -7930,8 +7938,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        bool req_immediate_exit = false;
 
        if (kvm_request_pending(vcpu)) {
-               if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
-                       kvm_x86_ops->get_vmcs12_pages(vcpu);
+               if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
+                       if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) {
+                               r = 0;
+                               goto out;
+                       }
+               }
                if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
                        kvm_mmu_unload(vcpu);
                if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
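
This hunk carries the subject-line fix: get_vmcs12_pages() now reports failure (so that, per the commit subject, pages backing L1 MMIO are never handed to L2), and vcpu_enter_guest() reacts by bailing out to userspace with r = 0 rather than entering the guest with missing mappings. The request-handling pattern, as a toy:

	#include <stdbool.h>
	#include <stdio.h>

	/* a failing request handler forces an exit to userspace (r = 0)
	 * instead of being silently ignored */
	static bool get_vmcs12_pages(void) { return false; }	/* e.g. page is L1 MMIO */

	static int enter_guest(bool pages_requested)
	{
		int r;

		if (pages_requested) {
			if (!get_vmcs12_pages()) {
				r = 0;	/* vcpu->run is assumed to describe the error */
				goto out;
			}
		}
		r = 1;			/* proceed to VM-entry */
	out:
		return r;
	}

	int main(void) { printf("%d\n", enter_guest(true)); return 0; }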
@@ -8319,12 +8331,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
 {
        int r;
+
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-       if (r != EMULATE_DONE)
-               return 0;
-       return 1;
+       return r;
 }
 
 static int complete_emulated_pio(struct kvm_vcpu *vcpu)
@@ -8696,22 +8707,18 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
-               return EMULATE_USER_EXIT;
+               return 0;
        }
 
        kvm_rip_write(vcpu, ctxt->eip);
        kvm_set_rflags(vcpu, ctxt->eflags);
        kvm_make_request(KVM_REQ_EVENT, vcpu);
-       return EMULATE_DONE;
+       return 1;
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
 static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
-       if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-                       (sregs->cr4 & X86_CR4_OSXSAVE))
-               return  -EINVAL;
-
        if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
                /*
                 * When EFER.LME and CR0.PG are set, the processor is in
@@ -8730,7 +8737,7 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
                        return -EINVAL;
        }
 
-       return 0;
+       return kvm_valid_cr4(vcpu, sregs->cr4);
 }
 
 static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
@@ -9420,6 +9427,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+       INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
        atomic_set(&kvm->arch.noncoherent_dma_count, 0);