KVM: stats: Separate generic stats from architecture specific ones
[linux-2.6-microblaze.git] arch/x86/kvm/x86.c
index 8085ab8..7120233 100644
@@ -103,6 +103,8 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 
 static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
 
+#define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE)
+
 #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
                                     KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
 
@@ -233,10 +235,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        VCPU_STAT("irq_window", irq_window_exits),
        VCPU_STAT("nmi_window", nmi_window_exits),
        VCPU_STAT("halt_exits", halt_exits),
-       VCPU_STAT("halt_successful_poll", halt_successful_poll),
-       VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
-       VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
-       VCPU_STAT("halt_wakeup", halt_wakeup),
+       VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
+       VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
+       VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
+       VCPU_STAT("halt_wakeup", generic.halt_wakeup),
        VCPU_STAT("hypercalls", hypercalls),
        VCPU_STAT("request_irq", request_irq_exits),
        VCPU_STAT("irq_exits", irq_exits),
@@ -248,8 +250,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        VCPU_STAT("nmi_injections", nmi_injections),
        VCPU_STAT("req_event", req_event),
        VCPU_STAT("l1d_flush", l1d_flush),
-       VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-       VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+       VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
+       VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
        VCPU_STAT("nested_run", nested_run),
        VCPU_STAT("directed_yield_attempted", directed_yield_attempted),
        VCPU_STAT("directed_yield_successful", directed_yield_successful),
@@ -261,7 +263,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        VM_STAT("mmu_recycled", mmu_recycled),
        VM_STAT("mmu_cache_miss", mmu_cache_miss),
        VM_STAT("mmu_unsync", mmu_unsync),
-       VM_STAT("remote_tlb_flush", remote_tlb_flush),
+       VM_STAT("remote_tlb_flush", generic.remote_tlb_flush),
        VM_STAT("largepages", lpages, .mode = 0444),
        VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444),
        VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions),
@@ -820,6 +822,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 
        memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
        kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+       vcpu->arch.pdptrs_from_userspace = false;
+
 out:
 
        return ret;
@@ -1060,26 +1064,46 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
+static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
+{
+       struct kvm_mmu *mmu = vcpu->arch.mmu;
+       unsigned long roots_to_free = 0;
+       int i;
+
+       /*
+        * If neither the current CR3 nor any of the prev_roots use the given
+        * PCID, then nothing needs to be done here because a resync will
+        * happen anyway before switching to any other CR3.
+        */
+       if (kvm_get_active_pcid(vcpu) == pcid) {
+               kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+               kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+       }
+
+       for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+               if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid)
+                       roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+
+       kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
+}
+
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
        bool skip_tlb_flush = false;
+       unsigned long pcid = 0;
 #ifdef CONFIG_X86_64
        bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
 
        if (pcid_enabled) {
                skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
                cr3 &= ~X86_CR3_PCID_NOFLUSH;
+               pcid = cr3 & X86_CR3_PCID_MASK;
        }
 #endif
 
        /* PDPTRs are always reloaded for PAE paging. */
-       if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu)) {
-               if (!skip_tlb_flush) {
-                       kvm_mmu_sync_roots(vcpu);
-                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-               }
-               return 0;
-       }
+       if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu))
+               goto handle_tlb_flush;
 
        /*
         * Do not condition the GPA check on long mode, this helper is used to
@@ -1092,10 +1116,23 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
        if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
                return 1;
 
-       kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
+       if (cr3 != kvm_read_cr3(vcpu))
+               kvm_mmu_new_pgd(vcpu, cr3);
+
        vcpu->arch.cr3 = cr3;
        kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
+handle_tlb_flush:
+       /*
+        * A load of CR3 that flushes the TLB flushes only the current PCID,
+        * even if PCID is disabled, in which case PCID=0 is flushed.  It's a
+        * moot point in the end because _disabling_ PCID will flush all PCIDs,
+        * and it's impossible to use a non-zero PCID when PCID is disabled,
+        * i.e. only PCID=0 can be relevant.
+        */
+       if (!skip_tlb_flush)
+               kvm_invalidate_pcid(vcpu, pcid);
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
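For reference, the CR3 bits the reworked kvm_set_cr3() splits apart, as defined architecturally and in the x86 headers (not introduced by this patch):

#define X86_CR3_PCID_MASK	0x0000000000000fffULL	/* CR3[11:0]: the PCID */
#define X86_CR3_PCID_NOFLUSH	(1ULL << 63)		/* CR3[63]: no-flush load */

/*
 * Example: with CR4.PCIDE=1, a guest MOV to CR3 of 0x800000000012d005 keeps
 * the TLB (bit 63 set, so skip_tlb_flush = true) and targets PCID 5.  With
 * bit 63 clear the load is a flushing one and kvm_invalidate_pcid(vcpu, 5)
 * runs.  With CR4.PCIDE=0, pcid stays 0, matching the comment above: only
 * PCID 0 can be in use, so flushing it is sufficient.
 */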
@@ -3526,7 +3563,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_IA32_LASTBRANCHTOIP:
        case MSR_IA32_LASTINTFROMIP:
        case MSR_IA32_LASTINTTOIP:
-       case MSR_K8_SYSCFG:
+       case MSR_AMD64_SYSCFG:
        case MSR_K8_TSEG_ADDR:
        case MSR_K8_TSEG_MASK:
        case MSR_VM_HSAVE_PA:
@@ -3961,6 +3998,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_SREGS2:
                r = 1;
                break;
+       case KVM_CAP_EXIT_HYPERCALL:
+               r = KVM_EXIT_HYPERCALL_VALID_MASK;
+               break;
        case KVM_CAP_SET_GUEST_DEBUG2:
                return KVM_GUESTDBG_VALID_MASK;
 #ifdef CONFIG_KVM_XEN
@@ -5587,6 +5627,14 @@ split_irqchip_unlock:
                if (kvm_x86_ops.vm_copy_enc_context_from)
                        r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
                return r;
+       case KVM_CAP_EXIT_HYPERCALL:
+               if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
+                       r = -EINVAL;
+                       break;
+               }
+               kvm->arch.hypercall_exit_enabled = cap->args[0];
+               r = 0;
+               break;
        default:
                r = -EINVAL;
                break;
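To illustrate how the new capability is consumed, a minimal userspace sketch (the function name, fd handling, and error policy are illustrative, not part of this patch):

#include <linux/kvm.h>
#include <linux/kvm_para.h>
#include <sys/ioctl.h>

/* Opt in to KVM_HC_MAP_GPA_RANGE exits on a VM fd; only bits reported by
 * KVM_CHECK_EXTENSION(KVM_CAP_EXIT_HYPERCALL) may be set in args[0]. */
static int enable_map_gpa_range_exit(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_EXIT_HYPERCALL,
		.args[0] = 1ULL << KVM_HC_MAP_GPA_RANGE,
	};

	if (!(ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_EXIT_HYPERCALL) &
	      (1 << KVM_HC_MAP_GPA_RANGE)))
		return -1;	/* kernel too old or hypercall not supported */

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}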
@@ -8513,6 +8561,17 @@ no_yield:
        return;
 }
 
+static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
+{
+       u64 ret = vcpu->run->hypercall.ret;
+
+       if (!is_64_bit_mode(vcpu))
+               ret = (u32)ret;
+       kvm_rax_write(vcpu, ret);
+       ++vcpu->stat.hypercalls;
+       return kvm_skip_emulated_instruction(vcpu);
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
        unsigned long nr, a0, a1, a2, a3, ret;
@@ -8578,6 +8637,28 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                kvm_sched_yield(vcpu, a0);
                ret = 0;
                break;
+       case KVM_HC_MAP_GPA_RANGE: {
+               u64 gpa = a0, npages = a1, attrs = a2;
+
+               ret = -KVM_ENOSYS;
+               if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE)))
+                       break;
+
+               if (!PAGE_ALIGNED(gpa) || !npages ||
+                   gpa_to_gfn(gpa) + npages <= gpa_to_gfn(gpa)) {
+                       ret = -KVM_EINVAL;
+                       break;
+               }
+
+               vcpu->run->exit_reason        = KVM_EXIT_HYPERCALL;
+               vcpu->run->hypercall.nr       = KVM_HC_MAP_GPA_RANGE;
+               vcpu->run->hypercall.args[0]  = gpa;
+               vcpu->run->hypercall.args[1]  = npages;
+               vcpu->run->hypercall.args[2]  = attrs;
+               vcpu->run->hypercall.longmode = op_64_bit;
+               vcpu->arch.complete_userspace_io = complete_hypercall_exit;
+               return 0;
+       }
        default:
                ret = -KVM_ENOSYS;
                break;
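On the userspace side, the new exit surfaces roughly as below (a sketch, not from this patch; handle_map_gpa_range() is a placeholder for the VMM's own mapping logic):

#include <linux/kvm.h>
#include <linux/kvm_para.h>

/* Placeholder: the VMM's policy for converting a GPA range. */
extern __u64 handle_map_gpa_range(__u64 gpa, __u64 npages, __u64 attrs);

/* Called from the KVM_RUN loop after ioctl(vcpu_fd, KVM_RUN, 0) returns. */
static void handle_exit(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_HYPERCALL ||
	    run->hypercall.nr != KVM_HC_MAP_GPA_RANGE)
		return;

	/*
	 * args[0..2] mirror the guest's a0..a2: start GPA, number of pages
	 * (4K units, per the gfn arithmetic above), requested attributes.
	 */
	run->hypercall.ret = handle_map_gpa_range(run->hypercall.args[0],
						  run->hypercall.args[1],
						  run->hypercall.args[2]);

	/*
	 * The next KVM_RUN invokes complete_hypercall_exit(), which copies
	 * hypercall.ret into the guest's RAX (truncated to 32 bits outside
	 * 64-bit mode) and skips the VMCALL/VMMCALL instruction.
	 */
}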
@@ -9102,6 +9183,15 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
        vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
        kvm_apic_update_apicv(vcpu);
        static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
+
+       /*
+        * When APICv gets disabled, we may still have injected interrupts
+        * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
+        * still active when the interrupt got accepted. Make sure
+        * inject_pending_event() is called to check for that.
+        */
+       if (!vcpu->arch.apicv_active)
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
 
@@ -9277,7 +9367,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                }
                if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
                        kvm_vcpu_flush_tlb_current(vcpu);
-               if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+               if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
                        kvm_vcpu_flush_tlb_guest(vcpu);
 
                if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
@@ -10265,6 +10355,7 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
 
                kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
                mmu_reset_needed = 1;
+               vcpu->arch.pdptrs_from_userspace = true;
        }
        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);
@@ -10570,7 +10661,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        vcpu_load(vcpu);
        kvm_set_tsc_khz(vcpu, max_tsc_khz);
        kvm_vcpu_reset(vcpu, false);
-       kvm_init_mmu(vcpu, false);
+       kvm_init_mmu(vcpu);
        vcpu_put(vcpu);
        return 0;
 
@@ -10829,6 +10920,9 @@ int kvm_arch_hardware_setup(void *opaque)
        int r;
 
        rdmsrl_safe(MSR_EFER, &host_efer);
+       if (WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_NX) &&
+                        !(host_efer & EFER_NX)))
+               return -EIO;
 
        if (boot_cpu_has(X86_FEATURE_XSAVES))
                rdmsrl(MSR_IA32_XSS, host_xss);
@@ -11949,8 +12043,6 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 {
        bool pcid_enabled;
        struct x86_exception e;
-       unsigned i;
-       unsigned long roots_to_free = 0;
        struct {
                u64 pcid;
                u64 gla;
@@ -11984,23 +12076,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
                        return 1;
                }
 
-               if (kvm_get_active_pcid(vcpu) == operand.pcid) {
-                       kvm_mmu_sync_roots(vcpu);
-                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-               }
-
-               for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-                       if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
-                           == operand.pcid)
-                               roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
-
-               kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
-               /*
-                * If neither the current cr3 nor any of the prev_roots use the
-                * given PCID, then nothing needs to be done here because a
-                * resync will happen anyway before switching to any other CR3.
-                */
-
+               kvm_invalidate_pcid(vcpu, operand.pcid);
                return kvm_skip_emulated_instruction(vcpu);
 
        case INVPCID_TYPE_ALL_NON_GLOBAL:
@@ -12013,7 +12089,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 
                fallthrough;
        case INVPCID_TYPE_ALL_INCL_GLOBAL:
-               kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+               kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
                return kvm_skip_emulated_instruction(vcpu);
 
        default: