KVM: stats: Separate generic stats from architecture specific ones
[linux-2.6-microblaze.git] arch/x86/kvm/x86.c
index 8085ab8..7120233 100644
@@ -103,6 +103,8 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 
 static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
 
+#define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE)
+
 #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
                                     KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
 
@@ -233,10 +235,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        VCPU_STAT("irq_window", irq_window_exits),
        VCPU_STAT("nmi_window", nmi_window_exits),
        VCPU_STAT("halt_exits", halt_exits),
-       VCPU_STAT("halt_successful_poll", halt_successful_poll),
-       VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
-       VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
-       VCPU_STAT("halt_wakeup", halt_wakeup),
+       VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
+       VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
+       VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
+       VCPU_STAT("halt_wakeup", generic.halt_wakeup),
        VCPU_STAT("hypercalls", hypercalls),
        VCPU_STAT("request_irq", request_irq_exits),
        VCPU_STAT("irq_exits", irq_exits),
@@ -248,8 +250,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        VCPU_STAT("nmi_injections", nmi_injections),
        VCPU_STAT("req_event", req_event),
        VCPU_STAT("l1d_flush", l1d_flush),
-       VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-       VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+       VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
+       VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
        VCPU_STAT("nested_run", nested_run),
        VCPU_STAT("directed_yield_attempted", directed_yield_attempted),
        VCPU_STAT("directed_yield_successful", directed_yield_successful),
@@ -261,7 +263,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        VM_STAT("mmu_recycled", mmu_recycled),
        VM_STAT("mmu_cache_miss", mmu_cache_miss),
        VM_STAT("mmu_unsync", mmu_unsync),
-       VM_STAT("remote_tlb_flush", remote_tlb_flush),
+       VM_STAT("remote_tlb_flush", generic.remote_tlb_flush),
        VM_STAT("largepages", lpages, .mode = 0444),
        VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444),
        VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions),
@@ -820,6 +822,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 
        memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
        kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+       vcpu->arch.pdptrs_from_userspace = false;
+
 out:
 
        return ret;
@@ -1060,26 +1064,46 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
+static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
+{
+       struct kvm_mmu *mmu = vcpu->arch.mmu;
+       unsigned long roots_to_free = 0;
+       int i;
+
+       /*
+        * If neither the current CR3 nor any of the prev_roots use the given
+        * PCID, then nothing needs to be done here because a resync will
+        * happen anyway before switching to any other CR3.
+        */
+       if (kvm_get_active_pcid(vcpu) == pcid) {
+               kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+               kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+       }
+
+       for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+               if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid)
+                       roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+
+       kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
+}
+
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
        bool skip_tlb_flush = false;
+       unsigned long pcid = 0;
 #ifdef CONFIG_X86_64
        bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
 
        if (pcid_enabled) {
                skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
                cr3 &= ~X86_CR3_PCID_NOFLUSH;
+               pcid = cr3 & X86_CR3_PCID_MASK;
        }
 #endif
 
        /* PDPTRs are always reloaded for PAE paging. */
-       if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu)) {
-               if (!skip_tlb_flush) {
-                       kvm_mmu_sync_roots(vcpu);
-                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-               }
-               return 0;
-       }
+       if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu))
+               goto handle_tlb_flush;
 
        /*
         * Do not condition the GPA check on long mode, this helper is used to
@@ -1092,10 +1116,23 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
        if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
                return 1;
 
-       kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
+       if (cr3 != kvm_read_cr3(vcpu))
+               kvm_mmu_new_pgd(vcpu, cr3);
+
        vcpu->arch.cr3 = cr3;
        kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
+handle_tlb_flush:
+       /*
+        * A load of CR3 that flushes the TLB flushes only the current PCID,
+        * even if PCID is disabled, in which case PCID=0 is flushed.  It's a
+        * moot point in the end because _disabling_ PCID will flush all PCIDs,
+        * and it's impossible to use a non-zero PCID when PCID is disabled,
+        * i.e. only PCID=0 can be relevant.
+        */
+       if (!skip_tlb_flush)
+               kvm_invalidate_pcid(vcpu, pcid);
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
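For reference, the CR3 bits the reworked kvm_set_cr3() splits apart, as defined architecturally and in the x86 headers (not introduced by this patch):

#define X86_CR3_PCID_MASK	0x0000000000000fffULL	/* CR3[11:0]: the PCID */
#define X86_CR3_PCID_NOFLUSH	(1ULL << 63)		/* CR3[63]: no-flush load */

/*
 * Example: with CR4.PCIDE=1, a guest MOV to CR3 of 0x800000000012d005 keeps
 * the TLB (bit 63 set, so skip_tlb_flush = true) and targets PCID 5.  With
 * bit 63 clear the load is a flushing one and kvm_invalidate_pcid(vcpu, 5)
 * runs.  With CR4.PCIDE=0, pcid stays 0, matching the comment above: only
 * PCID 0 can be in use, so flushing it is sufficient.
 */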
@@ -3526,7 +3563,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_IA32_LASTBRANCHTOIP:
        case MSR_IA32_LASTINTFROMIP:
        case MSR_IA32_LASTINTTOIP:
-       case MSR_K8_SYSCFG:
+       case MSR_AMD64_SYSCFG:
        case MSR_K8_TSEG_ADDR:
        case MSR_K8_TSEG_MASK:
        case MSR_VM_HSAVE_PA:
@@ -3961,6 +3998,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_SREGS2:
                r = 1;
                break;
+       case KVM_CAP_EXIT_HYPERCALL:
+               r = KVM_EXIT_HYPERCALL_VALID_MASK;
+               break;
        case KVM_CAP_SET_GUEST_DEBUG2:
                return KVM_GUESTDBG_VALID_MASK;
 #ifdef CONFIG_KVM_XEN
@@ -5587,6 +5627,14 @@ split_irqchip_unlock:
                if (kvm_x86_ops.vm_copy_enc_context_from)
                        r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
                return r;
+       case KVM_CAP_EXIT_HYPERCALL:
+               if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
+                       r = -EINVAL;
+                       break;
+               }
+               kvm->arch.hypercall_exit_enabled = cap->args[0];
+               r = 0;
+               break;
        default:
                r = -EINVAL;
                break;
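To illustrate how the new capability is consumed, a minimal userspace sketch (the function name, fd handling, and error policy are illustrative, not part of this patch):

#include <linux/kvm.h>
#include <linux/kvm_para.h>
#include <sys/ioctl.h>

/* Opt in to KVM_HC_MAP_GPA_RANGE exits on a VM fd; only bits reported by
 * KVM_CHECK_EXTENSION(KVM_CAP_EXIT_HYPERCALL) may be set in args[0]. */
static int enable_map_gpa_range_exit(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_EXIT_HYPERCALL,
		.args[0] = 1ULL << KVM_HC_MAP_GPA_RANGE,
	};

	if (!(ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_EXIT_HYPERCALL) &
	      (1 << KVM_HC_MAP_GPA_RANGE)))
		return -1;	/* kernel too old or hypercall not supported */

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}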
@@ -8513,6 +8561,17 @@ no_yield:
        return;
 }
 
+static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
+{
+       u64 ret = vcpu->run->hypercall.ret;
+
+       if (!is_64_bit_mode(vcpu))
+               ret = (u32)ret;
+       kvm_rax_write(vcpu, ret);
+       ++vcpu->stat.hypercalls;
+       return kvm_skip_emulated_instruction(vcpu);
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
        unsigned long nr, a0, a1, a2, a3, ret;
@@ -8578,6 +8637,28 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                kvm_sched_yield(vcpu, a0);
                ret = 0;
                break;
+       case KVM_HC_MAP_GPA_RANGE: {
+               u64 gpa = a0, npages = a1, attrs = a2;
+
+               ret = -KVM_ENOSYS;
+               if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE)))
+                       break;
+
+               if (!PAGE_ALIGNED(gpa) || !npages ||
+                   gpa_to_gfn(gpa) + npages <= gpa_to_gfn(gpa)) {
+                       ret = -KVM_EINVAL;
+                       break;
+               }
+
+               vcpu->run->exit_reason        = KVM_EXIT_HYPERCALL;
+               vcpu->run->hypercall.nr       = KVM_HC_MAP_GPA_RANGE;
+               vcpu->run->hypercall.args[0]  = gpa;
+               vcpu->run->hypercall.args[1]  = npages;
+               vcpu->run->hypercall.args[2]  = attrs;
+               vcpu->run->hypercall.longmode = op_64_bit;
+               vcpu->arch.complete_userspace_io = complete_hypercall_exit;
+               return 0;
+       }
        default:
                ret = -KVM_ENOSYS;
                break;
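On the userspace side, the new exit surfaces roughly as below (a sketch, not from this patch; handle_map_gpa_range() is a placeholder for the VMM's own mapping logic):

#include <linux/kvm.h>
#include <linux/kvm_para.h>

/* Placeholder: the VMM's policy for converting a GPA range. */
extern __u64 handle_map_gpa_range(__u64 gpa, __u64 npages, __u64 attrs);

/* Called from the KVM_RUN loop after ioctl(vcpu_fd, KVM_RUN, 0) returns. */
static void handle_exit(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_HYPERCALL ||
	    run->hypercall.nr != KVM_HC_MAP_GPA_RANGE)
		return;

	/*
	 * args[0..2] mirror the guest's a0..a2: start GPA, number of pages
	 * (4K units, per the gfn arithmetic above), requested attributes.
	 */
	run->hypercall.ret = handle_map_gpa_range(run->hypercall.args[0],
						  run->hypercall.args[1],
						  run->hypercall.args[2]);

	/*
	 * The next KVM_RUN invokes complete_hypercall_exit(), which copies
	 * hypercall.ret into the guest's RAX (truncated to 32 bits outside
	 * 64-bit mode) and skips the VMCALL/VMMCALL instruction.
	 */
}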
@@ -9102,6 +9183,15 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
        vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
        kvm_apic_update_apicv(vcpu);
        static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
+
+       /*
+        * When APICv gets disabled, we may still have injected interrupts
+        * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
+        * still active when the interrupt got accepted. Make sure
+        * inject_pending_event() is called to check for that.
+        */
+       if (!vcpu->arch.apicv_active)
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
 
@@ -9277,7 +9367,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                }
                if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
                        kvm_vcpu_flush_tlb_current(vcpu);
-               if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+               if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
                        kvm_vcpu_flush_tlb_guest(vcpu);
 
                if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
@@ -10265,6 +10355,7 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
 
                kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
                mmu_reset_needed = 1;
+               vcpu->arch.pdptrs_from_userspace = true;
        }
        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);
@@ -10570,7 +10661,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        vcpu_load(vcpu);
        kvm_set_tsc_khz(vcpu, max_tsc_khz);
        kvm_vcpu_reset(vcpu, false);
-       kvm_init_mmu(vcpu, false);
+       kvm_init_mmu(vcpu);
        vcpu_put(vcpu);
        return 0;
 
@@ -10829,6 +10920,9 @@ int kvm_arch_hardware_setup(void *opaque)
        int r;
 
        rdmsrl_safe(MSR_EFER, &host_efer);
+       if (WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_NX) &&
+                        !(host_efer & EFER_NX)))
+               return -EIO;
 
        if (boot_cpu_has(X86_FEATURE_XSAVES))
                rdmsrl(MSR_IA32_XSS, host_xss);
@@ -11949,8 +12043,6 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 {
        bool pcid_enabled;
        struct x86_exception e;
-       unsigned i;
-       unsigned long roots_to_free = 0;
        struct {
                u64 pcid;
                u64 gla;
@@ -11984,23 +12076,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
                        return 1;
                }
 
-               if (kvm_get_active_pcid(vcpu) == operand.pcid) {
-                       kvm_mmu_sync_roots(vcpu);
-                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-               }
-
-               for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-                       if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
-                           == operand.pcid)
-                               roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
-
-               kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
-               /*
-                * If neither the current cr3 nor any of the prev_roots use the
-                * given PCID, then nothing needs to be done here because a
-                * resync will happen anyway before switching to any other CR3.
-                */
-
+               kvm_invalidate_pcid(vcpu, operand.pcid);
                return kvm_skip_emulated_instruction(vcpu);
 
        case INVPCID_TYPE_ALL_NON_GLOBAL:
@@ -12013,7 +12089,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 
                fallthrough;
        case INVPCID_TYPE_ALL_INCL_GLOBAL:
-               kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+               kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
                return kvm_skip_emulated_instruction(vcpu);
 
        default: