KVM: x86: track manually whether an event has been injected
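
As context for the inject_pending_event() hunks further down: the patch drops the kvm_event_needs_reinjection() check in favor of a local can_inject flag that is cleared whenever a previously delivered event has to be re-injected. The toy program below is a heavily reduced userspace sketch of that ordering (illustrative names and state, not kernel code): interrupted deliveries are replayed first, a freshly pending exception comes next, and a new SMI/NMI/IRQ is considered only if nothing had to be replayed.

/*
 * Toy model of the injection ordering made explicit by can_inject below.
 * Plain userspace C, not kernel code; the vCPU state is heavily reduced.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_vcpu {
	bool exception_injected;	/* exception delivery was interrupted */
	bool exception_pending;		/* exception queued, not yet delivered */
	bool nmi_injected;		/* NMI delivery was interrupted */
	bool irq_injected;		/* IRQ delivery was interrupted */
	bool smi_pending, nmi_pending, irq_pending;
};

static void toy_inject_pending_event(struct toy_vcpu *v)
{
	bool can_inject = true;

	/* Re-inject events whose delivery was interrupted by a VM-exit. */
	if (v->exception_injected) {
		puts("re-inject exception");
		can_inject = false;
	} else if (!v->exception_pending) {
		if (v->nmi_injected) {
			puts("re-inject NMI");
			can_inject = false;
		} else if (v->irq_injected) {
			puts("re-inject IRQ");
			can_inject = false;
		}
	}

	/* A newly pending exception is delivered next and also blocks new events. */
	if (v->exception_pending) {
		puts("inject pending exception");
		v->exception_pending = false;
		v->exception_injected = true;
		can_inject = false;
	}

	/* Finish re-injection before considering new events. */
	if (!can_inject)
		return;

	if (v->smi_pending)
		puts("take pending SMI");
	else if (v->nmi_pending)
		puts("inject new NMI");
	else if (v->irq_pending)
		puts("inject new IRQ");
}

int main(void)
{
	/* The pending IRQ must wait: the interrupted NMI is re-injected first. */
	struct toy_vcpu v = { .nmi_injected = true, .irq_pending = true };

	toy_inject_pending_event(&v);
	return 0;
}
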
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c5835f9..77b9b4e 100644
@@ -18,6 +18,7 @@
 
 #include <linux/kvm_host.h>
 #include "irq.h"
+#include "ioapic.h"
 #include "mmu.h"
 #include "i8254.h"
 #include "tss.h"
@@ -97,9 +98,6 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 
 static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
 
-#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
-#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
-
 #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
                                     KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
 
@@ -194,45 +192,46 @@ u64 __read_mostly supported_xss;
 EXPORT_SYMBOL_GPL(supported_xss);
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
-       { "pf_fixed", VCPU_STAT(pf_fixed) },
-       { "pf_guest", VCPU_STAT(pf_guest) },
-       { "tlb_flush", VCPU_STAT(tlb_flush) },
-       { "invlpg", VCPU_STAT(invlpg) },
-       { "exits", VCPU_STAT(exits) },
-       { "io_exits", VCPU_STAT(io_exits) },
-       { "mmio_exits", VCPU_STAT(mmio_exits) },
-       { "signal_exits", VCPU_STAT(signal_exits) },
-       { "irq_window", VCPU_STAT(irq_window_exits) },
-       { "nmi_window", VCPU_STAT(nmi_window_exits) },
-       { "halt_exits", VCPU_STAT(halt_exits) },
-       { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
-       { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
-       { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
-       { "halt_wakeup", VCPU_STAT(halt_wakeup) },
-       { "hypercalls", VCPU_STAT(hypercalls) },
-       { "request_irq", VCPU_STAT(request_irq_exits) },
-       { "irq_exits", VCPU_STAT(irq_exits) },
-       { "host_state_reload", VCPU_STAT(host_state_reload) },
-       { "fpu_reload", VCPU_STAT(fpu_reload) },
-       { "insn_emulation", VCPU_STAT(insn_emulation) },
-       { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
-       { "irq_injections", VCPU_STAT(irq_injections) },
-       { "nmi_injections", VCPU_STAT(nmi_injections) },
-       { "req_event", VCPU_STAT(req_event) },
-       { "l1d_flush", VCPU_STAT(l1d_flush) },
-       { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
-       { "mmu_pte_write", VM_STAT(mmu_pte_write) },
-       { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
-       { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
-       { "mmu_flooded", VM_STAT(mmu_flooded) },
-       { "mmu_recycled", VM_STAT(mmu_recycled) },
-       { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
-       { "mmu_unsync", VM_STAT(mmu_unsync) },
-       { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
-       { "largepages", VM_STAT(lpages, .mode = 0444) },
-       { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
-       { "max_mmu_page_hash_collisions",
-               VM_STAT(max_mmu_page_hash_collisions) },
+       VCPU_STAT("pf_fixed", pf_fixed),
+       VCPU_STAT("pf_guest", pf_guest),
+       VCPU_STAT("tlb_flush", tlb_flush),
+       VCPU_STAT("invlpg", invlpg),
+       VCPU_STAT("exits", exits),
+       VCPU_STAT("io_exits", io_exits),
+       VCPU_STAT("mmio_exits", mmio_exits),
+       VCPU_STAT("signal_exits", signal_exits),
+       VCPU_STAT("irq_window", irq_window_exits),
+       VCPU_STAT("nmi_window", nmi_window_exits),
+       VCPU_STAT("halt_exits", halt_exits),
+       VCPU_STAT("halt_successful_poll", halt_successful_poll),
+       VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+       VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+       VCPU_STAT("halt_wakeup", halt_wakeup),
+       VCPU_STAT("hypercalls", hypercalls),
+       VCPU_STAT("request_irq", request_irq_exits),
+       VCPU_STAT("irq_exits", irq_exits),
+       VCPU_STAT("host_state_reload", host_state_reload),
+       VCPU_STAT("fpu_reload", fpu_reload),
+       VCPU_STAT("insn_emulation", insn_emulation),
+       VCPU_STAT("insn_emulation_fail", insn_emulation_fail),
+       VCPU_STAT("irq_injections", irq_injections),
+       VCPU_STAT("nmi_injections", nmi_injections),
+       VCPU_STAT("req_event", req_event),
+       VCPU_STAT("l1d_flush", l1d_flush),
+       VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+       VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+       VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
+       VM_STAT("mmu_pte_write", mmu_pte_write),
+       VM_STAT("mmu_pte_updated", mmu_pte_updated),
+       VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
+       VM_STAT("mmu_flooded", mmu_flooded),
+       VM_STAT("mmu_recycled", mmu_recycled),
+       VM_STAT("mmu_cache_miss", mmu_cache_miss),
+       VM_STAT("mmu_unsync", mmu_unsync),
+       VM_STAT("remote_tlb_flush", remote_tlb_flush),
+       VM_STAT("largepages", lpages, .mode = 0444),
+       VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444),
+       VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions),
        { NULL }
 };
 
@@ -261,7 +260,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 {
        int i;
-       for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
+       for (i = 0; i < ASYNC_PF_PER_VCPU; i++)
                vcpu->arch.apf.gfns[i] = ~0;
 }
 
@@ -572,11 +571,12 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 }
 EXPORT_SYMBOL_GPL(kvm_requeue_exception);
 
-static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
-                                 unsigned long payload)
+void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
+                          unsigned long payload)
 {
        kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
 }
+EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
 
 static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
                                    u32 error_code, unsigned long payload)
@@ -611,15 +611,28 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 }
 EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
 
-static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+                                   struct x86_exception *fault)
 {
-       if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
-               vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
-       else
-               vcpu->arch.mmu->inject_page_fault(vcpu, fault);
+       struct kvm_mmu *fault_mmu;
+       WARN_ON_ONCE(fault->vector != PF_VECTOR);
+
+       fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
+                                              vcpu->arch.walk_mmu;
+
+       /*
+        * Invalidate the TLB entry for the faulting address, if one exists;
+        * otherwise the access will fault forever (this also matches hardware).
+        */
+       if ((fault->error_code & PFERR_PRESENT_MASK) &&
+           !(fault->error_code & PFERR_RSVD_MASK))
+               kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
+                                      fault_mmu->root_hpa);
 
+       fault_mmu->inject_page_fault(vcpu, fault);
        return fault->nested_page_fault;
 }
+EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu)
 {
@@ -836,11 +849,25 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
                    vcpu->arch.ia32_xss != host_xss)
                        wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
        }
+
+       if (static_cpu_has(X86_FEATURE_PKU) &&
+           (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
+            (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
+           vcpu->arch.pkru != vcpu->arch.host_pkru)
+               __write_pkru(vcpu->arch.pkru);
 }
 EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
 
 void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
 {
+       if (static_cpu_has(X86_FEATURE_PKU) &&
+           (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
+            (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
+               vcpu->arch.pkru = rdpkru();
+               if (vcpu->arch.pkru != vcpu->arch.host_pkru)
+                       __write_pkru(vcpu->arch.host_pkru);
+       }
+
        if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
 
                if (vcpu->arch.xcr0 != host_xcr0)
@@ -926,19 +953,6 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
        __reserved_bits;                                \
 })
 
-static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
-{
-       u64 reserved_bits = __cr4_reserved_bits(cpu_has, c);
-
-       if (kvm_cpu_cap_has(X86_FEATURE_LA57))
-               reserved_bits &= ~X86_CR4_LA57;
-
-       if (kvm_cpu_cap_has(X86_FEATURE_UMIP))
-               reserved_bits &= ~X86_CR4_UMIP;
-
-       return reserved_bits;
-}
-
 static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        if (cr4 & cr4_reserved_bits)
@@ -1006,7 +1020,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
        if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
                if (!skip_tlb_flush) {
                        kvm_mmu_sync_roots(vcpu);
-                       kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
                }
                return 0;
        }
@@ -1018,7 +1032,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
                 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
                return 1;
 
-       kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
+       kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
        vcpu->arch.cr3 = cr3;
        kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
@@ -1058,12 +1072,6 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
        }
 }
 
-static void kvm_update_dr6(struct kvm_vcpu *vcpu)
-{
-       if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
-               kvm_x86_ops.set_dr6(vcpu, vcpu->arch.dr6);
-}
-
 static void kvm_update_dr7(struct kvm_vcpu *vcpu)
 {
        unsigned long dr7;
@@ -1103,7 +1111,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
                if (val & 0xffffffff00000000ULL)
                        return -1; /* #GP */
                vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
-               kvm_update_dr6(vcpu);
                break;
        case 5:
                /* fall through */
@@ -1139,10 +1146,7 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
        case 4:
                /* fall through */
        case 6:
-               if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-                       *val = vcpu->arch.dr6;
-               else
-                       *val = kvm_x86_ops.get_dr6(vcpu);
+               *val = vcpu->arch.dr6;
                break;
        case 5:
                /* fall through */
@@ -1572,6 +1576,13 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
 
+bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
+{
+       return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
+               need_resched() || signal_pending(current);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_exit_request);
+
 /*
  * The fast path for frequent and performance sensitive wrmsr emulation,
  * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
@@ -1600,27 +1611,44 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data
        return 1;
 }
 
-enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
+static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
+{
+       if (!kvm_can_use_hv_timer(vcpu))
+               return 1;
+
+       kvm_set_lapic_tscdeadline_msr(vcpu, data);
+       return 0;
+}
+
+fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
 {
        u32 msr = kvm_rcx_read(vcpu);
        u64 data;
-       int ret = 0;
+       fastpath_t ret = EXIT_FASTPATH_NONE;
 
        switch (msr) {
        case APIC_BASE_MSR + (APIC_ICR >> 4):
                data = kvm_read_edx_eax(vcpu);
-               ret = handle_fastpath_set_x2apic_icr_irqoff(vcpu, data);
+               if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) {
+                       kvm_skip_emulated_instruction(vcpu);
+                       ret = EXIT_FASTPATH_EXIT_HANDLED;
+               }
+               break;
+       case MSR_IA32_TSCDEADLINE:
+               data = kvm_read_edx_eax(vcpu);
+               if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
+                       kvm_skip_emulated_instruction(vcpu);
+                       ret = EXIT_FASTPATH_REENTER_GUEST;
+               }
                break;
        default:
-               return EXIT_FASTPATH_NONE;
+               break;
        }
 
-       if (!ret) {
+       if (ret != EXIT_FASTPATH_NONE)
                trace_kvm_msr_write(msr, data);
-               return EXIT_FASTPATH_SKIP_EMUL_INS;
-       }
 
-       return EXIT_FASTPATH_NONE;
+       return ret;
 }
 EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
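
handle_fastpath_set_msr_irqoff() now reports how the exit was resolved: an accelerated x2APIC ICR write completes the exit (EXIT_FASTPATH_EXIT_HANDLED), while a TSC-deadline write absorbed by the hypervisor timer requests an immediate re-entry (EXIT_FASTPATH_REENTER_GUEST). The snippet below is a hypothetical, heavily simplified consumer of that return value in a vendor run loop; the real vmx/svm loops differ, and run_guest_once()/vcpu_exit_request() are stand-ins for the vendor entry path and for the kvm_vcpu_exit_request() helper added above.

/* Hypothetical, simplified consumer of fastpath_t; not the real vmx/svm code. */
#include <stdbool.h>
#include <stdio.h>

typedef enum {
	EXIT_FASTPATH_NONE,		/* fall back to normal exit handling */
	EXIT_FASTPATH_EXIT_HANDLED,	/* exit fully handled in the fastpath */
	EXIT_FASTPATH_REENTER_GUEST,	/* resume the guest without a full exit */
} fastpath_t;

/* Stand-in for the vendor VM-entry: the first exit is a fastpath'd WRMSR. */
static fastpath_t run_guest_once(int iteration)
{
	return iteration == 0 ? EXIT_FASTPATH_REENTER_GUEST : EXIT_FASTPATH_NONE;
}

/* Stand-in for kvm_vcpu_exit_request(). */
static bool vcpu_exit_request(void)
{
	return false;
}

static fastpath_t vendor_run(void)
{
	fastpath_t exit_fastpath;
	int i = 0;

	do {
		exit_fastpath = run_guest_once(i++);
	} while (exit_fastpath == EXIT_FASTPATH_REENTER_GUEST &&
		 !vcpu_exit_request());

	return exit_fastpath;
}

int main(void)
{
	printf("final fastpath verdict: %d\n", vendor_run());
	return 0;
}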
 
@@ -1909,7 +1937,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
 
 static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
 {
-       u64 curr_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
+       u64 curr_offset = vcpu->arch.l1_tsc_offset;
        vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
 }
 
@@ -1951,14 +1979,13 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
-       u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
-
-       return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
+       return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
 }
 EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
 
 static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
+       vcpu->arch.l1_tsc_offset = offset;
        vcpu->arch.tsc_offset = kvm_x86_ops.write_l1_tsc_offset(vcpu, offset);
 }
 
@@ -2083,7 +2110,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
                                           s64 adjustment)
 {
-       u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
+       u64 tsc_offset = vcpu->arch.l1_tsc_offset;
        kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
 }
 
@@ -2677,10 +2704,16 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
        vcpu->arch.time = 0;
 }
 
-static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
 {
        ++vcpu->stat.tlb_flush;
-       kvm_x86_ops.tlb_flush(vcpu, invalidate_gpa);
+       kvm_x86_ops.tlb_flush_all(vcpu);
+}
+
+static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
+{
+       ++vcpu->stat.tlb_flush;
+       kvm_x86_ops.tlb_flush_guest(vcpu);
 }
 
 static void record_steal_time(struct kvm_vcpu *vcpu)
@@ -2706,7 +2739,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
        trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
                st->preempted & KVM_VCPU_FLUSH_TLB);
        if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
-               kvm_vcpu_flush_tlb(vcpu, false);
+               kvm_vcpu_flush_tlb_guest(vcpu);
 
        vcpu->arch.st.preempted = 0;
 
@@ -3385,6 +3418,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_GET_MSR_FEATURES:
        case KVM_CAP_MSR_PLATFORM_INFO:
        case KVM_CAP_EXCEPTION_PAYLOAD:
+       case KVM_CAP_SET_GUEST_DEBUG:
                r = 1;
                break;
        case KVM_CAP_SYNC_REGS:
@@ -3438,14 +3472,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = KVM_X2APIC_API_VALID_FLAGS;
                break;
        case KVM_CAP_NESTED_STATE:
-               r = kvm_x86_ops.get_nested_state ?
-                       kvm_x86_ops.get_nested_state(NULL, NULL, 0) : 0;
+               r = kvm_x86_ops.nested_ops->get_state ?
+                       kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
                break;
        case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
                r = kvm_x86_ops.enable_direct_tlbflush != NULL;
                break;
        case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
-               r = kvm_x86_ops.nested_enable_evmcs != NULL;
+               r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
                break;
        default:
                break;
@@ -3570,6 +3604,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        kvm_x86_ops.vcpu_load(vcpu, cpu);
 
+       /* Save host pkru register if supported */
+       vcpu->arch.host_pkru = read_pkru();
+
        /* Apply any externally detected TSC adjustments (due to suspend) */
        if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
                adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
@@ -3763,7 +3800,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
        unsigned bank_num = mcg_cap & 0xff, bank;
 
        r = -EINVAL;
-       if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
+       if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
                goto out;
        if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
                goto out;
@@ -4021,7 +4058,6 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
        kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = dbgregs->dr6;
-       kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = dbgregs->dr7;
        kvm_update_dr7(vcpu);
 
@@ -4231,9 +4267,9 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                return kvm_hv_activate_synic(vcpu, cap->cap ==
                                             KVM_CAP_HYPERV_SYNIC2);
        case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
-               if (!kvm_x86_ops.nested_enable_evmcs)
+               if (!kvm_x86_ops.nested_ops->enable_evmcs)
                        return -ENOTTY;
-               r = kvm_x86_ops.nested_enable_evmcs(vcpu, &vmcs_version);
+               r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
                if (!r) {
                        user_ptr = (void __user *)(uintptr_t)cap->args[0];
                        if (copy_to_user(user_ptr, &vmcs_version,
@@ -4548,7 +4584,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                u32 user_data_size;
 
                r = -EINVAL;
-               if (!kvm_x86_ops.get_nested_state)
+               if (!kvm_x86_ops.nested_ops->get_state)
                        break;
 
                BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
@@ -4556,8 +4592,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                if (get_user(user_data_size, &user_kvm_nested_state->size))
                        break;
 
-               r = kvm_x86_ops.get_nested_state(vcpu, user_kvm_nested_state,
-                                                 user_data_size);
+               r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state,
+                                                    user_data_size);
                if (r < 0)
                        break;
 
@@ -4578,7 +4614,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                int idx;
 
                r = -EINVAL;
-               if (!kvm_x86_ops.set_nested_state)
+               if (!kvm_x86_ops.nested_ops->set_state)
                        break;
 
                r = -EFAULT;
@@ -4591,7 +4627,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 
                if (kvm_state.flags &
                    ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
-                     | KVM_STATE_NESTED_EVMCS))
+                     | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING))
                        break;
 
                /* nested_run_pending implies guest_mode.  */
@@ -4600,7 +4636,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                        break;
 
                idx = srcu_read_lock(&vcpu->kvm->srcu);
-               r = kvm_x86_ops.set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
+               r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state);
                srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        }
@@ -5247,6 +5283,10 @@ static void kvm_init_msr_list(void)
                        if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
                                continue;
                        break;
+               case MSR_IA32_UMWAIT_CONTROL:
+                       if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
+                               continue;
+                       break;
                case MSR_IA32_RTIT_CTL:
                case MSR_IA32_RTIT_STATUS:
                        if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
@@ -5264,7 +5304,7 @@ static void kvm_init_msr_list(void)
                                 !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
                                continue;
                        break;
-               case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
+               case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
                        if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
                                msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
                                intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
@@ -5279,7 +5319,7 @@ static void kvm_init_msr_list(void)
                        if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
                            min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
                                continue;
-               }
+                       break;
                default:
                        break;
                }
@@ -6408,7 +6448,7 @@ static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
 {
        struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
        if (ctxt->exception.vector == PF_VECTOR)
-               return kvm_propagate_fault(vcpu, &ctxt->exception);
+               return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
 
        if (ctxt->exception.error_code_valid)
                kvm_queue_exception_e(vcpu, ctxt->exception.vector,
@@ -6671,7 +6711,7 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
 
        if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
                kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
-               kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+               kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
                kvm_run->debug.arch.exception = DB_VECTOR;
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
                return 0;
@@ -6731,9 +6771,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
                                           vcpu->arch.db);
 
                if (dr6 != 0) {
-                       vcpu->arch.dr6 &= ~DR_TRAP_BITS;
-                       vcpu->arch.dr6 |= dr6 | DR6_RTM;
-                       kvm_queue_exception(vcpu, DB_VECTOR);
+                       kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
                        *r = 1;
                        return true;
                }
@@ -7679,11 +7717,14 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 static int inject_pending_event(struct kvm_vcpu *vcpu)
 {
        int r;
+       bool can_inject = true;
 
        /* try to reinject previous events if any */
 
-       if (vcpu->arch.exception.injected)
+       if (vcpu->arch.exception.injected) {
                kvm_x86_ops.queue_exception(vcpu);
+               can_inject = false;
+       }
        /*
         * Do not inject an NMI or interrupt if there is a pending
         * exception.  Exceptions and interrupts are recognized at
@@ -7699,20 +7740,26 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
         * fully complete the previous instruction.
         */
        else if (!vcpu->arch.exception.pending) {
-               if (vcpu->arch.nmi_injected)
+               if (vcpu->arch.nmi_injected) {
                        kvm_x86_ops.set_nmi(vcpu);
-               else if (vcpu->arch.interrupt.injected)
+                       can_inject = false;
+               } else if (vcpu->arch.interrupt.injected) {
                        kvm_x86_ops.set_irq(vcpu);
+                       can_inject = false;
+               }
        }
 
+       WARN_ON_ONCE(vcpu->arch.exception.injected &&
+                    vcpu->arch.exception.pending);
+
        /*
         * Call check_nested_events() even if we reinjected a previous event
         * in order for caller to determine if it should require immediate-exit
         * from L2 to L1 due to pending L1 events which require exit
         * from L2 to L1.
         */
-       if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
-               r = kvm_x86_ops.check_nested_events(vcpu);
+       if (is_guest_mode(vcpu)) {
+               r = kvm_x86_ops.nested_ops->check_events(vcpu);
                if (r != 0)
                        return r;
        }
@@ -7723,7 +7770,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
                                        vcpu->arch.exception.has_error_code,
                                        vcpu->arch.exception.error_code);
 
-               WARN_ON_ONCE(vcpu->arch.exception.injected);
                vcpu->arch.exception.pending = false;
                vcpu->arch.exception.injected = true;
 
@@ -7750,39 +7796,27 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
                }
 
                kvm_x86_ops.queue_exception(vcpu);
+               can_inject = false;
        }
 
-       /* Don't consider new event if we re-injected an event */
-       if (kvm_event_needs_reinjection(vcpu))
+       /* Finish re-injection before considering new events */
+       if (!can_inject)
                return 0;
 
-       if (vcpu->arch.smi_pending && !is_smm(vcpu) &&
-           kvm_x86_ops.smi_allowed(vcpu)) {
+       if (vcpu->arch.smi_pending &&
+           kvm_x86_ops.smi_allowed(vcpu, true)) {
                vcpu->arch.smi_pending = false;
                ++vcpu->arch.smi_count;
                enter_smm(vcpu);
-       } else if (vcpu->arch.nmi_pending && kvm_x86_ops.nmi_allowed(vcpu)) {
+       } else if (vcpu->arch.nmi_pending &&
+                  kvm_x86_ops.nmi_allowed(vcpu, true)) {
                --vcpu->arch.nmi_pending;
                vcpu->arch.nmi_injected = true;
                kvm_x86_ops.set_nmi(vcpu);
-       } else if (kvm_cpu_has_injectable_intr(vcpu)) {
-               /*
-                * Because interrupts can be injected asynchronously, we are
-                * calling check_nested_events again here to avoid a race condition.
-                * See https://lkml.org/lkml/2014/7/2/60 for discussion about this
-                * proposal and current concerns.  Perhaps we should be setting
-                * KVM_REQ_EVENT only on certain events and not unconditionally?
-                */
-               if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
-                       r = kvm_x86_ops.check_nested_events(vcpu);
-                       if (r != 0)
-                               return r;
-               }
-               if (kvm_x86_ops.interrupt_allowed(vcpu)) {
-                       kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
-                                           false);
-                       kvm_x86_ops.set_irq(vcpu);
-               }
+       } else if (kvm_cpu_has_injectable_intr(vcpu) &&
+                  kvm_x86_ops.interrupt_allowed(vcpu, true)) {
+               kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
+               kvm_x86_ops.set_irq(vcpu);
        }
 
        return 0;
@@ -8054,7 +8088,7 @@ void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
        zalloc_cpumask_var(&cpus, GFP_ATOMIC);
 
        kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
-                                   vcpu_bitmap, cpus);
+                                   NULL, vcpu_bitmap, cpus);
 
        free_cpumask_var(cpus);
 }
@@ -8084,6 +8118,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
  */
 void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
 {
+       struct kvm_vcpu *except;
        unsigned long old, new, expected;
 
        if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
@@ -8108,7 +8143,17 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
        trace_kvm_apicv_update_request(activate, bit);
        if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
                kvm_x86_ops.pre_update_apicv_exec_ctrl(kvm, activate);
-       kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
+
+       /*
+        * Send a request to update APICv for all other vCPUs, while
+        * updating the calling vCPU immediately instead of waiting for
+        * another #VMEXIT to handle the request.
+        */
+       except = kvm_get_running_vcpu();
+       kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE,
+                                        except);
+       if (except)
+               kvm_vcpu_update_apicv(except);
 }
 EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
 
@@ -8165,24 +8210,13 @@ int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
-       struct page *page = NULL;
-
        if (!lapic_in_kernel(vcpu))
                return;
 
        if (!kvm_x86_ops.set_apic_access_page_addr)
                return;
 
-       page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
-       if (is_error_page(page))
-               return;
-       kvm_x86_ops.set_apic_access_page_addr(vcpu, page_to_phys(page));
-
-       /*
-        * Do not pin apic access page in memory, the MMU notifier
-        * will call us again if it is migrated or swapped out.
-        */
-       put_page(page);
+       kvm_x86_ops.set_apic_access_page_addr(vcpu);
 }
 
 void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -8202,13 +8236,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        bool req_int_win =
                dm_request_for_irq_injection(vcpu) &&
                kvm_cpu_accept_dm_intr(vcpu);
-       enum exit_fastpath_completion exit_fastpath = EXIT_FASTPATH_NONE;
+       fastpath_t exit_fastpath;
 
        bool req_immediate_exit = false;
 
        if (kvm_request_pending(vcpu)) {
                if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
-                       if (unlikely(!kvm_x86_ops.get_vmcs12_pages(vcpu))) {
+                       if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) {
                                r = 0;
                                goto out;
                        }
@@ -8230,8 +8264,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                        kvm_mmu_sync_roots(vcpu);
                if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
                        kvm_mmu_load_pgd(vcpu);
-               if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
-                       kvm_vcpu_flush_tlb(vcpu, true);
+               if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
+                       kvm_vcpu_flush_tlb_all(vcpu);
+
+                       /* Flushing all ASIDs flushes the current ASID... */
+                       kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+               }
+               if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+                       kvm_vcpu_flush_tlb_current(vcpu);
+               if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+                       kvm_vcpu_flush_tlb_guest(vcpu);
+
                if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
                        vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
                        r = 0;
@@ -8338,6 +8381,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                                kvm_x86_ops.enable_nmi_window(vcpu);
                        if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
                                kvm_x86_ops.enable_irq_window(vcpu);
+                       if (is_guest_mode(vcpu) &&
+                           kvm_x86_ops.nested_ops->hv_timer_pending &&
+                           kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
+                               req_immediate_exit = true;
                        WARN_ON(vcpu->arch.exception.pending);
                }
 
@@ -8387,8 +8434,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
                kvm_x86_ops.sync_pir_to_irr(vcpu);
 
-       if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
-           || need_resched() || signal_pending(current)) {
+       if (kvm_vcpu_exit_request(vcpu)) {
                vcpu->mode = OUTSIDE_GUEST_MODE;
                smp_wmb();
                local_irq_enable();
@@ -8420,7 +8466,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
        }
 
-       kvm_x86_ops.run(vcpu);
+       exit_fastpath = kvm_x86_ops.run(vcpu);
 
        /*
         * Do this here before restoring debug registers on the host.  And
@@ -8432,7 +8478,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
                kvm_x86_ops.sync_dirty_debug_regs(vcpu);
                kvm_update_dr0123(vcpu);
-               kvm_update_dr6(vcpu);
                kvm_update_dr7(vcpu);
                vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
        }
@@ -8452,7 +8497,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
 
-       kvm_x86_ops.handle_exit_irqoff(vcpu, &exit_fastpath);
+       kvm_x86_ops.handle_exit_irqoff(vcpu);
 
        /*
         * Consume any pending interrupts, including the possible source of
@@ -8499,6 +8544,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        return r;
 
 cancel_injection:
+       if (req_immediate_exit)
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
        kvm_x86_ops.cancel_injection(vcpu);
        if (unlikely(vcpu->arch.apic_attention))
                kvm_lapic_sync_from_vapic(vcpu);
@@ -8541,8 +8588,8 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 
 static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
 {
-       if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events)
-               kvm_x86_ops.check_nested_events(vcpu);
+       if (is_guest_mode(vcpu))
+               kvm_x86_ops.nested_ops->check_events(vcpu);
 
        return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
                !vcpu->arch.apf.halted);
@@ -8724,8 +8771,9 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
        trace_kvm_fpu(0);
 }
 
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
+       struct kvm_run *kvm_run = vcpu->run;
        int r;
 
        vcpu_load(vcpu);
@@ -8743,18 +8791,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                r = -EAGAIN;
                if (signal_pending(current)) {
                        r = -EINTR;
-                       vcpu->run->exit_reason = KVM_EXIT_INTR;
+                       kvm_run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.signal_exits;
                }
                goto out;
        }
 
-       if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
+       if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
                r = -EINVAL;
                goto out;
        }
 
-       if (vcpu->run->kvm_dirty_regs) {
+       if (kvm_run->kvm_dirty_regs) {
                r = sync_regs(vcpu);
                if (r != 0)
                        goto out;
@@ -8784,7 +8832,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 out:
        kvm_put_guest_fpu(vcpu);
-       if (vcpu->run->kvm_valid_regs)
+       if (kvm_run->kvm_valid_regs)
                store_regs(vcpu);
        post_kvm_run_save(vcpu);
        kvm_sigset_deactivate(vcpu);
@@ -9376,9 +9424,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        }
        fx_init(vcpu);
 
-       vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
-
        vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+       vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
 
        vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
 
@@ -9493,7 +9540,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
        kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = DR6_INIT;
-       kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = DR7_FIXED_1;
        kvm_update_dr7(vcpu);
 
@@ -9675,7 +9721,9 @@ int kvm_arch_hardware_setup(void *opaque)
        if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
                supported_xss = 0;
 
-       cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
+#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
+       cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
+#undef __kvm_cpu_cap_has
 
        if (kvm_has_tsc_control) {
                /*
@@ -9707,7 +9755,8 @@ int kvm_arch_check_processor_compat(void *opaque)
 
        WARN_ON(!irqs_disabled());
 
-       if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits)
+       if (__cr4_reserved_bits(cpu_has, c) !=
+           __cr4_reserved_bits(cpu_has, &boot_cpu_data))
                return -EIO;
 
        return ops->check_processor_compatibility();
@@ -10035,7 +10084,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 {
        /* Still write protect RO slot */
        if (new->flags & KVM_MEM_READONLY) {
-               kvm_mmu_slot_remove_write_access(kvm, new, PT_PAGE_TABLE_LEVEL);
+               kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
                return;
        }
 
@@ -10075,7 +10124,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
                } else {
                        int level =
                                kvm_dirty_log_manual_protect_and_init_set(kvm) ?
-                               PT_DIRECTORY_LEVEL : PT_PAGE_TABLE_LEVEL;
+                               PG_LEVEL_2M : PG_LEVEL_4K;
 
                        /*
                         * If we're with initial-all-set, we don't need
@@ -10177,11 +10226,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 
        if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
            (vcpu->arch.nmi_pending &&
-            kvm_x86_ops.nmi_allowed(vcpu)))
+            kvm_x86_ops.nmi_allowed(vcpu, false)))
                return true;
 
        if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
-           (vcpu->arch.smi_pending && !is_smm(vcpu)))
+           (vcpu->arch.smi_pending &&
+            kvm_x86_ops.smi_allowed(vcpu, false)))
                return true;
 
        if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -10192,6 +10242,11 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
        if (kvm_hv_has_stimer_pending(vcpu))
                return true;
 
+       if (is_guest_mode(vcpu) &&
+           kvm_x86_ops.nested_ops->hv_timer_pending &&
+           kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
+               return true;
+
        return false;
 }
 
@@ -10228,7 +10283,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-       return kvm_x86_ops.interrupt_allowed(vcpu);
+       return kvm_x86_ops.interrupt_allowed(vcpu, false);
 }
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
@@ -10293,12 +10348,14 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 
 static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
 {
+       BUILD_BUG_ON(!is_power_of_2(ASYNC_PF_PER_VCPU));
+
        return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
 }
 
 static inline u32 kvm_async_pf_next_probe(u32 key)
 {
-       return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
+       return (key + 1) & (ASYNC_PF_PER_VCPU - 1);
 }
 
 static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
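
The probe step above now assumes ASYNC_PF_PER_VCPU is a power of two (enforced by the BUILD_BUG_ON added to the hash function), so the wrap-around becomes a single mask instead of calling roundup_pow_of_two() on every step. A standalone illustration, using a hypothetical table size:

/* Standalone illustration of masked wrap-around probing; the size is hypothetical. */
#include <assert.h>
#include <stdio.h>

#define TABLE_SIZE 64U	/* must be a power of two for the mask to be exact */

static unsigned int next_probe(unsigned int key)
{
	/* Equivalent to (key + 1) % TABLE_SIZE only because TABLE_SIZE is 2^n. */
	return (key + 1) & (TABLE_SIZE - 1);
}

int main(void)
{
	/* Userspace analogue of the BUILD_BUG_ON power-of-two check. */
	assert((TABLE_SIZE & (TABLE_SIZE - 1)) == 0);

	printf("62 -> %u\n", next_probe(62));	/* 63 */
	printf("63 -> %u\n", next_probe(63));	/* wraps back to 0 */
	return 0;
}
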
@@ -10316,7 +10373,7 @@ static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
        int i;
        u32 key = kvm_async_pf_hash_fn(gfn);
 
-       for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
+       for (i = 0; i < ASYNC_PF_PER_VCPU &&
                     (vcpu->arch.apf.gfns[key] != gfn &&
                      vcpu->arch.apf.gfns[key] != ~0); i++)
                key = kvm_async_pf_next_probe(key);
@@ -10334,6 +10391,10 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
        u32 i, j, k;
 
        i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
+
+       if (WARN_ON_ONCE(vcpu->arch.apf.gfns[i] != gfn))
+               return;
+
        while (true) {
                vcpu->arch.apf.gfns[i] = ~0;
                do {
@@ -10393,7 +10454,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
         * If interrupts are off we cannot even use an artificial
         * halt state.
         */
-       return kvm_x86_ops.interrupt_allowed(vcpu);
+       return kvm_arch_interrupt_allowed(vcpu);
 }
 
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,