X-Git-Url: http://git.monstr.eu/?a=blobdiff_plain;f=arch%2Fx86%2Fkvm%2Fx86.c;h=397f599b20e5a8aeb4ff1423a346d1c4d2c1e2c4;hb=f59cddd8517ab880fb09bf1465b07b337e058b22;hp=c4015a43cc8a7d2df5fbd7d07a82f676f02bbd59;hpb=1b21c8db0e3b71523ada0cf568372ebfcf0d3466;p=linux-2.6-microblaze.git

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c4015a43cc8a..397f599b20e5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -194,7 +194,7 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
 u64 __read_mostly host_efer;
 EXPORT_SYMBOL_GPL(host_efer);
 
-bool __read_mostly allow_smaller_maxphyaddr;
+bool __read_mostly allow_smaller_maxphyaddr = false;
 EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
 
 static u64 __read_mostly host_xss;
@@ -982,6 +982,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
 	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
 				   X86_CR4_SMEP;
+	unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
 
 	if (kvm_valid_cr4(vcpu, cr4))
 		return 1;
@@ -1009,7 +1010,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (kvm_x86_ops.set_cr4(vcpu, cr4))
 		return 1;
 
-	if (((cr4 ^ old_cr4) & pdptr_bits) ||
+	if (((cr4 ^ old_cr4) & mmu_role_bits) ||
 	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_mmu_reset_context(vcpu);
 
@@ -1457,6 +1458,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	u64 old_efer = vcpu->arch.efer;
 	u64 efer = msr_info->data;
+	int r;
 
 	if (efer & efer_reserved_bits)
 		return 1;
@@ -1473,7 +1475,11 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	efer &= ~EFER_LMA;
 	efer |= vcpu->arch.efer & EFER_LMA;
 
-	kvm_x86_ops.set_efer(vcpu, efer);
+	r = kvm_x86_ops.set_efer(vcpu, efer);
+	if (r) {
+		WARN_ON(r > 0);
+		return r;
+	}
 
 	/* Update reserved bits */
 	if ((efer ^ old_efer) & EFER_NX)
@@ -1497,8 +1503,8 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
 	bool r = kvm->arch.msr_filter.default_allow;
 	int idx;
 
-	/* MSR filtering not set up, allow everything */
-	if (!count)
+	/* MSR filtering not set up or x2APIC enabled, allow everything */
+	if (!count || (index >= 0x800 && index <= 0x8ff))
 		return true;
 
 	/* Prevent collision with set_msr_filter */
@@ -1737,13 +1743,16 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 	r = kvm_set_msr(vcpu, ecx, data);
 
 	/* MSR write failed? See if we should ask user space */
-	if (r && kvm_set_msr_user_space(vcpu, ecx, data, r)) {
+	if (r && kvm_set_msr_user_space(vcpu, ecx, data, r))
 		/* Bounce to user space */
 		return 0;
-	}
+
+	/* Signal all other negative errors to userspace */
+	if (r < 0)
+		return r;
 
 	/* MSR write failed? Inject a #GP */
-	if (r) {
+	if (r > 0) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
@@ -1916,6 +1925,8 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 	struct pvclock_wall_clock wc;
 	u64 wall_nsec;
 
+	kvm->arch.wall_clock = wall_clock;
+
 	if (!wall_clock)
 		return;
 
@@ -1948,6 +1959,34 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 }
 
+static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
+				  bool old_msr, bool host_initiated)
+{
+	struct kvm_arch *ka = &vcpu->kvm->arch;
+
+	if (vcpu->vcpu_id == 0 && !host_initiated) {
+		if (ka->boot_vcpu_runs_old_kvmclock != old_msr)
+			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+
+		ka->boot_vcpu_runs_old_kvmclock = old_msr;
+	}
+
+	vcpu->arch.time = system_time;
+	kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
+
+	/* we verify if the enable bit is set... */
+	vcpu->arch.pv_time_enabled = false;
+	if (!(system_time & 1))
+		return;
+
+	if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
+	       &vcpu->arch.pv_time, system_time & ~1ULL,
+	       sizeof(struct pvclock_vcpu_time_info)))
+		vcpu->arch.pv_time_enabled = true;
+
+	return;
+}
+
 static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
 {
 	do_shl32_div32(dividend, divisor);
@@ -2812,24 +2851,19 @@ static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
 	u32 page_num = data & ~PAGE_MASK;
 	u64 page_addr = data & PAGE_MASK;
 	u8 *page;
-	int r;
 
-	r = -E2BIG;
 	if (page_num >= blob_size)
-		goto out;
-	r = -ENOMEM;
+		return 1;
+
 	page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
-	if (IS_ERR(page)) {
-		r = PTR_ERR(page);
-		goto out;
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+
+	if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
+		kfree(page);
+		return 1;
 	}
-	if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
-		goto out_free;
-	r = 0;
-out_free:
-	kfree(page);
-out:
-	return r;
+	return 0;
 }
 
 static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
@@ -2847,6 +2881,14 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 	if (data & 0x30)
 		return 1;
 
+	if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
+	    (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
+		return 1;
+
+	if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
+	    (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
+		return 1;
+
 	if (!lapic_in_kernel(vcpu))
 		return data ? 1 : 0;
 
@@ -2924,10 +2966,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	 * Doing a TLB flush here, on the guest's behalf, can avoid
 	 * expensive IPIs.
 	 */
-	trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
-		st->preempted & KVM_VCPU_FLUSH_TLB);
-	if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
-		kvm_vcpu_flush_tlb_guest(vcpu);
+	if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+		trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+				       st->preempted & KVM_VCPU_FLUSH_TLB);
+		if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+			kvm_vcpu_flush_tlb_guest(vcpu);
+	}
 
 	vcpu->arch.st.preempted = 0;
 
@@ -3088,53 +3132,54 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.smi_count = data;
 		break;
 	case MSR_KVM_WALL_CLOCK_NEW:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+			return 1;
+
+		kvm_write_wall_clock(vcpu->kvm, data);
+		break;
 	case MSR_KVM_WALL_CLOCK:
-		vcpu->kvm->arch.wall_clock = data;
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+			return 1;
+
 		kvm_write_wall_clock(vcpu->kvm, data);
 		break;
 	case MSR_KVM_SYSTEM_TIME_NEW:
-	case MSR_KVM_SYSTEM_TIME: {
-		struct kvm_arch *ka = &vcpu->kvm->arch;
-
-		if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
-			bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
-
-			if (ka->boot_vcpu_runs_old_kvmclock != tmp)
-				kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
-
-			ka->boot_vcpu_runs_old_kvmclock = tmp;
-		}
-
-		vcpu->arch.time = data;
-		kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
-
-		/* we verify if the enable bit is set... */
-		vcpu->arch.pv_time_enabled = false;
-		if (!(data & 1))
-			break;
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+			return 1;
 
-		if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
-		     &vcpu->arch.pv_time, data & ~1ULL,
-		     sizeof(struct pvclock_vcpu_time_info)))
-			vcpu->arch.pv_time_enabled = true;
+		kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
+		break;
+	case MSR_KVM_SYSTEM_TIME:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+			return 1;
 
+		kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
 		break;
-	}
 	case MSR_KVM_ASYNC_PF_EN:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+			return 1;
+
 		if (kvm_pv_enable_async_pf(vcpu, data))
 			return 1;
 		break;
 	case MSR_KVM_ASYNC_PF_INT:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
+			return 1;
+
 		if (kvm_pv_enable_async_pf_int(vcpu, data))
 			return 1;
 		break;
 	case MSR_KVM_ASYNC_PF_ACK:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+			return 1;
 		if (data & 0x1) {
 			vcpu->arch.apf.pageready_pending = false;
 			kvm_check_async_pf_completion(vcpu);
 		}
 		break;
 	case MSR_KVM_STEAL_TIME:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
+			return 1;
 		if (unlikely(!sched_info_on()))
 			return 1;
 
@@ -3151,11 +3196,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 		break;
 	case MSR_KVM_PV_EOI_EN:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
+			return 1;
+
 		if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
 			return 1;
 		break;
 
 	case MSR_KVM_POLL_CONTROL:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
+			return 1;
+
 		/* only enable bit supported */
 		if (data & (-1ULL << 1))
 			return 1;
@@ -3350,9 +3401,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 * even when not intercepted. AMD manual doesn't explicitly
 		 * state this but appears to behave the same.
 		 *
-		 * Unconditionally return L1's TSC offset on userspace reads
-		 * so that userspace reads and writes always operate on L1's
-		 * offset, e.g. to ensure deterministic behavior for migration.
+		 * On userspace reads and writes, however, we unconditionally
+		 * return L1's TSC value to ensure backwards-compatible
+		 * behavior for migration.
 		 */
 		u64 tsc_offset = msr_info->host_initiated ?
 				 vcpu->arch.l1_tsc_offset : vcpu->arch.tsc_offset;
@@ -3651,6 +3702,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_LAST_CPU:
 	case KVM_CAP_X86_USER_SPACE_MSR:
 	case KVM_CAP_X86_MSR_FILTER:
+	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
 		r = 1;
 		break;
 	case KVM_CAP_SYNC_REGS:
@@ -4521,6 +4573,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 
 		return kvm_x86_ops.enable_direct_tlbflush(vcpu);
 
+	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
+		vcpu->arch.pv_cpuid.enforce = cap->args[0];
+
+		return 0;
+
 	default:
 		return -EINVAL;
 	}
@@ -5252,14 +5309,21 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
 	struct kvm_msr_filter filter;
 	bool default_allow;
 	int r = 0;
+	bool empty = true;
 	u32 i;
 
 	if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
 		return -EFAULT;
 
-	kvm_clear_msr_filter(kvm);
+	for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
+		empty &= !filter.ranges[i].nmsrs;
 
 	default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
+	if (empty && !default_allow)
+		return -EINVAL;
+
+	kvm_clear_msr_filter(kvm);
+
 	kvm->arch.msr_filter.default_allow = default_allow;
 
 	/*
@@ -7986,11 +8050,16 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
+	ret = -KVM_ENOSYS;
+
 	switch (nr) {
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
 	case KVM_HC_KICK_CPU:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
+			break;
+
 		kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
 		kvm_sched_yield(vcpu->kvm, a1);
 		ret = 0;
@@ -8001,9 +8070,15 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		break;
 #endif
 	case KVM_HC_SEND_IPI:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
+			break;
+
 		ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
 		break;
 	case KVM_HC_SCHED_YIELD:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
+			break;
+
 		kvm_sched_yield(vcpu->kvm, a0);
 		ret = 0;
 		break;
@@ -9877,6 +9952,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kvm_mmu_destroy(vcpu);
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	free_page((unsigned long)vcpu->arch.pio_data);
+	kvfree(vcpu->arch.cpuid_entries);
 	if (!lapic_in_kernel(vcpu))
 		static_key_slow_dec(&kvm_no_apic_vcpu);
 }