u32 kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
+/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
+static u32 tsc_tolerance_ppm = 250;
+module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
unsigned long max_tsc_khz;
-static inline int kvm_tsc_changes_freq(void)
+static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{
- int cpu = get_cpu();
- int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
- cpufreq_quick_get(cpu) != 0;
- put_cpu();
- return ret;
+ return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
+ vcpu->arch.virtual_tsc_shift);
}
-u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
+static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
- if (vcpu->arch.virtual_tsc_khz)
- return vcpu->arch.virtual_tsc_khz;
- else
- return __this_cpu_read(cpu_tsc_khz);
+ u64 v = (u64)khz * (1000000 + ppm);
+ do_div(v, 1000000);
+ return v;
}
-static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
+static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
{
- u64 ret;
+ u32 thresh_lo, thresh_hi;
+ int use_scaling = 0;
- WARN_ON(preemptible());
- if (kvm_tsc_changes_freq())
- printk_once(KERN_WARNING
- "kvm: unreliable cycle conversion on adjustable rate TSC\n");
- ret = nsec * vcpu_tsc_khz(vcpu);
- do_div(ret, USEC_PER_SEC);
- return ret;
-}
-
-static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
-{
/* Compute a scale to convert nanoseconds in TSC cycles */
kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
- &vcpu->arch.tsc_catchup_shift,
- &vcpu->arch.tsc_catchup_mult);
+ &vcpu->arch.virtual_tsc_shift,
+ &vcpu->arch.virtual_tsc_mult);
+ vcpu->arch.virtual_tsc_khz = this_tsc_khz;
+
+ /*
+ * Compute the variation in TSC rate which is acceptable
+ * within the range of tolerance and decide if the
+ * rate being applied is within that bounds of the hardware
+ * rate. If so, no scaling or compensation need be done.
+ */
+ thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
+ thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
+ if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
+ pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
+ use_scaling = 1;
+ }
+ kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
}
static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
- vcpu->arch.tsc_catchup_mult,
- vcpu->arch.tsc_catchup_shift);
+ vcpu->arch.virtual_tsc_mult,
+ vcpu->arch.virtual_tsc_shift);
tsc += vcpu->arch.last_tsc_write;
return tsc;
}
struct kvm *kvm = vcpu->kvm;
u64 offset, ns, elapsed;
unsigned long flags;
- s64 sdiff;
+ s64 nsdiff;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
ns = get_kernel_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
- sdiff = data - kvm->arch.last_tsc_write;
- if (sdiff < 0)
- sdiff = -sdiff;
+
+ /* n.b - signed multiplication and division required */
+ nsdiff = data - kvm->arch.last_tsc_write;
+#ifdef CONFIG_X86_64
+ nsdiff = (nsdiff * 1000) / vcpu->arch.virtual_tsc_khz;
+#else
+ /* do_div() only does unsigned */
+ asm("idivl %2; xor %%edx, %%edx"
+ : "=A"(nsdiff)
+ : "A"(nsdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
+#endif
+ nsdiff -= elapsed;
+ if (nsdiff < 0)
+ nsdiff = -nsdiff;
/*
- * Special case: close write to TSC within 5 seconds of
- * another CPU is interpreted as an attempt to synchronize
- * The 5 seconds is to accommodate host load / swapping as
- * well as any reset of TSC during the boot process.
- *
- * In that case, for a reliable TSC, we can match TSC offsets,
- * or make a best guest using elapsed value.
- */
- if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) &&
- elapsed < 5ULL * NSEC_PER_SEC) {
+ * Special case: TSC write with a small delta (1 second) of virtual
+ * cycle time against real time is interpreted as an attempt to
+ * synchronize the CPU.
+ *
+ * For a reliable TSC, we can match TSC offsets, and for an unstable
+ * TSC, we add elapsed time in this computation. We could let the
+ * compensation code attempt to catch up if we fall behind, but
+ * it's better to try to match offsets from the beginning.
+ */
+ if (nsdiff < NSEC_PER_SEC &&
+ vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
if (!check_tsc_unstable()) {
offset = kvm->arch.last_tsc_offset;
pr_debug("kvm: matched tsc offset for %llu\n", data);
} else {
u64 delta = nsec_to_cycles(vcpu, elapsed);
- offset += delta;
+ data += delta;
+ offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
- ns = kvm->arch.last_tsc_nsec;
}
kvm->arch.last_tsc_nsec = ns;
kvm->arch.last_tsc_write = data;
kvm->arch.last_tsc_offset = offset;
+ kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
kvm_x86_ops->write_tsc_offset(vcpu, offset);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
vcpu->arch.hv_clock.tsc_timestamp = 0;
vcpu->arch.last_tsc_write = data;
vcpu->arch.last_tsc_nsec = ns;
+ vcpu->arch.last_guest_tsc = data;
}
EXPORT_SYMBOL_GPL(kvm_write_tsc);
local_irq_save(flags);
tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
kernel_ns = get_kernel_ns();
- this_tsc_khz = vcpu_tsc_khz(v);
+ this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
if (unlikely(this_tsc_khz == 0)) {
local_irq_restore(flags);
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
if (vcpu->tsc_catchup) {
u64 tsc = compute_guest_tsc(v, kernel_ns);
if (tsc > tsc_timestamp) {
- kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp);
+ adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
tsc_timestamp = tsc;
}
}
* observed by the guest and ensure the new system time is greater.
*/
max_kernel_ns = 0;
- if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) {
+ if (vcpu->hv_clock.tsc_timestamp) {
max_kernel_ns = vcpu->last_guest_tsc -
vcpu->hv_clock.tsc_timestamp;
max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
*/
pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
break;
+ case MSR_AMD64_OSVW_ID_LENGTH:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ vcpu->arch.osvw.length = data;
+ break;
+ case MSR_AMD64_OSVW_STATUS:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ vcpu->arch.osvw.status = data;
+ break;
default:
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
*/
data = 0xbe702111;
break;
+ case MSR_AMD64_OSVW_ID_LENGTH:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ data = vcpu->arch.osvw.length;
+ break;
+ case MSR_AMD64_OSVW_STATUS:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ data = vcpu->arch.osvw.status;
+ break;
default:
if (kvm_pmu_msr(vcpu, msr))
return kvm_pmu_get_msr(vcpu, msr, pdata);
kvm_x86_ops->vcpu_load(vcpu, cpu);
if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
- /* Make sure TSC doesn't go backwards */
- s64 tsc_delta;
- u64 tsc;
-
- tsc = kvm_x86_ops->read_l1_tsc(vcpu);
- tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
- tsc - vcpu->arch.last_guest_tsc;
-
+ s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
+ native_read_tsc() - vcpu->arch.last_host_tsc;
if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC");
if (check_tsc_unstable()) {
- kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
+ u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
+ vcpu->arch.last_guest_tsc);
+ kvm_x86_ops->write_tsc_offset(vcpu, offset);
vcpu->arch.tsc_catchup = 1;
}
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
{
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
- vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
+ vcpu->arch.last_host_tsc = native_read_tsc();
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
u32 user_tsc_khz;
r = -EINVAL;
- if (!kvm_has_tsc_control)
- break;
-
user_tsc_khz = (u32)arg;
if (user_tsc_khz >= kvm_max_guest_tsc_khz)
goto out;
- kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz);
+ if (user_tsc_khz == 0)
+ user_tsc_khz = tsc_khz;
+
+ kvm_set_tsc_khz(vcpu, user_tsc_khz);
r = 0;
goto out;
}
case KVM_GET_TSC_KHZ: {
- r = -EIO;
- if (check_tsc_unstable())
- goto out;
-
- r = vcpu_tsc_khz(vcpu);
-
+ r = vcpu->arch.virtual_tsc_khz;
goto out;
}
default:
return r;
}
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
{
int ret;
profile_hit(KVM_PROFILING, (void *)rip);
}
+ if (unlikely(vcpu->arch.tsc_always_catchup))
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
kvm_lapic_sync_from_vapic(vcpu);
}
vcpu->arch.pio_data = page_address(page);
- kvm_init_tsc_catchup(vcpu, max_tsc_khz);
+ kvm_set_tsc_khz(vcpu, max_tsc_khz);
r = kvm_mmu_create(vcpu);
if (r < 0)
free_page((unsigned long)vcpu->arch.pio_data);
}
-int kvm_arch_init_vm(struct kvm *kvm)
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
+ if (type)
+ return -EINVAL;
+
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);