KVM: Allow adjust_tsc_offset to be in host or guest cycles

[linux-2.6-microblaze.git] / arch / x86 / kvm / x86.c
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 9cbfc06..3b93130 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -96,6 +96,10 @@ EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
  u32  kvm_max_guest_tsc_khz;
  EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
  
+/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
+static u32 tsc_tolerance_ppm = 250;
+module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
+
  #define KVM_NR_SHARED_MSRS 16
  
  struct kvm_shared_msrs_global {
@@ -968,49 +972,50 @@ static inline u64 get_kernel_ns(void)
  static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
  unsigned long max_tsc_khz;
  
-static inline int kvm_tsc_changes_freq(void)
+static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
  {
-       int cpu = get_cpu();
-       int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
-                 cpufreq_quick_get(cpu) != 0;
-       put_cpu();
-       return ret;
+       return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
+                                  vcpu->arch.virtual_tsc_shift);
  }
  
-u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
+static u32 adjust_tsc_khz(u32 khz, s32 ppm)
  {
-       if (vcpu->arch.virtual_tsc_khz)
-               return vcpu->arch.virtual_tsc_khz;
-       else
-               return __this_cpu_read(cpu_tsc_khz);
+       u64 v = (u64)khz * (1000000 + ppm);
+       do_div(v, 1000000);
+       return v;
  }
  
-static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
+static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
  {
-       u64 ret;
+       u32 thresh_lo, thresh_hi;
+       int use_scaling = 0;
  
-       WARN_ON(preemptible());
-       if (kvm_tsc_changes_freq())
-               printk_once(KERN_WARNING
-                "kvm: unreliable cycle conversion on adjustable rate TSC\n");
-       ret = nsec * vcpu_tsc_khz(vcpu);
-       do_div(ret, USEC_PER_SEC);
-       return ret;
-}
-
-static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
-{
         /* Compute a scale to convert nanoseconds in TSC cycles */
         kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
-                          &vcpu->arch.tsc_catchup_shift,
-                          &vcpu->arch.tsc_catchup_mult);
+                          &vcpu->arch.virtual_tsc_shift,
+                          &vcpu->arch.virtual_tsc_mult);
+       vcpu->arch.virtual_tsc_khz = this_tsc_khz;
+
+       /*
+        * Compute the variation in TSC rate which is acceptable
+        * within the range of tolerance and decide if the
+        * rate being applied is within the bounds of the hardware
+        * rate.  If so, no scaling or compensation need be done.
+        */
+       thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
+       thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
+       if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
+               pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
+               use_scaling = 1;
+       }
+       kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
  }
  
  static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
  {
         u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
-                                     vcpu->arch.tsc_catchup_mult,
-                                     vcpu->arch.tsc_catchup_shift);
+                                     vcpu->arch.virtual_tsc_mult,
+                                     vcpu->arch.virtual_tsc_shift);
         tsc += vcpu->arch.last_tsc_write;
         return tsc;
  }
@@ -1020,40 +1025,53 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
         struct kvm *kvm = vcpu->kvm;
         u64 offset, ns, elapsed;
         unsigned long flags;
-       s64 sdiff;
+       s64 nsdiff;
  
         raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
         offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
         ns = get_kernel_ns();
         elapsed = ns - kvm->arch.last_tsc_nsec;
-       sdiff = data - kvm->arch.last_tsc_write;
-       if (sdiff < 0)
-               sdiff = -sdiff;
+
+       /* n.b. - signed multiplication and division required */
+       nsdiff = data - kvm->arch.last_tsc_write;
+#ifdef CONFIG_X86_64
+       nsdiff = (nsdiff * 1000) / vcpu->arch.virtual_tsc_khz;
+#else
+       /* do_div() only does unsigned */
+       asm("idivl %2; xor %%edx, %%edx"
+           : "=A"(nsdiff)
+           : "A"(nsdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
+#endif
+       nsdiff -= elapsed;
+       if (nsdiff < 0)
+               nsdiff = -nsdiff;
  
         /*
-        * Special case: close write to TSC within 5 seconds of
-        * another CPU is interpreted as an attempt to synchronize
-        * The 5 seconds is to accommodate host load / swapping as
-        * well as any reset of TSC during the boot process.
-        *
-        * In that case, for a reliable TSC, we can match TSC offsets,
-        * or make a best guest using elapsed value.
-        */
-       if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) &&
-           elapsed < 5ULL * NSEC_PER_SEC) {
+        * Special case: a TSC write whose virtual cycle time is within a
+        * small delta (1 second) of real time is interpreted as an attempt
+        * to synchronize the CPU.
+         *
+        * For a reliable TSC, we can match TSC offsets, and for an unstable
+        * TSC, we add elapsed time in this computation.  We could let the
+        * compensation code attempt to catch up if we fall behind, but
+        * it's better to try to match offsets from the beginning.
+         */
+       if (nsdiff < NSEC_PER_SEC &&
+           vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
                 if (!check_tsc_unstable()) {
                         offset = kvm->arch.last_tsc_offset;
                         pr_debug("kvm: matched tsc offset for %llu\n", data);
                 } else {
                         u64 delta = nsec_to_cycles(vcpu, elapsed);
-                       offset += delta;
+                       data += delta;
+                       offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
                         pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
                 }
-               ns = kvm->arch.last_tsc_nsec;
         }
         kvm->arch.last_tsc_nsec = ns;
         kvm->arch.last_tsc_write = data;
         kvm->arch.last_tsc_offset = offset;
+       kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
         kvm_x86_ops->write_tsc_offset(vcpu, offset);
         raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
  
@@ -1061,6 +1079,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
         vcpu->arch.hv_clock.tsc_timestamp = 0;
         vcpu->arch.last_tsc_write = data;
         vcpu->arch.last_tsc_nsec = ns;
+       vcpu->arch.last_guest_tsc = data;
  }
  EXPORT_SYMBOL_GPL(kvm_write_tsc);
  
@@ -1077,7 +1096,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
         local_irq_save(flags);
         tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
         kernel_ns = get_kernel_ns();
-       this_tsc_khz = vcpu_tsc_khz(v);
+       this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
         if (unlikely(this_tsc_khz == 0)) {
                 local_irq_restore(flags);
                 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -1097,7 +1116,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
         if (vcpu->tsc_catchup) {
                 u64 tsc = compute_guest_tsc(v, kernel_ns);
                 if (tsc > tsc_timestamp) {
-                       kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp);
+                       adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
                         tsc_timestamp = tsc;
                 }
         }
@@ -1129,7 +1148,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
          * observed by the guest and ensure the new system time is greater.
          */
         max_kernel_ns = 0;
-       if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) {
+       if (vcpu->hv_clock.tsc_timestamp) {
                 max_kernel_ns = vcpu->last_guest_tsc -
                                 vcpu->hv_clock.tsc_timestamp;
                 max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
@@ -1675,6 +1694,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                  */
                 pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
                 break;
+       case MSR_AMD64_OSVW_ID_LENGTH:
+               if (!guest_cpuid_has_osvw(vcpu))
+                       return 1;
+               vcpu->arch.osvw.length = data;
+               break;
+       case MSR_AMD64_OSVW_STATUS:
+               if (!guest_cpuid_has_osvw(vcpu))
+                       return 1;
+               vcpu->arch.osvw.status = data;
+               break;
         default:
                 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
                         return xen_hvm_config(vcpu, data);
@@ -1959,6 +1988,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
                  */
                 data = 0xbe702111;
                 break;
+       case MSR_AMD64_OSVW_ID_LENGTH:
+               if (!guest_cpuid_has_osvw(vcpu))
+                       return 1;
+               data = vcpu->arch.osvw.length;
+               break;
+       case MSR_AMD64_OSVW_STATUS:
+               if (!guest_cpuid_has_osvw(vcpu))
+                       return 1;
+               data = vcpu->arch.osvw.status;
+               break;
         default:
                 if (kvm_pmu_msr(vcpu, msr))
                         return kvm_pmu_get_msr(vcpu, msr, pdata);
@@ -2214,18 +2253,14 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  
         kvm_x86_ops->vcpu_load(vcpu, cpu);
         if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
-               /* Make sure TSC doesn't go backwards */
-               s64 tsc_delta;
-               u64 tsc;
-
-               tsc = kvm_x86_ops->read_l1_tsc(vcpu);
-               tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
-                            tsc - vcpu->arch.last_guest_tsc;
-
+               s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
+                               native_read_tsc() - vcpu->arch.last_host_tsc;
                 if (tsc_delta < 0)
                         mark_tsc_unstable("KVM discovered backwards TSC");
                 if (check_tsc_unstable()) {
-                       kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
+                       u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
+                                               vcpu->arch.last_guest_tsc);
+                       kvm_x86_ops->write_tsc_offset(vcpu, offset);
                         vcpu->arch.tsc_catchup = 1;
                 }
                 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2242,7 +2277,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
  {
         kvm_x86_ops->vcpu_put(vcpu);
         kvm_put_guest_fpu(vcpu);
-       vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
+       vcpu->arch.last_host_tsc = native_read_tsc();
  }
  
  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@ -2784,26 +2819,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                 u32 user_tsc_khz;
  
                 r = -EINVAL;
-               if (!kvm_has_tsc_control)
-                       break;
-
                 user_tsc_khz = (u32)arg;
  
                 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
                         goto out;
  
-               kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz);
+               if (user_tsc_khz == 0)
+                       user_tsc_khz = tsc_khz;
+
+               kvm_set_tsc_khz(vcpu, user_tsc_khz);
  
                 r = 0;
                 goto out;
         }
         case KVM_GET_TSC_KHZ: {
-               r = -EIO;
-               if (check_tsc_unstable())
-                       goto out;
-
-               r = vcpu_tsc_khz(vcpu);
-
+               r = vcpu->arch.virtual_tsc_khz;
                 goto out;
         }
         default:
@@ -2814,6 +2844,11 @@ out:
         return r;
  }
  
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+       return VM_FAULT_SIGBUS;
+}
+
  static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
  {
         int ret;
@@ -5287,6 +5322,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                 profile_hit(KVM_PROFILING, (void *)rip);
         }
  
+       if (unlikely(vcpu->arch.tsc_always_catchup))
+               kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
  
         kvm_lapic_sync_from_vapic(vcpu);
  
@@ -5979,7 +6016,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
         }
         vcpu->arch.pio_data = page_address(page);
  
-       kvm_init_tsc_catchup(vcpu, max_tsc_khz);
+       kvm_set_tsc_khz(vcpu, max_tsc_khz);
  
         r = kvm_mmu_create(vcpu);
         if (r < 0)
@@ -6031,8 +6068,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
         free_page((unsigned long)vcpu->arch.pio_data);
  }
  
-int kvm_arch_init_vm(struct kvm *kvm)
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
  {
+       if (type)
+               return -EINVAL;
+
         INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
         INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);