Merge branch 'kvm-sev-cgroup' into HEAD
[linux-2.6-microblaze.git] / arch / x86 / kvm / x86.c
index c9ba6f2..f0d0b6e 100644 (file)
@@ -275,8 +275,7 @@ static struct kmem_cache *x86_emulator_cache;
  * When called, it means the previous get/set msr reached an invalid msr.
  * Return true if we want to ignore/silent this failed msr access.
  */
-static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
-                                 u64 data, bool write)
+static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write)
 {
        const char *op = write ? "wrmsr" : "rdmsr";
 
@@ -1451,7 +1450,7 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
        if (r == KVM_MSR_RET_INVALID) {
                /* Unconditionally clear the output for simplicity */
                *data = 0;
-               if (kvm_msr_ignored_check(vcpu, index, 0, false))
+               if (kvm_msr_ignored_check(index, 0, false))
                        r = 0;
        }
 
@@ -1532,35 +1531,44 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 
 bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
 {
+       struct kvm_x86_msr_filter *msr_filter;
+       struct msr_bitmap_range *ranges;
        struct kvm *kvm = vcpu->kvm;
-       struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
-       u32 count = kvm->arch.msr_filter.count;
-       u32 i;
-       bool r = kvm->arch.msr_filter.default_allow;
+       bool allowed;
        int idx;
+       u32 i;
 
-       /* MSR filtering not set up or x2APIC enabled, allow everything */
-       if (!count || (index >= 0x800 && index <= 0x8ff))
+       /* x2APIC MSRs do not support filtering. */
+       if (index >= 0x800 && index <= 0x8ff)
                return true;
 
-       /* Prevent collision with set_msr_filter */
        idx = srcu_read_lock(&kvm->srcu);
 
-       for (i = 0; i < count; i++) {
+       msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu);
+       if (!msr_filter) {
+               allowed = true;
+               goto out;
+       }
+
+       allowed = msr_filter->default_allow;
+       ranges = msr_filter->ranges;
+
+       for (i = 0; i < msr_filter->count; i++) {
                u32 start = ranges[i].base;
                u32 end = start + ranges[i].nmsrs;
                u32 flags = ranges[i].flags;
                unsigned long *bitmap = ranges[i].bitmap;
 
                if ((index >= start) && (index < end) && (flags & type)) {
-                       r = !!test_bit(index - start, bitmap);
+                       allowed = !!test_bit(index - start, bitmap);
                        break;
                }
        }
 
+out:
        srcu_read_unlock(&kvm->srcu, idx);
 
-       return r;
+       return allowed;
 }
 EXPORT_SYMBOL_GPL(kvm_msr_allowed);
 
@@ -1617,7 +1625,7 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
        int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
 
        if (ret == KVM_MSR_RET_INVALID)
-               if (kvm_msr_ignored_check(vcpu, index, data, true))
+               if (kvm_msr_ignored_check(index, data, true))
                        ret = 0;
 
        return ret;
@@ -1655,7 +1663,7 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
        if (ret == KVM_MSR_RET_INVALID) {
                /* Unconditionally clear *data for simplicity */
                *data = 0;
-               if (kvm_msr_ignored_check(vcpu, index, 0, false))
+               if (kvm_msr_ignored_check(index, 0, false))
                        ret = 0;
        }
 
@@ -2360,7 +2368,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
        kvm_vcpu_write_tsc_offset(vcpu, offset);
        raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
-       spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
+       spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
        if (!matched) {
                kvm->arch.nr_vcpus_matched_tsc = 0;
        } else if (!already_matched) {
@@ -2368,7 +2376,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
        }
 
        kvm_track_tsc_matching(vcpu);
-       spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
+       spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
 }
 
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
@@ -2590,11 +2598,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
        int i;
        struct kvm_vcpu *vcpu;
        struct kvm_arch *ka = &kvm->arch;
+       unsigned long flags;
+
+       kvm_hv_invalidate_tsc_page(kvm);
 
-       spin_lock(&ka->pvclock_gtod_sync_lock);
        kvm_make_mclock_inprogress_request(kvm);
+
        /* no guest entries from this point */
+       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        pvclock_update_vm_gtod_copy(kvm);
+       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2602,8 +2615,6 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
        /* guest entries allowed */
        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
-
-       spin_unlock(&ka->pvclock_gtod_sync_lock);
 #endif
 }
 
@@ -2611,17 +2622,18 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 {
        struct kvm_arch *ka = &kvm->arch;
        struct pvclock_vcpu_time_info hv_clock;
+       unsigned long flags;
        u64 ret;
 
-       spin_lock(&ka->pvclock_gtod_sync_lock);
+       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        if (!ka->use_master_clock) {
-               spin_unlock(&ka->pvclock_gtod_sync_lock);
+               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
                return get_kvmclock_base_ns() + ka->kvmclock_offset;
        }
 
        hv_clock.tsc_timestamp = ka->master_cycle_now;
        hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-       spin_unlock(&ka->pvclock_gtod_sync_lock);
+       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        /* both __this_cpu_read() and rdtsc() should be on the same cpu */
        get_cpu();
@@ -2715,13 +2727,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
         * If the host uses TSC clock, then passthrough TSC as stable
         * to the guest.
         */
-       spin_lock(&ka->pvclock_gtod_sync_lock);
+       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        use_master_clock = ka->use_master_clock;
        if (use_master_clock) {
                host_tsc = ka->master_cycle_now;
                kernel_ns = ka->master_kernel_ns;
        }
-       spin_unlock(&ka->pvclock_gtod_sync_lock);
+       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
@@ -5425,25 +5437,34 @@ split_irqchip_unlock:
        return r;
 }
 
-static void kvm_clear_msr_filter(struct kvm *kvm)
+static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow)
+{
+       struct kvm_x86_msr_filter *msr_filter;
+
+       msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT);
+       if (!msr_filter)
+               return NULL;
+
+       msr_filter->default_allow = default_allow;
+       return msr_filter;
+}
+
+static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
 {
        u32 i;
-       u32 count = kvm->arch.msr_filter.count;
-       struct msr_bitmap_range ranges[16];
 
-       mutex_lock(&kvm->lock);
-       kvm->arch.msr_filter.count = 0;
-       memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0]));
-       mutex_unlock(&kvm->lock);
-       synchronize_srcu(&kvm->srcu);
+       if (!msr_filter)
+               return;
 
-       for (i = 0; i < count; i++)
-               kfree(ranges[i].bitmap);
+       for (i = 0; i < msr_filter->count; i++)
+               kfree(msr_filter->ranges[i].bitmap);
+
+       kfree(msr_filter);
 }
 
-static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range)
+static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
+                             struct kvm_msr_filter_range *user_range)
 {
-       struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
        struct msr_bitmap_range range;
        unsigned long *bitmap = NULL;
        size_t bitmap_size;
@@ -5477,11 +5498,9 @@ static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user
                goto err;
        }
 
-       /* Everything ok, add this range identifier to our global pool */
-       ranges[kvm->arch.msr_filter.count] = range;
-       /* Make sure we filled the array before we tell anyone to walk it */
-       smp_wmb();
-       kvm->arch.msr_filter.count++;
+       /* Everything ok, add this range identifier. */
+       msr_filter->ranges[msr_filter->count] = range;
+       msr_filter->count++;
 
        return 0;
 err:
@@ -5492,10 +5511,11 @@ err:
 static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
 {
        struct kvm_msr_filter __user *user_msr_filter = argp;
+       struct kvm_x86_msr_filter *new_filter, *old_filter;
        struct kvm_msr_filter filter;
        bool default_allow;
-       int r = 0;
        bool empty = true;
+       int r = 0;
        u32 i;
 
        if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
@@ -5508,25 +5528,32 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
        if (empty && !default_allow)
                return -EINVAL;
 
-       kvm_clear_msr_filter(kvm);
-
-       kvm->arch.msr_filter.default_allow = default_allow;
+       new_filter = kvm_alloc_msr_filter(default_allow);
+       if (!new_filter)
+               return -ENOMEM;
 
-       /*
-        * Protect from concurrent calls to this function that could trigger
-        * a TOCTOU violation on kvm->arch.msr_filter.count.
-        */
-       mutex_lock(&kvm->lock);
        for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
-               r = kvm_add_msr_filter(kvm, &filter.ranges[i]);
-               if (r)
-                       break;
+               r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
+               if (r) {
+                       kvm_free_msr_filter(new_filter);
+                       return r;
+               }
        }
 
+       mutex_lock(&kvm->lock);
+
+       /* The per-VM filter is protected by kvm->lock... */
+       old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
+
+       rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
+       synchronize_srcu(&kvm->srcu);
+
+       kvm_free_msr_filter(old_filter);
+
        kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
        mutex_unlock(&kvm->lock);
 
-       return r;
+       return 0;
 }
 
 long kvm_arch_vm_ioctl(struct file *filp,
@@ -5773,6 +5800,7 @@ set_pit2_out:
        }
 #endif
        case KVM_SET_CLOCK: {
+               struct kvm_arch *ka = &kvm->arch;
                struct kvm_clock_data user_ns;
                u64 now_ns;
 
@@ -5791,8 +5819,22 @@ set_pit2_out:
                 * pvclock_update_vm_gtod_copy().
                 */
                kvm_gen_update_masterclock(kvm);
-               now_ns = get_kvmclock_ns(kvm);
-               kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
+
+               /*
+                * This pairs with kvm_guest_time_update(): when masterclock is
+                * in use, we use master_kernel_ns + kvmclock_offset to set
+                * unsigned 'system_time' so if we use get_kvmclock_ns() (which
+                * is slightly ahead) here we risk going negative on unsigned
+                * 'system_time' when 'user_ns.clock' is very small.
+                */
+               spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+               if (kvm->arch.use_master_clock)
+                       now_ns = ka->master_kernel_ns;
+               else
+                       now_ns = get_kvmclock_base_ns();
+               ka->kvmclock_offset = user_ns.clock - now_ns;
+               spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+
                kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
                break;
        }
@@ -6678,7 +6720,7 @@ static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
                int cpu = get_cpu();
 
                cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
-               smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
+               on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask,
                                wbinvd_ipi, NULL, 1);
                put_cpu();
                cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
@@ -7773,6 +7815,7 @@ static void kvm_hyperv_tsc_notifier(void)
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int cpu;
+       unsigned long flags;
 
        mutex_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
@@ -7788,17 +7831,15 @@ static void kvm_hyperv_tsc_notifier(void)
        list_for_each_entry(kvm, &vm_list, vm_list) {
                struct kvm_arch *ka = &kvm->arch;
 
-               spin_lock(&ka->pvclock_gtod_sync_lock);
-
+               spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
                pvclock_update_vm_gtod_copy(kvm);
+               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
                kvm_for_each_vcpu(cpu, vcpu, kvm)
                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
                kvm_for_each_vcpu(cpu, vcpu, kvm)
                        kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
-
-               spin_unlock(&ka->pvclock_gtod_sync_lock);
        }
        mutex_unlock(&kvm_lock);
 }
@@ -10745,8 +10786,6 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-       u32 i;
-
        if (current->mm == kvm->mm) {
                /*
                 * Free memory regions allocated on behalf of userspace,
@@ -10762,8 +10801,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                mutex_unlock(&kvm->slots_lock);
        }
        static_call_cond(kvm_x86_vm_destroy)(kvm);
-       for (i = 0; i < kvm->arch.msr_filter.count; i++)
-               kfree(kvm->arch.msr_filter.ranges[i].bitmap);
+       kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
        kvm_pic_destroy(kvm);
        kvm_ioapic_destroy(kvm);
        kvm_free_vcpus(kvm);