Merge tag 'kvm-s390-master-5.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git...
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6b4feb9..f7445c3 100644
@@ -51,6 +51,7 @@
 #include <linux/io.h>
 #include <linux/lockdep.h>
 #include <linux/kthread.h>
+#include <linux/suspend.h>
 
 #include <asm/processor.h>
 #include <asm/ioctl.h>
@@ -114,7 +115,6 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu);
 struct dentry *kvm_debugfs_dir;
 EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
 
-static int kvm_debugfs_num_entries;
 static const struct file_operations stat_fops_per_vm;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
@@ -307,6 +307,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
        return kvm_make_all_cpus_request_except(kvm, req, NULL);
 }
+EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
 
 #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
@@ -330,7 +331,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
         */
        if (!kvm_arch_flush_remote_tlb(kvm)
            || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
-               ++kvm->stat.remote_tlb_flush;
+               ++kvm->stat.generic.remote_tlb_flush;
        cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
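
The stat counters touched throughout this patch have moved under a generic sub-struct that every architecture embeds in its kvm_vm_stat/kvm_vcpu_stat, which is why the increment above becomes ++kvm->stat.generic.remote_tlb_flush. The containers themselves are defined outside this file (in kvm_types.h); a rough, hedged sketch of their contents, inferred from the stat.generic accesses in this diff:

	/*
	 * Rough sketch of the generic stat containers (defined in
	 * include/linux/kvm_types.h, not in this excerpt; the field list is
	 * inferred from the stat.generic accesses elsewhere in this patch).
	 */
	struct kvm_vm_stat_generic {
		u64 remote_tlb_flush;
	};

	struct kvm_vcpu_stat_generic {
		u64 halt_successful_poll;
		u64 halt_attempted_poll;
		u64 halt_poll_invalid;
		u64 halt_wakeup;
		u64 halt_poll_success_ns;
		u64 halt_poll_fail_ns;
	};

Each architecture's struct kvm_vm_stat and struct kvm_vcpu_stat embed one of these as a field named generic, so every former vcpu->stat.halt_* access in this file gains a .generic. hop below.
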
@@ -779,6 +780,38 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
+#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
+static int kvm_pm_notifier_call(struct notifier_block *bl,
+                               unsigned long state,
+                               void *unused)
+{
+       struct kvm *kvm = container_of(bl, struct kvm, pm_notifier);
+
+       return kvm_arch_pm_notifier(kvm, state);
+}
+
+static void kvm_init_pm_notifier(struct kvm *kvm)
+{
+       kvm->pm_notifier.notifier_call = kvm_pm_notifier_call;
+       /* Suspend KVM before we suspend ftrace, RCU, etc. */
+       kvm->pm_notifier.priority = INT_MAX;
+       register_pm_notifier(&kvm->pm_notifier);
+}
+
+static void kvm_destroy_pm_notifier(struct kvm *kvm)
+{
+       unregister_pm_notifier(&kvm->pm_notifier);
+}
+#else /* !CONFIG_HAVE_KVM_PM_NOTIFIER */
+static void kvm_init_pm_notifier(struct kvm *kvm)
+{
+}
+
+static void kvm_destroy_pm_notifier(struct kvm *kvm)
+{
+}
+#endif /* CONFIG_HAVE_KVM_PM_NOTIFIER */
+
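
kvm_pm_notifier_call() simply forwards the PM state to kvm_arch_pm_notifier(), which only exists on architectures that select CONFIG_HAVE_KVM_PM_NOTIFIER. A minimal arch-side handler might look like the sketch below; kvm_arch_suspend_notifier() is a hypothetical per-arch helper, not something defined in this patch:

	/*
	 * Illustrative arch-side hook, not part of this file.  The PM_* states
	 * come from <linux/suspend.h> (included above); NOTIFY_DONE tells the
	 * PM core to keep going.  kvm_arch_suspend_notifier() is a placeholder.
	 */
	int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
	{
		switch (state) {
		case PM_HIBERNATION_PREPARE:
		case PM_SUSPEND_PREPARE:
			return kvm_arch_suspend_notifier(kvm);
		}

		return NOTIFY_DONE;
	}

The INT_MAX priority set in kvm_init_pm_notifier() makes this callback run before lower-priority PM notifiers, i.e. KVM gets to quiesce the VM before ftrace, RCU and friends are suspended, as the comment above notes.
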
 static struct kvm_memslots *kvm_alloc_memslots(void)
 {
        int i;
@@ -826,9 +859,24 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
        kvfree(slots);
 }
 
+static umode_t kvm_stats_debugfs_mode(const struct _kvm_stats_desc *pdesc)
+{
+       switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) {
+       case KVM_STATS_TYPE_INSTANT:
+               return 0444;
+       case KVM_STATS_TYPE_CUMULATIVE:
+       case KVM_STATS_TYPE_PEAK:
+       default:
+               return 0644;
+       }
+}
+
+
 static void kvm_destroy_vm_debugfs(struct kvm *kvm)
 {
        int i;
+       int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
+                                     kvm_vcpu_stats_header.num_desc;
 
        if (!kvm->debugfs_dentry)
                return;
@@ -846,7 +894,10 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 {
        char dir_name[ITOA_MAX_LEN * 2];
        struct kvm_stat_data *stat_data;
-       struct kvm_stats_debugfs_item *p;
+       const struct _kvm_stats_desc *pdesc;
+       int i;
+       int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
+                                     kvm_vcpu_stats_header.num_desc;
 
        if (!debugfs_initialized())
                return 0;
@@ -860,15 +911,32 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
        if (!kvm->debugfs_stat_data)
                return -ENOMEM;
 
-       for (p = debugfs_entries; p->name; p++) {
+       for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) {
+               pdesc = &kvm_vm_stats_desc[i];
                stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
                if (!stat_data)
                        return -ENOMEM;
 
                stat_data->kvm = kvm;
-               stat_data->dbgfs_item = p;
-               kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
-               debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p),
+               stat_data->desc = pdesc;
+               stat_data->kind = KVM_STAT_VM;
+               kvm->debugfs_stat_data[i] = stat_data;
+               debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
+                                   kvm->debugfs_dentry, stat_data,
+                                   &stat_fops_per_vm);
+       }
+
+       for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) {
+               pdesc = &kvm_vcpu_stats_desc[i];
+               stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
+               if (!stat_data)
+                       return -ENOMEM;
+
+               stat_data->kvm = kvm;
+               stat_data->desc = pdesc;
+               stat_data->kind = KVM_STAT_VCPU;
+               kvm->debugfs_stat_data[i + kvm_vm_stats_header.num_desc] = stat_data;
+               debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
                                    kvm->debugfs_dentry, stat_data,
                                    &stat_fops_per_vm);
        }
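
kvm->debugfs_stat_data is sized for kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc entries, so the per-vCPU loop stores its entries at i + kvm_vm_stats_header.num_desc; indexing with a bare i would overwrite the per-VM entries and leave the tail of the array NULL. The matching teardown (the kvm_destroy_vm_debugfs() hunk above) walks the full array, roughly:

	/* Sketch of the teardown loop in kvm_destroy_vm_debugfs(), using the
	 * kvm_debugfs_num_entries local recomputed in the hunk above. */
	if (kvm->debugfs_stat_data) {
		for (i = 0; i < kvm_debugfs_num_entries; i++)
			kfree(kvm->debugfs_stat_data[i]);
		kfree(kvm->debugfs_stat_data);
	}
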
@@ -908,6 +976,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
        mutex_init(&kvm->lock);
        mutex_init(&kvm->irq_lock);
        mutex_init(&kvm->slots_lock);
+       mutex_init(&kvm->slots_arch_lock);
        INIT_LIST_HEAD(&kvm->devices);
 
        BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
@@ -962,6 +1031,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
        mutex_unlock(&kvm_lock);
 
        preempt_notifier_inc();
+       kvm_init_pm_notifier(kvm);
 
        return kvm;
 
@@ -1009,6 +1079,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
        int i;
        struct mm_struct *mm = kvm->mm;
 
+       kvm_destroy_pm_notifier(kvm);
        kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
        kvm_destroy_vm_debugfs(kvm);
        kvm_arch_sync_events(kvm);
@@ -1280,6 +1351,14 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
        slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
 
        rcu_assign_pointer(kvm->memslots[as_id], slots);
+
+       /*
+        * Acquired in kvm_set_memslot. Must be released before synchronize
+        * SRCU below in order to avoid deadlock with another thread
+        * acquiring the slots_arch_lock in an srcu critical section.
+        */
+       mutex_unlock(&kvm->slots_arch_lock);
+
        synchronize_srcu_expedited(&kvm->srcu);
 
        /*
@@ -1306,6 +1385,18 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
        return old_memslots;
 }
 
+static size_t kvm_memslots_size(int slots)
+{
+       return sizeof(struct kvm_memslots) +
+              (sizeof(struct kvm_memory_slot) * slots);
+}
+
+static void kvm_copy_memslots(struct kvm_memslots *to,
+                             struct kvm_memslots *from)
+{
+       memcpy(to, from, kvm_memslots_size(from->used_slots));
+}
+
 /*
  * Note, at a minimum, the current number of used slots must be allocated, even
  * when deleting a memslot, as we need a complete duplicate of the memslots for
@@ -1315,19 +1406,16 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
                                             enum kvm_mr_change change)
 {
        struct kvm_memslots *slots;
-       size_t old_size, new_size;
-
-       old_size = sizeof(struct kvm_memslots) +
-                  (sizeof(struct kvm_memory_slot) * old->used_slots);
+       size_t new_size;
 
        if (change == KVM_MR_CREATE)
-               new_size = old_size + sizeof(struct kvm_memory_slot);
+               new_size = kvm_memslots_size(old->used_slots + 1);
        else
-               new_size = old_size;
+               new_size = kvm_memslots_size(old->used_slots);
 
        slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
        if (likely(slots))
-               memcpy(slots, old, old_size);
+               kvm_copy_memslots(slots, old);
 
        return slots;
 }
@@ -1342,9 +1430,27 @@ static int kvm_set_memslot(struct kvm *kvm,
        struct kvm_memslots *slots;
        int r;
 
+       /*
+        * Released in install_new_memslots.
+        *
+        * Must be held from before the current memslots are copied until
+        * after the new memslots are installed with rcu_assign_pointer,
+        * then released before the synchronize srcu in install_new_memslots.
+        *
+        * When modifying memslots outside of the slots_lock, must be held
+        * before reading the pointer to the current memslots until after all
+        * changes to those memslots are complete.
+        *
+        * These rules ensure that installing new memslots does not lose
+        * changes made to the previous memslots.
+        */
+       mutex_lock(&kvm->slots_arch_lock);
+
        slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change);
-       if (!slots)
+       if (!slots) {
+               mutex_unlock(&kvm->slots_arch_lock);
                return -ENOMEM;
+       }
 
        if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
                /*
@@ -1355,10 +1461,9 @@ static int kvm_set_memslot(struct kvm *kvm,
                slot->flags |= KVM_MEMSLOT_INVALID;
 
                /*
-                * We can re-use the old memslots, the only difference from the
-                * newly installed memslots is the invalid flag, which will get
-                * dropped by update_memslots anyway.  We'll also revert to the
-                * old memslots if preparing the new memory region fails.
+                * We can re-use the memory from the old memslots.
+                * It will be overwritten with a copy of the new memslots
+                * after reacquiring the slots_arch_lock below.
                 */
                slots = install_new_memslots(kvm, as_id, slots);
 
@@ -1370,6 +1475,17 @@ static int kvm_set_memslot(struct kvm *kvm,
                 *      - kvm_is_visible_gfn (mmu_check_root)
                 */
                kvm_arch_flush_shadow_memslot(kvm, slot);
+
+               /* Released in install_new_memslots. */
+               mutex_lock(&kvm->slots_arch_lock);
+
+               /*
+                * The arch-specific fields of the memslots could have changed
+                * between releasing the slots_arch_lock in
+                * install_new_memslots and here, so get a fresh copy of the
+                * slots.
+                */
+               kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id));
        }
 
        r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
@@ -1385,8 +1501,13 @@ static int kvm_set_memslot(struct kvm *kvm,
        return 0;
 
 out_slots:
-       if (change == KVM_MR_DELETE || change == KVM_MR_MOVE)
+       if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+               slot = id_to_memslot(slots, old->id);
+               slot->flags &= ~KVM_MEMSLOT_INVALID;
                slots = install_new_memslots(kvm, as_id, slots);
+       } else {
+               mutex_unlock(&kvm->slots_arch_lock);
+       }
        kvfree(slots);
        return r;
 }
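
The comment block added at the top of kvm_set_memslot() spells out the contract for the new kvm->slots_arch_lock: hold it from before the current memslots are read or copied until the modification is complete, or until install_new_memslots() publishes the new array and drops the lock. A hypothetical arch-side consumer that fixes up live memslots outside slots_lock would follow the same pattern; arch_populate_slot() below is purely illustrative:

	/*
	 * Hypothetical user of slots_arch_lock, not part of this patch.
	 * Holding the mutex while reading the memslots pointer and modifying
	 * arch fields in place guarantees a concurrent install_new_memslots()
	 * cannot copy the array mid-update and lose the changes.
	 */
	static int arch_update_live_memslots(struct kvm *kvm, int as_id)
	{
		struct kvm_memslots *slots;
		struct kvm_memory_slot *slot;
		int r = 0;

		mutex_lock(&kvm->slots_arch_lock);
		slots = __kvm_memslots(kvm, as_id);
		kvm_for_each_memslot(slot, slots) {
			r = arch_populate_slot(kvm, slot);	/* illustrative */
			if (r)
				break;
		}
		mutex_unlock(&kvm->slots_arch_lock);

		return r;
	}
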
@@ -2054,6 +2175,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
        return true;
 }
 
+static int kvm_try_get_pfn(kvm_pfn_t pfn)
+{
+       if (kvm_is_reserved_pfn(pfn))
+               return 1;
+       return get_page_unless_zero(pfn_to_page(pfn));
+}
+
 static int hva_to_pfn_remapped(struct vm_area_struct *vma,
                               unsigned long addr, bool *async,
                               bool write_fault, bool *writable,
@@ -2103,13 +2231,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
         * Whoever called remap_pfn_range is also going to call e.g.
         * unmap_mapping_range before the underlying pages are freed,
         * causing a call to our MMU notifier.
+        *
+        * Certain IO or PFNMAP mappings can be backed with valid
+        * struct pages, but be allocated without refcounting e.g.,
+        * tail pages of non-compound higher order allocations, which
+        * would then underflow the refcount when the caller does the
+        * required put_page. Don't allow those pages here.
         */ 
-       kvm_get_pfn(pfn);
+       if (!kvm_try_get_pfn(pfn))
+               r = -EFAULT;
 
 out:
        pte_unmap_unlock(ptep, ptl);
        *p_pfn = pfn;
-       return 0;
+
+       return r;
 }
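
The refcount check matters because every pfn handed out through this path is eventually released by the caller, and kvm_release_pfn_clean() does put_page() on any non-reserved page; kvm_try_get_pfn() refusing pages that cannot take a reference is what keeps that later put_page() from underflowing. Caller-side pairing, sketched for context (not part of this hunk):

	/* gfn_to_pfn() reaches hva_to_pfn_remapped() for VM_IO/VM_PFNMAP vmas;
	 * the release below is where the reference taken above is dropped. */
	kvm_pfn_t pfn = gfn_to_pfn(kvm, gfn);

	if (!is_error_noslot_pfn(pfn)) {
		/* ... access the mapping ... */
		kvm_release_pfn_clean(pfn);	/* put_page() unless reserved */
	}
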
 
 /*
@@ -2929,6 +3065,8 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
                goto out;
        if (signal_pending(current))
                goto out;
+       if (kvm_check_request(KVM_REQ_UNBLOCK, vcpu))
+               goto out;
 
        ret = 0;
 out:
@@ -2940,9 +3078,9 @@ static inline void
 update_halt_poll_stats(struct kvm_vcpu *vcpu, u64 poll_ns, bool waited)
 {
        if (waited)
-               vcpu->stat.halt_poll_fail_ns += poll_ns;
+               vcpu->stat.generic.halt_poll_fail_ns += poll_ns;
        else
-               vcpu->stat.halt_poll_success_ns += poll_ns;
+               vcpu->stat.generic.halt_poll_success_ns += poll_ns;
 }
 
 /*
@@ -2960,21 +3098,20 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
        if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
                ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
 
-               ++vcpu->stat.halt_attempted_poll;
+               ++vcpu->stat.generic.halt_attempted_poll;
                do {
                        /*
                         * This sets KVM_REQ_UNHALT if an interrupt
                         * arrives.
                         */
                        if (kvm_vcpu_check_block(vcpu) < 0) {
-                               ++vcpu->stat.halt_successful_poll;
+                               ++vcpu->stat.generic.halt_successful_poll;
                                if (!vcpu_valid_wakeup(vcpu))
-                                       ++vcpu->stat.halt_poll_invalid;
+                                       ++vcpu->stat.generic.halt_poll_invalid;
                                goto out;
                        }
                        poll_end = cur = ktime_get();
-               } while (single_task_running() && !need_resched() &&
-                        ktime_before(cur, stop));
+               } while (kvm_vcpu_can_poll(cur, stop));
        }
 
        prepare_to_rcuwait(&vcpu->wait);
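
kvm_vcpu_can_poll() is defined outside this excerpt; going by the open-coded condition it replaces above, it is presumably equivalent to:

	/* Presumed definition, inferred from the removed condition. */
	static inline bool kvm_vcpu_can_poll(ktime_t cur, ktime_t stop)
	{
		return single_task_running() && !need_resched() &&
		       ktime_before(cur, stop);
	}
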
@@ -3027,7 +3164,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
        waitp = kvm_arch_vcpu_get_wait(vcpu);
        if (rcuwait_wake_up(waitp)) {
                WRITE_ONCE(vcpu->ready, true);
-               ++vcpu->stat.halt_wakeup;
+               ++vcpu->stat.generic.halt_wakeup;
                return true;
        }
 
@@ -3360,6 +3497,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
        vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
        BUG_ON(kvm->vcpus[vcpu->vcpu_idx]);
 
+       /* Fill the stats id string for the vcpu */
+       snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
+                task_pid_nr(current), id);
+
        /* Now it's all set up, let userspace reach it */
        kvm_get_kvm(kvm);
        r = create_vcpu_fd(vcpu);
@@ -3409,6 +3550,44 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
        return 0;
 }
 
+static ssize_t kvm_vcpu_stats_read(struct file *file, char __user *user_buffer,
+                             size_t size, loff_t *offset)
+{
+       struct kvm_vcpu *vcpu = file->private_data;
+
+       return kvm_stats_read(vcpu->stats_id, &kvm_vcpu_stats_header,
+                       &kvm_vcpu_stats_desc[0], &vcpu->stat,
+                       sizeof(vcpu->stat), user_buffer, size, offset);
+}
+
+static const struct file_operations kvm_vcpu_stats_fops = {
+       .read = kvm_vcpu_stats_read,
+       .llseek = noop_llseek,
+};
+
+static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
+{
+       int fd;
+       struct file *file;
+       char name[15 + ITOA_MAX_LEN + 1];
+
+       snprintf(name, sizeof(name), "kvm-vcpu-stats:%d", vcpu->vcpu_id);
+
+       fd = get_unused_fd_flags(O_CLOEXEC);
+       if (fd < 0)
+               return fd;
+
+       file = anon_inode_getfile(name, &kvm_vcpu_stats_fops, vcpu, O_RDONLY);
+       if (IS_ERR(file)) {
+               put_unused_fd(fd);
+               return PTR_ERR(file);
+       }
+       file->f_mode |= FMODE_PREAD;
+       fd_install(fd, file);
+
+       return fd;
+}
+
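
The file descriptor returned by KVM_GET_STATS_FD serves the binary stats format: a header, followed by an id string, the stat descriptors and the stat values, all located via offsets carried in the header. A userspace sketch, assuming a <linux/kvm.h> that already defines KVM_GET_STATS_FD and struct kvm_stats_header (the same ioctl is wired up for VM fds further down):

	/* Userspace sketch, not part of this patch. */
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/kvm.h>

	static int read_vcpu_stats_header(int vcpu_fd)
	{
		struct kvm_stats_header hdr;
		int stats_fd = ioctl(vcpu_fd, KVM_GET_STATS_FD, NULL);

		if (stats_fd < 0)
			return -1;

		/* The fops above set FMODE_PREAD, so positioned reads work. */
		if (pread(stats_fd, &hdr, sizeof(hdr), 0) != sizeof(hdr)) {
			close(stats_fd);
			return -1;
		}

		/* Descriptors start at hdr.desc_offset, data at hdr.data_offset. */
		close(stats_fd);
		return 0;
	}
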
 static long kvm_vcpu_ioctl(struct file *filp,
                           unsigned int ioctl, unsigned long arg)
 {
@@ -3606,6 +3785,10 @@ out_free1:
                r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
                break;
        }
+       case KVM_GET_STATS_FD: {
+               r = kvm_vcpu_ioctl_get_stats_fd(vcpu);
+               break;
+       }
        default:
                r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
        }
@@ -3864,6 +4047,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #else
                return 0;
 #endif
+       case KVM_CAP_BINARY_STATS_FD:
+               return 1;
        default:
                break;
        }
@@ -3967,6 +4152,42 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
        }
 }
 
+static ssize_t kvm_vm_stats_read(struct file *file, char __user *user_buffer,
+                             size_t size, loff_t *offset)
+{
+       struct kvm *kvm = file->private_data;
+
+       return kvm_stats_read(kvm->stats_id, &kvm_vm_stats_header,
+                               &kvm_vm_stats_desc[0], &kvm->stat,
+                               sizeof(kvm->stat), user_buffer, size, offset);
+}
+
+static const struct file_operations kvm_vm_stats_fops = {
+       .read = kvm_vm_stats_read,
+       .llseek = noop_llseek,
+};
+
+static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
+{
+       int fd;
+       struct file *file;
+
+       fd = get_unused_fd_flags(O_CLOEXEC);
+       if (fd < 0)
+               return fd;
+
+       file = anon_inode_getfile("kvm-vm-stats",
+                       &kvm_vm_stats_fops, kvm, O_RDONLY);
+       if (IS_ERR(file)) {
+               put_unused_fd(fd);
+               return PTR_ERR(file);
+       }
+       file->f_mode |= FMODE_PREAD;
+       fd_install(fd, file);
+
+       return fd;
+}
+
 static long kvm_vm_ioctl(struct file *filp,
                           unsigned int ioctl, unsigned long arg)
 {
@@ -4149,6 +4370,9 @@ static long kvm_vm_ioctl(struct file *filp,
        case KVM_RESET_DIRTY_RINGS:
                r = kvm_vm_ioctl_reset_dirty_pages(kvm);
                break;
+       case KVM_GET_STATS_FD:
+               r = kvm_vm_ioctl_get_stats_fd(kvm);
+               break;
        default:
                r = kvm_arch_vm_ioctl(filp, ioctl, arg);
        }
@@ -4228,6 +4452,9 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
        if (r < 0)
                goto put_kvm;
 
+       snprintf(kvm->stats_id, sizeof(kvm->stats_id),
+                       "kvm-%d", task_pid_nr(current));
+
        file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
        if (IS_ERR(file)) {
                put_unused_fd(r);
@@ -4722,7 +4949,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
                return -ENOENT;
 
        if (simple_attr_open(inode, file, get,
-                   KVM_DBGFS_GET_MODE(stat_data->dbgfs_item) & 0222
+                   kvm_stats_debugfs_mode(stat_data->desc) & 0222
                    ? set : NULL,
                    fmt)) {
                kvm_put_kvm(stat_data->kvm);
@@ -4745,14 +4972,14 @@ static int kvm_debugfs_release(struct inode *inode, struct file *file)
 
 static int kvm_get_stat_per_vm(struct kvm *kvm, size_t offset, u64 *val)
 {
-       *val = *(ulong *)((void *)kvm + offset);
+       *val = *(u64 *)((void *)(&kvm->stat) + offset);
 
        return 0;
 }
 
 static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset)
 {
-       *(ulong *)((void *)kvm + offset) = 0;
+       *(u64 *)((void *)(&kvm->stat) + offset) = 0;
 
        return 0;
 }
@@ -4765,7 +4992,7 @@ static int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val)
        *val = 0;
 
        kvm_for_each_vcpu(i, vcpu, kvm)
-               *val += *(u64 *)((void *)vcpu + offset);
+               *val += *(u64 *)((void *)(&vcpu->stat) + offset);
 
        return 0;
 }
@@ -4776,7 +5003,7 @@ static int kvm_clear_stat_per_vcpu(struct kvm *kvm, size_t offset)
        struct kvm_vcpu *vcpu;
 
        kvm_for_each_vcpu(i, vcpu, kvm)
-               *(u64 *)((void *)vcpu + offset) = 0;
+               *(u64 *)((void *)(&vcpu->stat) + offset) = 0;
 
        return 0;
 }
@@ -4786,14 +5013,14 @@ static int kvm_stat_data_get(void *data, u64 *val)
        int r = -EFAULT;
        struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
 
-       switch (stat_data->dbgfs_item->kind) {
+       switch (stat_data->kind) {
        case KVM_STAT_VM:
                r = kvm_get_stat_per_vm(stat_data->kvm,
-                                       stat_data->dbgfs_item->offset, val);
+                                       stat_data->desc->desc.offset, val);
                break;
        case KVM_STAT_VCPU:
                r = kvm_get_stat_per_vcpu(stat_data->kvm,
-                                         stat_data->dbgfs_item->offset, val);
+                                         stat_data->desc->desc.offset, val);
                break;
        }
 
@@ -4808,14 +5035,14 @@ static int kvm_stat_data_clear(void *data, u64 val)
        if (val)
                return -EINVAL;
 
-       switch (stat_data->dbgfs_item->kind) {
+       switch (stat_data->kind) {
        case KVM_STAT_VM:
                r = kvm_clear_stat_per_vm(stat_data->kvm,
-                                         stat_data->dbgfs_item->offset);
+                                         stat_data->desc->desc.offset);
                break;
        case KVM_STAT_VCPU:
                r = kvm_clear_stat_per_vcpu(stat_data->kvm,
-                                           stat_data->dbgfs_item->offset);
+                                           stat_data->desc->desc.offset);
                break;
        }
 
@@ -4872,6 +5099,7 @@ static int vm_stat_clear(void *_offset, u64 val)
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, vm_stat_clear, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(vm_stat_readonly_fops, vm_stat_get, NULL, "%llu\n");
 
 static int vcpu_stat_get(void *_offset, u64 *val)
 {
@@ -4908,11 +5136,7 @@ static int vcpu_stat_clear(void *_offset, u64 val)
 
 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear,
                        "%llu\n");
-
-static const struct file_operations *stat_fops[] = {
-       [KVM_STAT_VCPU] = &vcpu_stat_fops,
-       [KVM_STAT_VM]   = &vm_stat_fops,
-};
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_readonly_fops, vcpu_stat_get, NULL, "%llu\n");
 
 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
 {
@@ -4966,15 +5190,32 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
 
 static void kvm_init_debug(void)
 {
-       struct kvm_stats_debugfs_item *p;
+       const struct file_operations *fops;
+       const struct _kvm_stats_desc *pdesc;
+       int i;
 
        kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
 
-       kvm_debugfs_num_entries = 0;
-       for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
-               debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p),
-                                   kvm_debugfs_dir, (void *)(long)p->offset,
-                                   stat_fops[p->kind]);
+       for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) {
+               pdesc = &kvm_vm_stats_desc[i];
+               if (kvm_stats_debugfs_mode(pdesc) & 0222)
+                       fops = &vm_stat_fops;
+               else
+                       fops = &vm_stat_readonly_fops;
+               debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
+                               kvm_debugfs_dir,
+                               (void *)(long)pdesc->desc.offset, fops);
+       }
+
+       for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) {
+               pdesc = &kvm_vcpu_stats_desc[i];
+               if (kvm_stats_debugfs_mode(pdesc) & 0222)
+                       fops = &vcpu_stat_fops;
+               else
+                       fops = &vcpu_stat_readonly_fops;
+               debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
+                               kvm_debugfs_dir,
+                               (void *)(long)pdesc->desc.offset, fops);
        }
 }
 
@@ -5124,7 +5365,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
                kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align,
                                           SLAB_ACCOUNT,
                                           offsetof(struct kvm_vcpu, arch),
-                                          sizeof_field(struct kvm_vcpu, arch),
+                                          offsetofend(struct kvm_vcpu, stats_id)
+                                          - offsetof(struct kvm_vcpu, arch),
                                           NULL);
        if (!kvm_vcpu_cache) {
                r = -ENOMEM;
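
The widened usercopy whitelist above goes hand in hand with the binary stats interface: kvm_vcpu_stats_read() copies vcpu->stat and vcpu->stats_id to userspace, so the whitelisted window now runs from the start of arch through the end of stats_id rather than covering arch alone. That bound implies a struct kvm_vcpu tail of roughly the following shape (the field order is an assumption, not visible in this excerpt):

	/* Assumed tail of struct kvm_vcpu implied by the offsetofend() bound. */
	struct kvm_vcpu {
		/* ... */
		struct kvm_vcpu_arch arch;		/* start of usercopy window */
		struct kvm_vcpu_stat stat;
		char stats_id[KVM_STATS_NAME_SIZE];	/* end of usercopy window */
		/* ... */
	};
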