KVM: X86: Introduce mmu_rmaps_stat per-vm debugfs file
author Peter Xu <peterx@redhat.com>
Fri, 30 Jul 2021 22:04:52 +0000 (18:04 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Fri, 20 Aug 2021 20:06:11 +0000 (16:06 -0400)
Use this file to dump rmap statistics.  The statistics are gathered by
counting the entries of every rmap and bucketing the counts on a log-2 scale.

An example of the output (idle 6GB guest, right after booting Linux):

Rmap_Count:     0       1       2-3     4-7     8-15    16-31   32-63   64-127  128-255 256-511 512-1023
Level=4K:       3086676 53045   12330   1272    502     121     76      2       0       0       0
Level=2M:       5947    231     0       0       0       0       0       0       0       0       0
Level=1G:       32      0       0       0       0       0       0       0       0       0       0

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20210730220455.26054-5-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/debugfs.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmu_internal.h

index 95a9841..54a83a7 100644 (file)
@@ -7,6 +7,8 @@
 #include <linux/kvm_host.h>
 #include <linux/debugfs.h>
 #include "lapic.h"
+#include "mmu.h"
+#include "mmu/mmu_internal.h"
 
 static int vcpu_get_timer_advance_ns(void *data, u64 *val)
 {
@@ -73,3 +75,112 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
                                    &vcpu_tsc_scaling_frac_fops);
        }
 }
+
+/*
+ * This covers rmap counts below 1024 in log-2 buckets (11 = log2(1024) + 1
+ * buckets), which should be enough to cover RMAP_RECYCLE_THRESHOLD.
+ */
+#define  RMAP_LOG_SIZE  11
+
+/* Printable page-size name for each level index, used in the "Level=" rows. */
+static const char * const kvm_lpage_str[KVM_NR_PAGE_SIZES] = { "4K", "2M", "1G" };
+
+/*
+ * seq_file show handler for the per-VM "mmu_rmaps_stat" debugfs file.
+ *
+ * Walks every used memslot of every address space and, for each page-size
+ * level, builds a log-2 histogram of the value pte_list_count() returns for
+ * each rmap head: bucket 0 counts empty rmaps, bucket 1 counts rmaps with
+ * exactly one entry, and bucket i (i >= 2) counts rmaps holding
+ * 2^(i-1) .. 2^i - 1 entries.  Counts too large for the table are clamped
+ * into the last bucket (with a one-time warning).
+ *
+ * The walk runs with kvm->slots_lock held and kvm->mmu_lock taken for write;
+ * the output is emitted after both are dropped.
+ *
+ * Returns 0 on success, -ENOMEM if a histogram buffer cannot be allocated.
+ */
+static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
+{
+       struct kvm_rmap_head *rmap;
+       struct kvm *kvm = m->private;
+       struct kvm_memory_slot *slot;
+       struct kvm_memslots *slots;
+       unsigned int lpage_size, index;
+       /* Still small enough to be on the stack */
+       unsigned int *log[KVM_NR_PAGE_SIZES], *cur;
+       int i, j, k, l, ret;
+
+       ret = -ENOMEM;
+       /* Zero the pointers first so the cleanup loop can kfree() them all. */
+       memset(log, 0, sizeof(log));
+       for (i = 0; i < KVM_NR_PAGE_SIZES; i++) {
+               log[i] = kcalloc(RMAP_LOG_SIZE, sizeof(*log[i]), GFP_KERNEL);
+               if (!log[i])
+                       goto out;
+       }
+
+       mutex_lock(&kvm->slots_lock);
+       write_lock(&kvm->mmu_lock);
+
+       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+               slots = __kvm_memslots(kvm, i);
+               for (j = 0; j < slots->used_slots; j++) {
+                       slot = &slots->memslots[j];
+                       for (k = 0; k < KVM_NR_PAGE_SIZES; k++) {
+                               rmap = slot->arch.rmap[k];
+                               /*
+                                * NOTE(review): presumably the rmap array can
+                                * be left unallocated for a slot; skip it
+                                * rather than dereference NULL - confirm.
+                                */
+                               if (!rmap)
+                                       continue;
+                               lpage_size = kvm_mmu_slot_lpages(slot, k + 1);
+                               cur = log[k];
+                               for (l = 0; l < lpage_size; l++) {
+                                       /*
+                                        * fls(), not ffs(): the bucket is the
+                                        * position of the highest set bit, so
+                                        * that e.g. 3 lands in "2-3" and 5 in
+                                        * "4-7".
+                                        */
+                                       index = fls(pte_list_count(&rmap[l]));
+                                       if (WARN_ON_ONCE(index >= RMAP_LOG_SIZE))
+                                               index = RMAP_LOG_SIZE - 1;
+                                       cur[index]++;
+                               }
+                       }
+               }
+       }
+
+       write_unlock(&kvm->mmu_lock);
+       mutex_unlock(&kvm->slots_lock);
+
+       /* index=0 counts no rmap; index=1 counts 1 rmap */
+       seq_puts(m, "Rmap_Count:\t0\t1\t");
+       for (i = 2; i < RMAP_LOG_SIZE; i++) {
+               /* Bucket i spans [2^(i-1), 2^i - 1]. */
+               j = 1 << (i - 1);
+               k = (1 << i) - 1;
+               seq_printf(m, "%d-%d\t", j, k);
+       }
+       seq_putc(m, '\n');
+
+       for (i = 0; i < KVM_NR_PAGE_SIZES; i++) {
+               seq_printf(m, "Level=%s:\t", kvm_lpage_str[i]);
+               cur = log[i];
+               for (j = 0; j < RMAP_LOG_SIZE; j++)
+                       seq_printf(m, "%u\t", cur[j]);
+               seq_putc(m, '\n');
+       }
+
+       ret = 0;
+out:
+       /* kfree(NULL) is a no-op, so partially allocated tables are fine. */
+       for (i = 0; i < KVM_NR_PAGE_SIZES; i++)
+               kfree(log[i]);
+
+       return ret;
+}
+
+/*
+ * Open handler for "mmu_rmaps_stat".
+ *
+ * Takes a reference on the VM stored in inode->i_private so that it stays
+ * alive while the file is open, then sets up the single-record seq_file.
+ *
+ * Returns -ENOENT if the VM reference cannot be taken, otherwise the result
+ * of single_open().
+ */
+static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file)
+{
+       struct kvm *kvm = inode->i_private;
+       int r;
+
+       if (!kvm_get_kvm_safe(kvm))
+               return -ENOENT;
+
+       r = single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+       /*
+        * The release handler is not called for a failed open, so drop the
+        * reference here or it would be leaked.
+        */
+       if (r < 0)
+               kvm_put_kvm(kvm);
+
+       return r;
+}
+
+/*
+ * Release handler for "mmu_rmaps_stat": puts the VM found in
+ * inode->i_private, then tears down the seq_file state and returns the
+ * result of single_release().
+ */
+static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
+{
+       kvm_put_kvm(inode->i_private);
+       return single_release(inode, file);
+}
+
+static const struct file_operations mmu_rmaps_stat_fops = {
+       .open           = kvm_mmu_rmaps_stat_open,      /* gets the VM, then single_open() */
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = kvm_mmu_rmaps_stat_release,   /* puts the VM, then single_release() */
+};
+
+/*
+ * Create the x86-specific per-VM debugfs entries under kvm->debugfs_dentry.
+ * Currently this is only "mmu_rmaps_stat".  Always returns 0.
+ */
+int kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+       /* Read-only: mmu_rmaps_stat_fops provides no write handler. */
+       debugfs_create_file("mmu_rmaps_stat", 0444, kvm->debugfs_dentry, kvm,
+                           &mmu_rmaps_stat_fops);
+       return 0;
+}
index d282ccf..5daa8a9 100644 (file)
@@ -1035,6 +1035,26 @@ out:
        return true;
 }
 
+/*
+ * Return the number of entries reachable from @rmap_head: 0 when val is
+ * zero, 1 when val is non-zero with bit 0 clear, otherwise the sum of
+ * spte_count over the descriptor chain that val (with bit 0 masked off)
+ * points to.
+ */
+unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
+{
+       struct pte_list_desc *desc;
+       unsigned int total = 0;
+
+       /* Nothing recorded at all. */
+       if (!rmap_head->val)
+               return 0;
+
+       /* Bit 0 clear: val is not a descriptor pointer, count one entry. */
+       if (!(rmap_head->val & 1))
+               return 1;
+
+       /* Bit 0 set: strip the tag bit and walk the descriptor chain. */
+       for (desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+            desc; desc = desc->more)
+               total += desc->spte_count;
+
+       return total;
+}
+
 static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
                                         const struct kvm_memory_slot *slot)
 {
index ca7b759..62bb8f7 100644 (file)
@@ -131,6 +131,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
                                    int min_level);
 void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
                                        u64 start_gfn, u64 pages);
+unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
 
 /*
  * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().