perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE

author Kan Liang <kan.liang@linux.intel.com>

Thu, 1 Oct 2020 13:57:46 +0000 (06:57 -0700)

committer Peter Zijlstra <peterz@infradead.org>

Thu, 29 Oct 2020 10:00:38 +0000 (11:00 +0100)
author Kan Liang <kan.liang@linux.intel.com>
Thu, 1 Oct 2020 13:57:46 +0000 (06:57 -0700)
committer Peter Zijlstra <peterz@infradead.org>
Thu, 29 Oct 2020 10:00:38 +0000 (11:00 +0100)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h

index 0c19d27..7e3785d 100644 (file)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1034,6 +1034,7 @@ struct perf_sample_data {
  
         u64                             phys_addr;
         u64                             cgroup;
+       u64                             data_page_size;
  } ____cacheline_aligned;
  
  /* default value for data source */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h

index 077e7ee..cc6ea34 100644 (file)
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -143,8 +143,9 @@ enum perf_event_sample_format {
         PERF_SAMPLE_PHYS_ADDR                   = 1U << 19,
         PERF_SAMPLE_AUX                         = 1U << 20,
         PERF_SAMPLE_CGROUP                      = 1U << 21,
+       PERF_SAMPLE_DATA_PAGE_SIZE              = 1U << 22,
  
-       PERF_SAMPLE_MAX = 1U << 22,             /* non-ABI */
+       PERF_SAMPLE_MAX = 1U << 23,             /* non-ABI */
  
         __PERF_SAMPLE_CALLCHAIN_EARLY           = 1ULL << 63, /* non-ABI; internal use */
  };
@@ -896,6 +897,7 @@ enum perf_event_type {
          *      { u64                   phys_addr;} && PERF_SAMPLE_PHYS_ADDR
          *      { u64                   size;
          *        char                  data[size]; } && PERF_SAMPLE_AUX
+        *      { u64                   data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
          * };
          */
         PERF_RECORD_SAMPLE                      = 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c

index fb662eb..a796db2 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -51,6 +51,7 @@
  #include <linux/proc_ns.h>
  #include <linux/mount.h>
  #include <linux/min_heap.h>
+#include <linux/highmem.h>
  
  #include "internal.h"
  
@@ -1894,6 +1895,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
         if (sample_type & PERF_SAMPLE_CGROUP)
                 size += sizeof(data->cgroup);
  
+       if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+               size += sizeof(data->data_page_size);
+
         event->header_size = size;
  }
  
@@ -6938,6 +6942,9 @@ void perf_output_sample(struct perf_output_handle *handle,
         if (sample_type & PERF_SAMPLE_CGROUP)
                 perf_output_put(handle, data->cgroup);
  
+       if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+               perf_output_put(handle, data->data_page_size);
+
         if (sample_type & PERF_SAMPLE_AUX) {
                 perf_output_put(handle, data->aux_size);
  
@@ -6995,6 +7002,94 @@ static u64 perf_virt_to_phys(u64 virt)
         return phys_addr;
  }
  
+#ifdef CONFIG_MMU
+
+/*
+ * Return the MMU page size of a given virtual address
+ *
+ * Looks up @addr in the page tables of @mm one level at a time, in the
+ * order pgd -> p4d -> pud -> pmd -> pte, and returns as soon as the
+ * outcome is known:
+ *
+ *  - pgd_none() on the pgd entry, or a p4d/pud/pmd/pte entry that is not
+ *    present: returns 0.
+ *  - p4d_leaf()/pud_leaf()/pmd_leaf() true for the entry at that level:
+ *    returns 1ULL << P4D_SHIFT / PUD_SHIFT / PMD_SHIFT respectively.
+ *  - a present pte: returns PAGE_SIZE.
+ *
+ * The pte obtained from pte_offset_map() is released with pte_unmap() on
+ * both the "not present" and the "present" return paths.
+ *
+ * The caller visible in this file, perf_get_page_size(), runs this walk
+ * with local interrupts disabled.
+ *
+ * NOTE(review): every entry is dereferenced directly (*pgd, *p4d, *pud,
+ * *pmd, *pte), and the p4d/pud/pmd entries are read twice (present check,
+ * then leaf check). Confirm this is safe against concurrent page-table
+ * updates, or whether a single snapshot per level is needed.
+ */
+static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr)
+{
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       pgd = pgd_offset(mm, addr);
+       if (pgd_none(*pgd))
+               return 0;
+
+       p4d = p4d_offset(pgd, addr);
+       if (!p4d_present(*p4d))
+               return 0;
+
+       if (p4d_leaf(*p4d))
+               return 1ULL << P4D_SHIFT;
+
+       pud = pud_offset(p4d, addr);
+       if (!pud_present(*pud))
+               return 0;
+
+       if (pud_leaf(*pud))
+               return 1ULL << PUD_SHIFT;
+
+       pmd = pmd_offset(pud, addr);
+       if (!pmd_present(*pmd))
+               return 0;
+
+       if (pmd_leaf(*pmd))
+               return 1ULL << PMD_SHIFT;
+
+       pte = pte_offset_map(pmd, addr);
+       if (!pte_present(*pte)) {
+               pte_unmap(pte);
+               return 0;
+       }
+
+       pte_unmap(pte);
+       return PAGE_SIZE;
+}
+
+#else
+
+/*
+ * !CONFIG_MMU variant of __perf_get_page_size(): performs no lookup and
+ * always returns 0, ignoring both @mm and @addr.
+ */
+static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr)
+{
+       return 0;
+}
+
+#endif
+
+/*
+ * Return the page size that __perf_get_page_size() reports for @addr,
+ * or 0 when @addr is 0.
+ *
+ * The lookup is done against current->mm; when the current task has no
+ * mm, init_mm is used instead. Interrupts are disabled with
+ * local_irq_save() before the mm is chosen and re-enabled with
+ * local_irq_restore() only after the walk has finished.
+ */
+static u64 perf_get_page_size(unsigned long addr)
+{
+       struct mm_struct *mm;
+       unsigned long flags;
+       u64 size;
+
+       if (!addr)
+               return 0;
+
+       /*
+        * Software page-table walkers must disable IRQs,
+        * which prevents any tear down of the page tables.
+        */
+       local_irq_save(flags);
+
+       mm = current->mm;
+       if (!mm) {
+               /*
+                * For kernel threads and the like, use init_mm so that
+                * we can find kernel memory.
+                */
+               mm = &init_mm;
+       }
+
+       size = __perf_get_page_size(mm, addr);
+
+       local_irq_restore(flags);
+
+       return size;
+}
+
  static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
  
  struct perf_callchain_entry *
@@ -7150,6 +7245,14 @@ void perf_prepare_sample(struct perf_event_header *header,
         }
  #endif
  
+       /*
+        * PERF_SAMPLE_DATA_PAGE_SIZE requires PERF_SAMPLE_ADDR. If the user
+        * doesn't request PERF_SAMPLE_ADDR, the kernel implicitly retrieves
+        * data->addr, but the value is not dumped to userspace.
+        */
+       if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+               data->data_page_size = perf_get_page_size(data->addr);
+
         if (sample_type & PERF_SAMPLE_AUX) {
                 u64 size;
author	Kan Liang <kan.liang@linux.intel.com>
	Thu, 1 Oct 2020 13:57:46 +0000 (06:57 -0700)
committer	Peter Zijlstra <peterz@infradead.org>
	Thu, 29 Oct 2020 10:00:38 +0000 (11:00 +0100)
include/linux/perf_event.h		patch | blob | history
include/uapi/linux/perf_event.h		patch | blob | history
kernel/events/core.c		patch | blob | history