perf/core: Add PERF_SAMPLE_WEIGHT_STRUCT
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dc568ca..5206097 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -51,6 +51,8 @@
 #include <linux/proc_ns.h>
 #include <linux/mount.h>
 #include <linux/min_heap.h>
+#include <linux/highmem.h>
+#include <linux/pgtable.h>
 
 #include "internal.h"
 
@@ -1325,7 +1327,7 @@ static void put_ctx(struct perf_event_context *ctx)
  * function.
  *
  * Lock order:
- *    exec_update_mutex
+ *    exec_update_lock
  *     task_struct::perf_event_mutex
  *       perf_event_context::mutex
  *         perf_event::child_mutex;
@@ -1877,8 +1879,8 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
        if (sample_type & PERF_SAMPLE_PERIOD)
                size += sizeof(data->period);
 
-       if (sample_type & PERF_SAMPLE_WEIGHT)
-               size += sizeof(data->weight);
+       if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
+               size += sizeof(data->weight.full);
 
        if (sample_type & PERF_SAMPLE_READ)
                size += event->read_size;
@@ -1895,6 +1897,12 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
        if (sample_type & PERF_SAMPLE_CGROUP)
                size += sizeof(data->cgroup);
 
+       if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+               size += sizeof(data->data_page_size);
+
+       if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+               size += sizeof(data->code_page_size);
+
        event->header_size = size;
 }
 
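For reference, data->weight above is now the union this series introduces on the uapi side, so sizing against weight.full covers both flavours (PERF_SAMPLE_WEIGHT_TYPE is the OR of the two flags). A minimal sketch of that layout, assuming the field names from the companion include/uapi/linux/perf_event.h change (verify against the header; big-endian kernels reverse the struct's field order):

	union perf_sample_weight {
		__u64		full;		/* PERF_SAMPLE_WEIGHT view */
		struct {			/* PERF_SAMPLE_WEIGHT_STRUCT view */
			__u32	var1_dw;	/* arch-specific, e.g. memory latency */
			__u16	var2_w;		/* arch-specific, e.g. instruction latency */
			__u16	var3_w;		/* reserved for future use */
		};
	};

Because both views occupy a single __u64, the output path below can emit weight.full regardless of which flag the user picked.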
@@ -6899,8 +6907,8 @@ void perf_output_sample(struct perf_output_handle *handle,
                                          data->regs_user.regs);
        }
 
-       if (sample_type & PERF_SAMPLE_WEIGHT)
-               perf_output_put(handle, data->weight);
+       if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
+               perf_output_put(handle, data->weight.full);
 
        if (sample_type & PERF_SAMPLE_DATA_SRC)
                perf_output_put(handle, data->data_src.val);
@@ -6931,6 +6939,12 @@ void perf_output_sample(struct perf_output_handle *handle,
        if (sample_type & PERF_SAMPLE_CGROUP)
                perf_output_put(handle, data->cgroup);
 
+       if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+               perf_output_put(handle, data->data_page_size);
+
+       if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+               perf_output_put(handle, data->code_page_size);
+
        if (sample_type & PERF_SAMPLE_AUX) {
                perf_output_put(handle, data->aux_size);
 
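Consumers must parse PERF_RECORD_SAMPLE fields in exactly the order perf_output_sample() emits them; the two new u64 values land after the cgroup id and before any AUX payload. A hedged userspace sketch, where p is a u64 cursor into the record body and the flag tests mirror the kernel side:

	/* ... earlier sample fields already consumed ... */
	if (sample_type & PERF_SAMPLE_CGROUP)
		cgroup = *p++;
	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
		data_page_size = *p++;	/* 0 when no mapping was found */
	if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
		code_page_size = *p++;
	/* PERF_SAMPLE_AUX size + data, if requested, follow */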
@@ -6988,6 +7002,93 @@ static u64 perf_virt_to_phys(u64 virt)
        return phys_addr;
 }
 
+/*
+ * Return the page-table mapping size of a given virtual address.
+ */
+static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
+{
+       u64 size = 0;
+
+#ifdef CONFIG_HAVE_FAST_GUP
+       pgd_t *pgdp, pgd;
+       p4d_t *p4dp, p4d;
+       pud_t *pudp, pud;
+       pmd_t *pmdp, pmd;
+       pte_t *ptep, pte;
+
+       pgdp = pgd_offset(mm, addr);
+       pgd = READ_ONCE(*pgdp);
+       if (pgd_none(pgd))
+               return 0;
+
+       if (pgd_leaf(pgd))
+               return pgd_leaf_size(pgd);
+
+       p4dp = p4d_offset_lockless(pgdp, pgd, addr);
+       p4d = READ_ONCE(*p4dp);
+       if (!p4d_present(p4d))
+               return 0;
+
+       if (p4d_leaf(p4d))
+               return p4d_leaf_size(p4d);
+
+       pudp = pud_offset_lockless(p4dp, p4d, addr);
+       pud = READ_ONCE(*pudp);
+       if (!pud_present(pud))
+               return 0;
+
+       if (pud_leaf(pud))
+               return pud_leaf_size(pud);
+
+       pmdp = pmd_offset_lockless(pudp, pud, addr);
+       pmd = READ_ONCE(*pmdp);
+       if (!pmd_present(pmd))
+               return 0;
+
+       if (pmd_leaf(pmd))
+               return pmd_leaf_size(pmd);
+
+       ptep = pte_offset_map(&pmd, addr);
+       pte = ptep_get_lockless(ptep);
+       if (pte_present(pte))
+               size = pte_leaf_size(pte);
+       pte_unmap(ptep);
+#endif /* CONFIG_HAVE_FAST_GUP */
+
+       return size;
+}
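The walk relies on pXd_leaf_size() to report the exact mapping size of a leaf entry. Architectures with software-walkable page tables can override these; where they don't, generic fallbacks of roughly this shape (an assumption to confirm against include/linux/pgtable.h in this tree) keep the function working out of the box:

	#ifndef pgd_leaf_size
	#define pgd_leaf_size(x)	(1ULL << PGDIR_SHIFT)
	#endif
	#ifndef p4d_leaf_size
	#define p4d_leaf_size(x)	P4D_SIZE
	#endif
	#ifndef pud_leaf_size
	#define pud_leaf_size(x)	PUD_SIZE
	#endif
	#ifndef pmd_leaf_size
	#define pmd_leaf_size(x)	PMD_SIZE
	#endif
	#ifndef pte_leaf_size
	#define pte_leaf_size(x)	PAGE_SIZE
	#endif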
+
+static u64 perf_get_page_size(unsigned long addr)
+{
+       struct mm_struct *mm;
+       unsigned long flags;
+       u64 size;
+
+       if (!addr)
+               return 0;
+
+       /*
+        * Software page-table walkers must disable IRQs,
+        * which prevents any teardown of the page tables.
+        */
+       local_irq_save(flags);
+
+       mm = current->mm;
+       if (!mm) {
+               /*
+                * For kernel threads and the like, use init_mm so that
+                * we can find kernel memory.
+                */
+               mm = &init_mm;
+       }
+
+       size = perf_get_pgtable_size(mm, addr);
+
+       local_irq_restore(flags);
+
+       return size;
+}
+
 static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
 
 struct perf_callchain_entry *
@@ -7023,7 +7124,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 
        __perf_event_header__init_id(header, data, event);
 
-       if (sample_type & PERF_SAMPLE_IP)
+       if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
                data->ip = perf_instruction_pointer(regs);
 
        if (sample_type & PERF_SAMPLE_CALLCHAIN) {
@@ -7142,6 +7243,17 @@ void perf_prepare_sample(struct perf_event_header *header,
        }
 #endif
 
+       /*
+        * PERF_SAMPLE_DATA_PAGE_SIZE requires PERF_SAMPLE_ADDR. If the user
+        * doesn't request PERF_SAMPLE_ADDR, the kernel retrieves data->addr
+        * implicitly, but the value is not dumped to userspace.
+        */
+       if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+               data->data_page_size = perf_get_page_size(data->addr);
+
+       if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+               data->code_page_size = perf_get_page_size(data->ip);
+
        if (sample_type & PERF_SAMPLE_AUX) {
                u64 size;
 
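Putting the sampling side together: a minimal, hedged userspace sketch of opening an event that requests page sizes without PERF_SAMPLE_ADDR (error handling elided; needs a kernel with this patch):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <string.h>
	#include <unistd.h>

	static int open_page_size_event(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.sample_period = 100000;
		/* data->addr is fetched implicitly for the data page-size lookup */
		attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_DATA_PAGE_SIZE |
				   PERF_SAMPLE_CODE_PAGE_SIZE;

		/* current thread, any CPU, no group leader, no flags */
		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}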
@@ -11452,6 +11564,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
        if (attr->sample_type & PERF_SAMPLE_CGROUP)
                return -EINVAL;
 #endif
+       if ((attr->sample_type & PERF_SAMPLE_WEIGHT) &&
+           (attr->sample_type & PERF_SAMPLE_WEIGHT_STRUCT))
+               return -EINVAL;
 
 out:
        return ret;
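The new check makes the two weight flavours mutually exclusive, so requesting both should fail at open time. Continuing the sketch above:

	attr.sample_type = PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT;
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	/* expected after this patch: fd == -1 with errno == EINVAL */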
@@ -11720,24 +11835,6 @@ SYSCALL_DEFINE5(perf_event_open,
                goto err_task;
        }
 
-       if (task) {
-               err = mutex_lock_interruptible(&task->signal->exec_update_mutex);
-               if (err)
-                       goto err_task;
-
-               /*
-                * Preserve ptrace permission check for backwards compatibility.
-                *
-                * We must hold exec_update_mutex across this and any potential
-                * perf_install_in_context() call for this new event to
-                * serialize against exec() altering our credentials (and the
-                * perf_event_exit_task() that could imply).
-                */
-               err = -EACCES;
-               if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
-                       goto err_cred;
-       }
-
        if (flags & PERF_FLAG_PID_CGROUP)
                cgroup_fd = pid;
 
@@ -11745,7 +11842,7 @@ SYSCALL_DEFINE5(perf_event_open,
                                 NULL, NULL, cgroup_fd);
        if (IS_ERR(event)) {
                err = PTR_ERR(event);
-               goto err_cred;
+               goto err_task;
        }
 
        if (is_sampling_event(event)) {
@@ -11864,6 +11961,24 @@ SYSCALL_DEFINE5(perf_event_open,
                goto err_context;
        }
 
+       if (task) {
+               err = down_read_interruptible(&task->signal->exec_update_lock);
+               if (err)
+                       goto err_file;
+
+               /*
+                * Preserve ptrace permission check for backwards compatibility.
+                *
+                * We must hold exec_update_lock across this and any potential
+                * perf_install_in_context() call for this new event to
+                * serialize against exec() altering our credentials (and the
+                * perf_event_exit_task() that could imply).
+                */
+               err = -EACCES;
+               if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+                       goto err_cred;
+       }
+
        if (move_group) {
                gctx = __perf_event_ctx_lock_double(group_leader, ctx);
 
@@ -12017,7 +12132,7 @@ SYSCALL_DEFINE5(perf_event_open,
        mutex_unlock(&ctx->mutex);
 
        if (task) {
-               mutex_unlock(&task->signal->exec_update_mutex);
+               up_read(&task->signal->exec_update_lock);
                put_task_struct(task);
        }
 
@@ -12039,7 +12154,10 @@ err_locked:
        if (move_group)
                perf_event_ctx_unlock(group_leader, gctx);
        mutex_unlock(&ctx->mutex);
-/* err_file: */
+err_cred:
+       if (task)
+               up_read(&task->signal->exec_update_lock);
+err_file:
        fput(event_file);
 err_context:
        perf_unpin_context(ctx);
@@ -12051,9 +12169,6 @@ err_alloc:
         */
        if (!event_file)
                free_event(event);
-err_cred:
-       if (task)
-               mutex_unlock(&task->signal->exec_update_mutex);
 err_task:
        if (task)
                put_task_struct(task);
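Distilling the reshuffled flow in perf_event_open() from the hunks above (a sketch, not literal kernel code): the credential check now runs after the event and file exist, under the shared exec_update_lock, and the error unwind mirrors the acquisition order:

	/* 1. allocate event, ctx and file with no cred lock held */
	event = perf_event_alloc(...);
	event_file = anon_inode_getfile(...);

	/* 2. only now take exec_update_lock for read and check creds */
	if (task) {
		err = down_read_interruptible(&task->signal->exec_update_lock);
		if (err)
			goto err_file;
		err = -EACCES;
		if (!perfmon_capable() &&
		    !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
			goto err_cred;	/* up_read(), then fput() via err_file */
	}

	/* 3. install the event, then unlock and drop the task ref */
	up_read(&task->signal->exec_update_lock);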
@@ -12358,7 +12473,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 /*
  * When a child task exits, feed back event values to parent events.
  *
- * Can be called with exec_update_mutex held when called from
+ * Can be called with exec_update_lock held when called from
  * setup_new_exec().
  */
 void perf_event_exit_task(struct task_struct *child)