perf/aux: Fix AUX buffer serialization
author Peter Zijlstra <peterz@infradead.org>
Mon, 2 Sep 2024 08:14:24 +0000 (10:14 +0200)
committer Ingo Molnar <mingo@kernel.org>
Wed, 4 Sep 2024 16:22:56 +0000 (18:22 +0200)
Ole reported that event->mmap_mutex is strictly insufficient to
serialize the AUX buffer; add a per-RB mutex to fully serialize it.

Note that in the lock order comment the perf_event::mmap_mutex order
was already wrong; that is, its nesting under mmap_lock is not new with
this patch.

Fixes: 45bfb2e50471 ("perf: Add AUX area to ring buffer for raw data streams")
Reported-by: Ole <ole@binarygecko.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/events/core.c
kernel/events/internal.h
kernel/events/ring_buffer.c

index c973e3c..8a6c6bb 100644 (file)
@@ -1255,8 +1255,9 @@ static void put_ctx(struct perf_event_context *ctx)
  *       perf_event_context::mutex
  *         perf_event::child_mutex;
  *           perf_event_context::lock
- *         perf_event::mmap_mutex
  *         mmap_lock
+ *           perf_event::mmap_mutex
+ *             perf_buffer::aux_mutex
  *           perf_addr_filters_head::lock
  *
  *    cpu_hotplug_lock
@@ -6373,12 +6374,11 @@ static void perf_mmap_close(struct vm_area_struct *vma)
                event->pmu->event_unmapped(event, vma->vm_mm);
 
        /*
-        * rb->aux_mmap_count will always drop before rb->mmap_count and
-        * event->mmap_count, so it is ok to use event->mmap_mutex to
-        * serialize with perf_mmap here.
+        * The AUX buffer is strictly a sub-buffer, serialize using aux_mutex
+        * to avoid complications.
         */
        if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
-           atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
+           atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) {
                /*
                 * Stop all AUX events that are writing to this buffer,
                 * so that we can free its AUX pages and corresponding PMU
@@ -6395,7 +6395,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
                rb_free_aux(rb);
                WARN_ON_ONCE(refcount_read(&rb->aux_refcount));
 
-               mutex_unlock(&event->mmap_mutex);
+               mutex_unlock(&rb->aux_mutex);
        }
 
        if (atomic_dec_and_test(&rb->mmap_count))
@@ -6483,6 +6483,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        struct perf_event *event = file->private_data;
        unsigned long user_locked, user_lock_limit;
        struct user_struct *user = current_user();
+       struct mutex *aux_mutex = NULL;
        struct perf_buffer *rb = NULL;
        unsigned long locked, lock_limit;
        unsigned long vma_size;
@@ -6531,6 +6532,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                if (!rb)
                        goto aux_unlock;
 
+               aux_mutex = &rb->aux_mutex;
+               mutex_lock(aux_mutex);
+
                aux_offset = READ_ONCE(rb->user_page->aux_offset);
                aux_size = READ_ONCE(rb->user_page->aux_size);
 
@@ -6681,6 +6685,8 @@ unlock:
                atomic_dec(&rb->mmap_count);
        }
 aux_unlock:
+       if (aux_mutex)
+               mutex_unlock(aux_mutex);
        mutex_unlock(&event->mmap_mutex);
 
        /*
index 4515144..e072d99 100644 (file)
@@ -40,6 +40,7 @@ struct perf_buffer {
        struct user_struct              *mmap_user;
 
        /* AUX area */
+       struct mutex                    aux_mutex;
        long                            aux_head;
        unsigned int                    aux_nest;
        long                            aux_wakeup;     /* last aux_watermark boundary crossed by aux_head */
index 8cadf97..4f46f68 100644 (file)
@@ -337,6 +337,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
         */
        if (!rb->nr_pages)
                rb->paused = 1;
+
+       mutex_init(&rb->aux_mutex);
 }
 
 void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)