tracing/user_events: Split up mm alloc and attach
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 19 May 2023 23:07:38 +0000 (16:07 -0700)
committerSteven Rostedt (Google) <rostedt@goodmis.org>
Wed, 24 May 2023 00:58:17 +0000 (20:58 -0400)
When a new mm is being created in a fork() path it currently is
allocated and then attached in one go. This leaves the mm exposed out to
the tracing register callbacks while any parent enabler locations are
copied in. This should not happen.

Split up mm alloc and attach as unique operations. When duplicating
enablers, first alloc, then duplicate, and only upon success, attach.
This prevents any timing window outside of the event_reg mutex for
enablement walking. This allows for dropping RCU requirement for
enablement walking in later patches.

Link: https://lkml.kernel.org/r/20230519230741.669-2-beaub@linux.microsoft.com
Link: https://lore.kernel.org/linux-trace-kernel/CAHk-=whTBvXJuoi_kACo3qi5WZUmRrhyA-_=rRFsycTytmB6qw@mail.gmail.com/
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[ change log written by Beau Belgrave ]
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
kernel/trace/trace_events_user.c

index e37c7f1..599aab4 100644 (file)
@@ -539,10 +539,9 @@ static struct user_event_mm *user_event_mm_get_all(struct user_event *user)
        return found;
 }
 
-static struct user_event_mm *user_event_mm_create(struct task_struct *t)
+static struct user_event_mm *user_event_mm_alloc(struct task_struct *t)
 {
        struct user_event_mm *user_mm;
-       unsigned long flags;
 
        user_mm = kzalloc(sizeof(*user_mm), GFP_KERNEL_ACCOUNT);
 
@@ -554,12 +553,6 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
        refcount_set(&user_mm->refcnt, 1);
        refcount_set(&user_mm->tasks, 1);
 
-       spin_lock_irqsave(&user_event_mms_lock, flags);
-       list_add_rcu(&user_mm->link, &user_event_mms);
-       spin_unlock_irqrestore(&user_event_mms_lock, flags);
-
-       t->user_event_mm = user_mm;
-
        /*
         * The lifetime of the memory descriptor can slightly outlast
         * the task lifetime if a ref to the user_event_mm is taken
@@ -573,6 +566,17 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
        return user_mm;
 }
 
+static void user_event_mm_attach(struct user_event_mm *user_mm, struct task_struct *t)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&user_event_mms_lock, flags);
+       list_add_rcu(&user_mm->link, &user_event_mms);
+       spin_unlock_irqrestore(&user_event_mms_lock, flags);
+
+       t->user_event_mm = user_mm;
+}
+
 static struct user_event_mm *current_user_event_mm(void)
 {
        struct user_event_mm *user_mm = current->user_event_mm;
@@ -580,10 +584,12 @@ static struct user_event_mm *current_user_event_mm(void)
        if (user_mm)
                goto inc;
 
-       user_mm = user_event_mm_create(current);
+       user_mm = user_event_mm_alloc(current);
 
        if (!user_mm)
                goto error;
+
+       user_event_mm_attach(user_mm, current);
 inc:
        refcount_inc(&user_mm->refcnt);
 error:
@@ -671,7 +677,7 @@ void user_event_mm_remove(struct task_struct *t)
 
 void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
 {
-       struct user_event_mm *mm = user_event_mm_create(t);
+       struct user_event_mm *mm = user_event_mm_alloc(t);
        struct user_event_enabler *enabler;
 
        if (!mm)
@@ -685,10 +691,11 @@ void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
 
        rcu_read_unlock();
 
+       user_event_mm_attach(mm, t);
        return;
 error:
        rcu_read_unlock();
-       user_event_mm_remove(t);
+       user_event_mm_destroy(mm);
 }
 
 static bool current_user_event_enabler_exists(unsigned long uaddr,