bpf: Enable IRQ after irq_work_raise() completes in unit_free{_rcu}()
author Hou Tao <houtao1@huawei.com>
Fri, 1 Sep 2023 11:19:53 +0000 (19:19 +0800)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 8 Sep 2023 15:42:19 +0000 (08:42 -0700)
Both unit_free() and unit_free_rcu() invoke irq_work_raise() to return
freed objects to slab. Because IRQs are re-enabled before
irq_work_raise() is called, the invocation may be preempted by a task
doing unit_alloc(), and unit_alloc() may then return NULL unexpectedly,
as shown in the following case:

task A         task B

unit_free()
  // high_watermark = 48
  // free_cnt = 49 after free
  irq_work_raise()
    // mark irq work as IRQ_WORK_PENDING
    irq_work_claim()

               // task B preempts task A
               unit_alloc()
                 // free_cnt = 48 after alloc

               // does unit_alloc() 32-times
               ......
               // free_cnt = 16

               unit_alloc()
                 // free_cnt = 15 after alloc
                 // irq work is already PENDING,
                 // so just return
                 irq_work_raise()

               // does unit_alloc() 15-times
               ......
               // free_cnt = 0

               unit_alloc()
                 // free_cnt = 0 before alloc
                 return NULL

Fix it by enabling IRQ after irq_work_raise() completes.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20230901111954.1804721-3-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
kernel/bpf/memalloc.c

index c5d822d..961df89 100644 (file)
@@ -778,11 +778,16 @@ static void notrace unit_free(struct bpf_mem_cache *c, void *ptr)
                llist_add(llnode, &c->free_llist_extra);
        }
        local_dec(&c->active);
-       local_irq_restore(flags);
 
        if (cnt > c->high_watermark)
                /* free few objects from current cpu into global kmalloc pool */
                irq_work_raise(c);
+       /* Enable IRQ after irq_work_raise() completes, otherwise when current
+        * task is preempted by task which does unit_alloc(), unit_alloc() may
+        * return NULL unexpectedly because irq work is already pending but can
+        * not be triggered and free_llist can not be refilled in time.
+        */
+       local_irq_restore(flags);
 }
 
 static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr)
@@ -800,10 +805,10 @@ static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr)
                llist_add(llnode, &c->free_llist_extra_rcu);
        }
        local_dec(&c->active);
-       local_irq_restore(flags);
 
        if (!atomic_read(&c->call_rcu_in_progress))
                irq_work_raise(c);
+       local_irq_restore(flags);
 }
 
 /* Called from BPF program or from sys_bpf syscall.