bpf: Allow refcounted bpf_rb_node used in bpf_rbtree_{remove,left,right}
author: Martin KaFai Lau <martin.lau@kernel.org>
Tue, 6 May 2025 01:58:51 +0000 (18:58 -0700)
committer: Alexei Starovoitov <ast@kernel.org>
Tue, 6 May 2025 17:21:05 +0000 (10:21 -0700)
bpf_rbtree_{remove,left,right} require the root's lock to be held.
They also check, via node_internal->owner, that the node is still owned
by that root before proceeding, so it is safe to allow a refcounted
bpf_rb_node pointer to be used in these kfuncs.

In a bpf fq implementation that is much closer to the kernel fq,
https://lore.kernel.org/bpf/20250418224652.105998-13-martin.lau@linux.dev/,
a networking flow (allocated by bpf_obj_new) can be added to two different
rbtrees. There are cases where the flow is looked up in one rbtree,
a refcount on the flow is acquired, and the flow is then removed from
another rbtree:

struct fq_flow {
struct bpf_rb_node fq_node;
struct bpf_rb_node rate_node;
struct bpf_refcount refcount;
unsigned long sk_long;
};

int bpf_fq_enqueue(...)
{
/* ... */

bpf_spin_lock(&root->lock);
while (can_loop) {
/* ... */
if (!p)
break;
gc_f = bpf_rb_entry(p, struct fq_flow, fq_node);
if (gc_f->sk_long == sk_long) {
f = bpf_refcount_acquire(gc_f);
break;
}
/* ... */
}
bpf_spin_unlock(&root->lock);

if (f) {
bpf_spin_lock(&q->lock);
bpf_rbtree_remove(&q->delayed, &f->rate_node);
bpf_spin_unlock(&q->lock);
}
}

bpf_rbtree_{left,right} do not need this change but are relaxed together
with bpf_rbtree_remove instead of adding extra verifier logic
to exclude these kfuncs.

To avoid bisect failures, this patch also updates the selftests.

The "rbtree_api_remove_unadded_node" test no longer expects a verifier error.
The test now expects bpf_rbtree_remove(&groot, &m->node) to return NULL.
The test uses __retval(0) to ensure this NULL return value.

Some of the "only take non-owning..." failure messages have also been changed.

Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20250506015857.817950-5-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
kernel/bpf/verifier.c
tools/testing/selftests/bpf/progs/rbtree_fail.c

index 51a17e6..9093a35 100644 (file)
@@ -13229,8 +13229,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
                                        return -EINVAL;
                                }
                        } else {
-                               if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
-                                       verbose(env, "%s node input must be non-owning ref\n", func_name);
+                               if (!type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) {
+                                       verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
                                        return -EINVAL;
                                }
                                if (in_rbtree_lock_required_cb(env)) {
index dbd5eee..4acb6af 100644 (file)
@@ -69,11 +69,11 @@ long rbtree_api_nolock_first(void *ctx)
 }
 
 SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__retval(0)
 long rbtree_api_remove_unadded_node(void *ctx)
 {
        struct node_data *n, *m;
-       struct bpf_rb_node *res;
+       struct bpf_rb_node *res_n, *res_m;
 
        n = bpf_obj_new(typeof(*n));
        if (!n)
@@ -88,19 +88,20 @@ long rbtree_api_remove_unadded_node(void *ctx)
        bpf_spin_lock(&glock);
        bpf_rbtree_add(&groot, &n->node, less);
 
-       /* This remove should pass verifier */
-       res = bpf_rbtree_remove(&groot, &n->node);
-       n = container_of(res, struct node_data, node);
+       res_n = bpf_rbtree_remove(&groot, &n->node);
 
-       /* This remove shouldn't, m isn't in an rbtree */
-       res = bpf_rbtree_remove(&groot, &m->node);
-       m = container_of(res, struct node_data, node);
+       res_m = bpf_rbtree_remove(&groot, &m->node);
        bpf_spin_unlock(&glock);
 
-       if (n)
-               bpf_obj_drop(n);
-       if (m)
-               bpf_obj_drop(m);
+       bpf_obj_drop(m);
+       if (res_n)
+               bpf_obj_drop(container_of(res_n, struct node_data, node));
+       if (res_m) {
+               bpf_obj_drop(container_of(res_m, struct node_data, node));
+               /* m was not added to the rbtree */
+               return 2;
+       }
+
        return 0;
 }
 
@@ -178,7 +179,7 @@ err_out:
 }
 
 SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
 long rbtree_api_add_release_unlock_escape(void *ctx)
 {
        struct node_data *n;
@@ -202,7 +203,7 @@ long rbtree_api_add_release_unlock_escape(void *ctx)
 }
 
 SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
 long rbtree_api_first_release_unlock_escape(void *ctx)
 {
        struct bpf_rb_node *res;