bpf: Add bpf_loop helper
author Joanne Koong <joannekoong@fb.com>
Tue, 30 Nov 2021 03:06:19 +0000 (19:06 -0800)
committer Alexei Starovoitov <ast@kernel.org>
Tue, 30 Nov 2021 18:56:28 +0000 (10:56 -0800)
This patch adds the kernel-side and API changes for a new helper
function, bpf_loop:

long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
              u64 flags);

where long (*callback_fn)(u32 index, void *ctx);

bpf_loop invokes the "callback_fn" nr_loops times or until the
callback_fn returns 1. The callback_fn can only return 0 or 1, and
this is enforced by the verifier. The index passed to the callback_fn
is zero-based.
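
To illustrate the intended usage (this sketch is not part of the patch),
a BPF program could drive the helper roughly as below. It assumes a
libbpf whose generated helper definitions have been refreshed from the
updated UAPI header; the attach point and the loop_example / add_index /
loop_ctx names are made up for the example:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct loop_ctx {
	long sum;
};

/* The callback must be a static function; the verifier only accepts
 * return values of 0 (continue) or 1 (stop early).
 */
static long add_index(u32 index, void *ctx)
{
	struct loop_ctx *lc = ctx;

	lc->sum += index;
	return 0;
}

SEC("fentry/bpf_fentry_test1")
int loop_example(void *ctx)
{
	struct loop_ctx lc = {};
	long nr;

	/* run the callback 10 times; flags must currently be 0 */
	nr = bpf_loop(10, add_index, &lc, 0);
	if (nr < 0)
		return 0;	/* -EINVAL or -E2BIG */

	bpf_printk("ran %ld iterations, sum = %ld", nr, lc.sum);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";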

A few things to note:
~ The "u64 flags" parameter is currently unused but is included in
case a future use case for it arises.
~ In the kernel-side implementation of bpf_loop (kernel/bpf/bpf_iter.c),
bpf_callback_t is used as the callback function cast.
~ A program can have nested bpf_loop calls, but it must still adhere to
the verifier's stack depth constraint (the stack depth cannot exceed
MAX_BPF_STACK); a short sketch of nesting follows these notes.
~ Recursive callback_fns do not pass the verifier because their call
stack would grow too deep.
~ The next patch will add the tests and the benchmark.
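
Continuing the sketch above, and purely as a hypothetical illustration
of the nesting note (the same MAX_BPF_STACK limit applies), an outer
callback may itself call bpf_loop:

static long inner_cb(u32 j, void *ctx)
{
	return 0;
}

static long outer_cb(u32 i, void *ctx)
{
	/* each outer iteration runs a full inner loop */
	bpf_loop(8, inner_cb, NULL, 0);
	return 0;
}

	/* in the program body */
	bpf_loop(8, outer_cb, NULL, 0);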

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211130030622.4131246-2-joannekoong@fb.com
include/linux/bpf.h
include/uapi/linux/bpf.h
kernel/bpf/bpf_iter.c
kernel/bpf/helpers.c
kernel/bpf/verifier.c
tools/include/uapi/linux/bpf.h

include/linux/bpf.h
index cc7a0c3..cad0829 100644
@@ -2164,6 +2164,7 @@ extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
 extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
 extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
+extern const struct bpf_func_proto bpf_loop_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
include/uapi/linux/bpf.h
index a69e4b0..211b43a 100644
@@ -4957,6 +4957,30 @@ union bpf_attr {
  *             **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
  *             **-EBUSY** if failed to try lock mmap_lock.
  *             **-EINVAL** for invalid **flags**.
+ *
+ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
+ *     Description
+ *             For **nr_loops**, call **callback_fn** with
+ *             **callback_ctx** as the context parameter.
+ *             The **callback_fn** should be a static function and
+ *             the **callback_ctx** should be a pointer to the stack.
+ *             **flags** is used to control certain aspects of the helper
+ *             and must currently be 0. **nr_loops** is currently
+ *             limited to 1 << 23 (~8 million) loops.
+ *
+ *             long (\*callback_fn)(u32 index, void \*ctx);
+ *
+ *             where **index** is the current index in the loop. The index
+ *             is zero-indexed.
+ *
+ *             If **callback_fn** returns 0, the helper will continue to the
+ *             next iteration. If the return value is 1, the helper will skip
+ *             the rest of the iterations and return. Other return values are
+ *             not used now and will be rejected by the verifier.
+ *
+ *     Return
+ *             The number of loops performed, **-EINVAL** for invalid **flags**,
+ *             **-E2BIG** if **nr_loops** exceeds the maximum number of loops.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -5140,6 +5164,7 @@ union bpf_attr {
        FN(skc_to_unix_sock),           \
        FN(kallsyms_lookup_name),       \
        FN(find_vma),                   \
+       FN(loop),                       \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
kernel/bpf/bpf_iter.c
index b2ee450..b7aef5b 100644
@@ -714,3 +714,38 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = {
        .arg3_type      = ARG_PTR_TO_STACK_OR_NULL,
        .arg4_type      = ARG_ANYTHING,
 };
+
+/* maximum number of loops */
+#define MAX_LOOPS      BIT(23)
+
+BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
+          u64, flags)
+{
+       bpf_callback_t callback = (bpf_callback_t)callback_fn;
+       u64 ret;
+       u32 i;
+
+       if (flags)
+               return -EINVAL;
+       if (nr_loops > MAX_LOOPS)
+               return -E2BIG;
+
+       for (i = 0; i < nr_loops; i++) {
+               ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0);
+               /* return value: 0 - continue, 1 - stop and return */
+               if (ret)
+                       return i + 1;
+       }
+
+       return i;
+}
+
+const struct bpf_func_proto bpf_loop_proto = {
+       .func           = bpf_loop,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+       .arg2_type      = ARG_PTR_TO_FUNC,
+       .arg3_type      = ARG_PTR_TO_STACK_OR_NULL,
+       .arg4_type      = ARG_ANYTHING,
+};
kernel/bpf/helpers.c
index 1ffd469..5218800 100644
@@ -1378,6 +1378,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
                return &bpf_ringbuf_query_proto;
        case BPF_FUNC_for_each_map_elem:
                return &bpf_for_each_map_elem_proto;
+       case BPF_FUNC_loop:
+               return &bpf_loop_proto;
        default:
                break;
        }
kernel/bpf/verifier.c
index 0763cca..d7678d8 100644
@@ -6085,6 +6085,27 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
        return 0;
 }
 
+static int set_loop_callback_state(struct bpf_verifier_env *env,
+                                  struct bpf_func_state *caller,
+                                  struct bpf_func_state *callee,
+                                  int insn_idx)
+{
+       /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
+        *          u64 flags);
+        * callback_fn(u32 index, void *callback_ctx);
+        */
+       callee->regs[BPF_REG_1].type = SCALAR_VALUE;
+       callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
+
+       /* unused */
+       __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+       __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+       __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+
+       callee->in_callback_fn = true;
+       return 0;
+}
+
 static int set_timer_callback_state(struct bpf_verifier_env *env,
                                    struct bpf_func_state *caller,
                                    struct bpf_func_state *callee,
@@ -6458,13 +6479,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                        return err;
        }
 
-       if (func_id == BPF_FUNC_tail_call) {
-               err = check_reference_leak(env);
-               if (err) {
-                       verbose(env, "tail_call would lead to reference leak\n");
-                       return err;
-               }
-       } else if (is_release_function(func_id)) {
+       if (is_release_function(func_id)) {
                err = release_reference(env, meta.ref_obj_id);
                if (err) {
                        verbose(env, "func %s#%d reference has not been acquired before\n",
@@ -6475,42 +6490,47 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 
        regs = cur_regs(env);
 
-       /* check that flags argument in get_local_storage(map, flags) is 0,
-        * this is required because get_local_storage() can't return an error.
-        */
-       if (func_id == BPF_FUNC_get_local_storage &&
-           !register_is_null(&regs[BPF_REG_2])) {
-               verbose(env, "get_local_storage() doesn't support non-zero flags\n");
-               return -EINVAL;
-       }
-
-       if (func_id == BPF_FUNC_for_each_map_elem) {
+       switch (func_id) {
+       case BPF_FUNC_tail_call:
+               err = check_reference_leak(env);
+               if (err) {
+                       verbose(env, "tail_call would lead to reference leak\n");
+                       return err;
+               }
+               break;
+       case BPF_FUNC_get_local_storage:
+               /* check that flags argument in get_local_storage(map, flags) is 0,
+                * this is required because get_local_storage() can't return an error.
+                */
+               if (!register_is_null(&regs[BPF_REG_2])) {
+                       verbose(env, "get_local_storage() doesn't support non-zero flags\n");
+                       return -EINVAL;
+               }
+               break;
+       case BPF_FUNC_for_each_map_elem:
                err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
                                        set_map_elem_callback_state);
-               if (err < 0)
-                       return -EINVAL;
-       }
-
-       if (func_id == BPF_FUNC_timer_set_callback) {
+               break;
+       case BPF_FUNC_timer_set_callback:
                err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
                                        set_timer_callback_state);
-               if (err < 0)
-                       return -EINVAL;
-       }
-
-       if (func_id == BPF_FUNC_find_vma) {
+               break;
+       case BPF_FUNC_find_vma:
                err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
                                        set_find_vma_callback_state);
-               if (err < 0)
-                       return -EINVAL;
-       }
-
-       if (func_id == BPF_FUNC_snprintf) {
+               break;
+       case BPF_FUNC_snprintf:
                err = check_bpf_snprintf_call(env, regs);
-               if (err < 0)
-                       return err;
+               break;
+       case BPF_FUNC_loop:
+               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+                                       set_loop_callback_state);
+               break;
        }
 
+       if (err)
+               return err;
+
        /* reset caller saved regs */
        for (i = 0; i < CALLER_SAVED_REGS; i++) {
                mark_reg_not_init(env, regs, caller_saved[i]);
tools/include/uapi/linux/bpf.h
index a69e4b0..211b43a 100644
@@ -4957,6 +4957,30 @@ union bpf_attr {
  *             **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
  *             **-EBUSY** if failed to try lock mmap_lock.
  *             **-EINVAL** for invalid **flags**.
+ *
+ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
+ *     Description
+ *             For **nr_loops**, call **callback_fn** with
+ *             **callback_ctx** as the context parameter.
+ *             The **callback_fn** should be a static function and
+ *             the **callback_ctx** should be a pointer to the stack.
+ *             **flags** is used to control certain aspects of the helper
+ *             and must currently be 0. **nr_loops** is currently
+ *             limited to 1 << 23 (~8 million) loops.
+ *
+ *             long (\*callback_fn)(u32 index, void \*ctx);
+ *
+ *             where **index** is the current index in the loop. The index
+ *             is zero-indexed.
+ *
+ *             If **callback_fn** returns 0, the helper will continue to the
+ *             next iteration. If the return value is 1, the helper will skip
+ *             the rest of the iterations and return. Other return values are
+ *             not used now and will be rejected by the verifier.
+ *
+ *     Return
+ *             The number of loops performed, **-EINVAL** for invalid **flags**,
+ *             **-E2BIG** if **nr_loops** exceeds the maximum number of loops.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -5140,6 +5164,7 @@ union bpf_attr {
        FN(skc_to_unix_sock),           \
        FN(kallsyms_lookup_name),       \
        FN(find_vma),                   \
+       FN(loop),                       \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper