bpf: Allow per unit prefill for non-fix-size percpu memory allocator

author Yonghong Song <yonghong.song@linux.dev>

Fri, 22 Dec 2023 03:17:45 +0000 (19:17 -0800)

committer Alexei Starovoitov <ast@kernel.org>

Thu, 4 Jan 2024 05:08:25 +0000 (21:08 -0800)
author Yonghong Song <yonghong.song@linux.dev>
Fri, 22 Dec 2023 03:17:45 +0000 (19:17 -0800)
committer Alexei Starovoitov <ast@kernel.org>
Thu, 4 Jan 2024 05:08:25 +0000 (21:08 -0800)
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h

index acef8c8..aaf004d 100644 (file)
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -22,8 +22,15 @@ struct bpf_mem_alloc {
   * 'size = 0' is for bpf_mem_alloc which manages many fixed-size objects.
   * Alloc and free are done with bpf_mem_{alloc,free}() and the size of
   * the returned object is given by the size argument of bpf_mem_alloc().
+ * If percpu equals true, an error is returned in order to avoid
+ * large memory consumption; bpf_mem_alloc_percpu_unit_init() below
+ * should be used instead to do on-demand per-cpu allocation for each size.
   */
  int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu);
+/* Initialize a non-fixed-size percpu memory allocator */
+int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg);
+/* The percpu allocation with a specific unit size. */
+int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
  void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
  
  /* kmalloc/kfree equivalent: */
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c

index 4a21050..f71da07 100644 (file)
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -121,6 +121,8 @@ struct bpf_mem_caches {
         struct bpf_mem_cache cache[NUM_CACHES];
  };
  
+static const u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; /* unit sizes; indexed by the bpf_mem_cache_idx() result */
+
  static struct llist_node notrace *__llist_del_first(struct llist_head *head)
  {
         struct llist_node *entry, *next;
@@ -499,12 +501,14 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
   */
  int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
  {
-       static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
         struct bpf_mem_caches *cc, __percpu *pcc;
         struct bpf_mem_cache *c, __percpu *pc;
         struct obj_cgroup *objcg = NULL;
         int cpu, i, unit_size, percpu_size = 0;
  
+       if (percpu && size == 0)
+               return -EINVAL;
+
         /* room for llist_node and per-cpu pointer */
         if (percpu)
                 percpu_size = LLIST_NODE_SZ + sizeof(void *);
@@ -524,6 +528,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
                         objcg = get_obj_cgroup_from_current();
  #endif
                 ma->objcg = objcg;
+
                 for_each_possible_cpu(cpu) {
                         c = per_cpu_ptr(pc, cpu);
                         c->unit_size = unit_size;
@@ -562,6 +567,56 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
         return 0;
  }
  
+int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg)
+{      /* Allocate only the per-CPU bpf_mem_caches container; returns 0 or -ENOMEM. */
+       struct bpf_mem_caches __percpu *pcc;
+
+       pcc = __alloc_percpu_gfp(sizeof(struct bpf_mem_caches), 8, GFP_KERNEL);
+       if (!pcc)
+               return -ENOMEM;
+
+       ma->caches = pcc;       /* no cache slot is configured or prefilled here */
+       ma->objcg = objcg;      /* copied into each slot by bpf_mem_alloc_percpu_unit_init() */
+       ma->percpu = true;
+       return 0;
+}
+
+int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size)
+{      /* Set up and prefill, on every possible CPU, the cache slot selected for @size. */
+       struct bpf_mem_caches *cc, __percpu *pcc;
+       int cpu, i, unit_size, percpu_size;
+       struct obj_cgroup *objcg;
+       struct bpf_mem_cache *c;
+
+       i = bpf_mem_cache_idx(size);    /* slot index; used for both sizes[] and cc->cache[] */
+       if (i < 0)
+               return -EINVAL;
+
+       /* room for llist_node and per-cpu pointer */
+       percpu_size = LLIST_NODE_SZ + sizeof(void *);
+
+       unit_size = sizes[i];
+       objcg = ma->objcg;
+       pcc = ma->caches;
+
+       for_each_possible_cpu(cpu) {
+               cc = per_cpu_ptr(pcc, cpu);
+               c = &cc->cache[i];
+               if (cpu == 0 && c->unit_size)   /* non-zero on CPU 0 is taken as "slot already set up" */
+                       break;  /* still returns 0; NOTE(review): assumes CPU 0 is visited first - confirm */
+
+               c->unit_size = unit_size;
+               c->objcg = objcg;
+               c->percpu_size = percpu_size;
+               c->tgt = c;     /* the cache is its own target */
+
+               init_refill_work(c);
+               prefill_mem_cache(c, cpu);
+       }
+
+       return 0;
+}
+
  static void drain_mem_cache(struct bpf_mem_cache *c)
  {
         bool percpu = !!c->percpu_size;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index d4e31f6..e9699a2 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12139,20 +12139,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                                 if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
                                         return -ENOMEM;
  
-                               if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
-                                       if (!bpf_global_percpu_ma_set) {
-                                               mutex_lock(&bpf_percpu_ma_lock);
-                                               if (!bpf_global_percpu_ma_set) {
-                                                       err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
-                                                       if (!err)
-                                                               bpf_global_percpu_ma_set = true;
-                                               }
-                                               mutex_unlock(&bpf_percpu_ma_lock);
-                                               if (err)
-                                                       return err;
-                                       }
-                               }
-
                                 if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
                                         verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
                                         return -EINVAL;
@@ -12173,6 +12159,29 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                                         return -EINVAL;
                                 }
  
+                               if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+                                       if (!bpf_global_percpu_ma_set) {
+                                               mutex_lock(&bpf_percpu_ma_lock);
+                                               if (!bpf_global_percpu_ma_set) {
+                                                       /* Charge memory allocated with bpf_global_percpu_ma to
+                                                        * root memcg. The obj_cgroup for root memcg is NULL.
+                                                        */
+                                                       err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
+                                                       if (!err)
+                                                               bpf_global_percpu_ma_set = true;
+                                               }
+                                               mutex_unlock(&bpf_percpu_ma_lock);
+                                               if (err)
+                                                       return err;
+                                       }
+
+                                       mutex_lock(&bpf_percpu_ma_lock);
+                                       err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
+                                       mutex_unlock(&bpf_percpu_ma_lock);
+                                       if (err)
+                                               return err;
+                               }
+
                                 struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
                                 if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
                                         if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
author	Yonghong Song <yonghong.song@linux.dev>
	Fri, 22 Dec 2023 03:17:45 +0000 (19:17 -0800)
committer	Alexei Starovoitov <ast@kernel.org>
	Thu, 4 Jan 2024 05:08:25 +0000 (21:08 -0800)
include/linux/bpf_mem_alloc.h		patch \| blob \| history
kernel/bpf/memalloc.c		patch \| blob \| history
kernel/bpf/verifier.c		patch \| blob \| history