Merge branch 'shared-cgroup-storage'
author	Alexei Starovoitov <ast@kernel.org>	Fri, 24 Jul 2020 06:01:16 +0000 (23:01 -0700)
committer	Alexei Starovoitov <ast@kernel.org>	Sun, 26 Jul 2020 03:16:36 +0000 (20:01 -0700)
YiFei Zhu says:

====================
To access the storage in a CGROUP_STORAGE map, one uses the
bpf_get_local_storage helper, which is extremely fast due to its
use of per-CPU variables. However, its implementation is built on
the assumption that one map can only be used by one program at any
time, and this prohibits any sharing of data between multiple
programs using these maps, eliminating a lot of use cases, such
as per-cgroup configuration storage written to by a setsockopt
program and read by a cg_sock_addr program (see the sketch below).
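
For illustration, a sketch of that use case under the new semantics
(hypothetical program and map names; includes omitted):

    struct {
            __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
            __type(key, __u64);
            __type(value, __u32);
    } config SEC(".maps");

    SEC("cgroup/setsockopt")
    int write_config(struct bpf_sockopt *ctx)
    {
            __u32 *cfg = bpf_get_local_storage(&config, 0);

            *cfg = 1;       /* written by the setsockopt program */
            return 1;
    }

    SEC("cgroup/connect4")
    int read_config(struct bpf_sock_addr *ctx)
    {
            __u32 *cfg = bpf_get_local_storage(&config, 0);

            if (*cfg == 0)  /* read by the sock_addr program */
                    return 0;
            return 1;
    }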

Why not use other map types? The great part of the CGROUP_STORAGE
map is that it is isolated by the different cgroups it is attached
to. When one program uses bpf_get_local_storage, even on the same
map, it gets different storages when run as a result of attaching
to different cgroups. The kernel manages the storages, simplifying
both the BPF program and userspace. In theory, one could probably
use other maps like array or hash to do the same thing, but it
would add major overhead / complexity: userspace would need to know
when a cgroup is being freed in order to free up a slot in the
replacement map.

This patch set introduces a significant change to the semantics of
the CGROUP_STORAGE map type. Instead of each storage being tied to
one single attachment, it is shared across different attachments to
the same cgroup, and persists until either the map or the cgroup it
is attached to is freed.

Users may use u64 as the key to the map, with the result that the
attach type is ignored during key comparison, and programs of
different attach types will share the same storage if the cgroups
they are attached to are the same.

How could this break existing users?
* Users that use detach & reattach / program replacement as a
  shortcut to zeroing the storage. Since we need sharing between
  programs, we cannot zero the storage. Users that expect this
  behavior should either attach a program with a new map, or
  explicitly zero the map with a syscall (see the sketch below).
This case depends on undocumented implementation details, so the
impact should be very minimal.
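
As a rough illustration (a hypothetical helper, not part of this
series), explicit zeroing can be done from userspace via the map
update syscall, here through the libbpf wrapper bpf_map_update_elem,
assuming the u64 key type:

    #include <string.h>
    #include <bpf/bpf.h>

    int zero_cgroup_storage(int map_fd, __u64 cgroup_id, size_t value_size)
    {
            char zeroes[value_size];

            memset(zeroes, 0, value_size);
            /* BPF_EXIST: userspace cannot create new entries in this
             * map type, it may only update existing storages.
             */
            return bpf_map_update_elem(map_fd, &cgroup_id, zeroes,
                                       BPF_EXIST);
    }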

Patch 1 introduces a test on the old expected behavior of the map
type.

Patch 2 introduces a test showing how two programs cannot share
one such map.

Patch 3 implements the change of semantics to the map.

Patch 4 amends the new test such that it yields the behavior we
expect from the change.

Patch 5 documents the map type.

Changes since RFC:
* Clarify commit message in patch 3 such that it says the lifetime
  of the storage is ended at the freeing of the cgroup_bpf, rather
  than the cgroup itself.
* Restored an -ENOMEM check in __cgroup_bpf_attach.
* Update selftests for recent change in network_helpers API.

Changes since v1:
* s/CHECK_FAIL/CHECK/
* s/bpf_prog_attach/bpf_program__attach_cgroup/
* Moved test__start_subtest to test_cg_storage_multi.
* Removed some redundant CHECK_FAIL where they are already CHECK-ed.

Changes since v2:
* Lock cgroup_mutex during map_free.
* Publish new storages only if attach is successful, by tracking
  exactly which storages are reused in an array of bools.
* Mention bpftool map dump showing a value of zero for attach_type
  in patch 3 commit message.

Changes since v3:
* Use a much simpler lookup and allocate-if-not-exist from the fact
  that cgroup_mutex is locked during attach.
* Removed an unnecessary spinlock hold.

Changes since v4:
* Changed semantics so that if the key type is struct
  bpf_cgroup_storage_key, the map retains isolation between different
  attach types. Sharing between different attach types only occurs
  when the key type is u64.
* Adapted tests and docs for the above change.

Changes since v5:
* Removed redundant NULL check before bpf_link__destroy.
* Free the BPF object explicitly after asserting that it failed to
  load, so that it is not leaked in the event that it did not fail
  to load.
* Rename variable in bpf_cgroup_storage_key_cmp for clarity.
* Added a lot of information to Documentation, more or less copied
  from what Martin KaFai Lau wrote.
====================

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Documentation/bpf/index.rst
Documentation/bpf/map_cgroup_storage.rst [new file with mode: 0644]
include/linux/bpf-cgroup.h
kernel/bpf/cgroup.c
kernel/bpf/core.c
kernel/bpf/local_storage.c
tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/cg_storage_multi.h [new file with mode: 0644]
tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c [new file with mode: 0644]

index 38b4db8..26f4bb3 100644 (file)
@@ -48,6 +48,15 @@ Program types
    bpf_lsm
 
 
+Map types
+=========
+
+.. toctree::
+   :maxdepth: 1
+
+   map_cgroup_storage
+
+
 Testing and debugging BPF
 =========================
 
diff --git a/Documentation/bpf/map_cgroup_storage.rst b/Documentation/bpf/map_cgroup_storage.rst
new file mode 100644 (file)
index 0000000..cab9543
--- /dev/null
@@ -0,0 +1,169 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2020 Google LLC.
+
+===========================
+BPF_MAP_TYPE_CGROUP_STORAGE
+===========================
+
+The ``BPF_MAP_TYPE_CGROUP_STORAGE`` map type represents a local fixed-size
+storage. It is only available with ``CONFIG_CGROUP_BPF``, and to programs that
+attach to cgroups; these programs are made available by the same Kconfig. The
+storage is identified by the cgroup the program is attached to.
+
+The map provides local storage at the cgroup that the BPF program is attached
+to. It provides faster and simpler access than a general-purpose hash table,
+which would perform a hash table lookup and require the user to track live
+cgroups on their own.
+
+This document describes the usage and semantics of the
+``BPF_MAP_TYPE_CGROUP_STORAGE`` map type. Some of its behaviors were changed
+in Linux 5.9, and this document describes the differences.
+
+Usage
+=====
+
+The map uses a key of type either ``__u64 cgroup_inode_id`` or
+``struct bpf_cgroup_storage_key``, declared in ``linux/bpf.h``::
+
+    struct bpf_cgroup_storage_key {
+            __u64 cgroup_inode_id;
+            __u32 attach_type;
+    };
+
+``cgroup_inode_id`` is the inode id of the cgroup directory.
+``attach_type`` is the program's attach type.
+
+Linux 5.9 added support for type ``__u64 cgroup_inode_id`` as the key type.
+When this key type is used, all attach types of the particular cgroup and
+map will share the same storage. Otherwise, if the type is
+``struct bpf_cgroup_storage_key``, then programs of different attach types
+will be isolated and see different storages.
+
+To access the storage in a program, use ``bpf_get_local_storage``::
+
+    void *bpf_get_local_storage(void *map, u64 flags)
+
+``flags`` is reserved for future use and must be 0.
+
+There is no implicit synchronization. Storages of ``BPF_MAP_TYPE_CGROUP_STORAGE``
+can be accessed by multiple programs across different CPUs, and the user must
+take care of synchronization themselves. The BPF infrastructure provides
+``struct bpf_spin_lock`` to synchronize the storage. See
+``tools/testing/selftests/bpf/progs/test_spin_lock.c``.
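+
+As a minimal sketch of lock-protected access (the value struct and its field
+names here are illustrative)::
+
+    struct storage {
+            struct bpf_spin_lock lock;
+            __u32 packets;
+    };
+
+    struct {
+            __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+            __type(key, struct bpf_cgroup_storage_key);
+            __type(value, struct storage);
+    } cgroup_storage SEC(".maps");
+
+    SEC("cgroup_skb/egress")
+    int program(struct __sk_buff *skb)
+    {
+            struct storage *ptr = bpf_get_local_storage(&cgroup_storage, 0);
+
+            bpf_spin_lock(&ptr->lock);
+            ptr->packets++;
+            bpf_spin_unlock(&ptr->lock);
+
+            return 1;
+    }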
+
+Examples
+========
+
+Usage with key type as ``struct bpf_cgroup_storage_key``::
+
+    #include <linux/bpf.h>
+    #include <bpf/bpf_helpers.h>
+
+    struct {
+            __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+            __type(key, struct bpf_cgroup_storage_key);
+            __type(value, __u32);
+    } cgroup_storage SEC(".maps");
+
+    SEC("cgroup_skb/egress")
+    int program(struct __sk_buff *skb)
+    {
+            __u32 *ptr = bpf_get_local_storage(&cgroup_storage, 0);
+
+            __sync_fetch_and_add(ptr, 1);
+
+            return 1;
+    }
+
+Userspace accessing the map declared above::
+
+    #include <bpf/bpf.h>
+    #include <bpf/libbpf.h>
+
+    __u32 map_lookup(struct bpf_map *map, __u64 cgrp, enum bpf_attach_type type)
+    {
+            struct bpf_cgroup_storage_key key = {
+                    .cgroup_inode_id = cgrp,
+                    .attach_type = type,
+            };
+            __u32 value;
+            bpf_map_lookup_elem(bpf_map__fd(map), &key, &value);
+            // error checking omitted
+            return value;
+    }
+
+Alternatively, using just ``__u64 cgroup_inode_id`` as the key type::
+
+    #include <linux/bpf.h>
+    #include <bpf/bpf_helpers.h>
+
+    struct {
+            __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+            __type(key, __u64);
+            __type(value, __u32);
+    } cgroup_storage SEC(".maps");
+
+    SEC("cgroup_skb/egress")
+    int program(struct __sk_buff *skb)
+    {
+            __u32 *ptr = bpf_get_local_storage(&cgroup_storage, 0);
+
+            __sync_fetch_and_add(ptr, 1);
+
+            return 1;
+    }
+
+And userspace::
+
+    #include <bpf/bpf.h>
+    #include <bpf/libbpf.h>
+
+    __u32 map_lookup(struct bpf_map *map, __u64 cgrp, enum bpf_attach_type type)
+    {
+            __u32 value;
+            bpf_map_lookup_elem(bpf_map__fd(map), &cgrp, &value);
+            // error checking omitted
+            return value;
+    }
+
+Semantics
+=========
+
+``BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE`` is a variant of this map type. The
+per-CPU variant has a separate memory region on each CPU for each storage,
+while the non-per-CPU variant has a single memory region for each storage.
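+
+A sketch of reading a per-CPU storage from userspace, assuming ``__u64`` key
+and value types and the userspace includes from the examples above
+(``libbpf_num_possible_cpus()`` is provided by libbpf)::
+
+    __u64 percpu_sum(struct bpf_map *map, __u64 cgrp)
+    {
+            int ncpus = libbpf_num_possible_cpus();
+            __u64 values[ncpus];  /* one 8-byte slot per possible CPU */
+            __u64 sum = 0;
+            int cpu;
+
+            bpf_map_lookup_elem(bpf_map__fd(map), &cgrp, values);
+            /* error checking omitted */
+            for (cpu = 0; cpu < ncpus; cpu++)
+                    sum += values[cpu];
+            return sum;
+    }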
+
+Prior to Linux 5.9, the lifetime of a storage was precisely per-attachment,
+and for a single ``CGROUP_STORAGE`` map there could be at most one program
+loaded that uses the map. A program may be attached to multiple cgroups or
+have multiple attach types, and each attachment creates a fresh zeroed
+storage. The storage is freed upon detach.
+
+There is a one-to-one association between the map of each type (per-CPU and
+non-per-CPU) and the BPF program during load verification time. As a result,
+each map can only be used by one BPF program and each BPF program can only
+use one storage map of each type. Because a map can only be used by one BPF
+program, sharing a cgroup's storage with other BPF programs was impossible.
+
+Since Linux 5.9, storage can be shared by multiple programs. When a program
+is attached to a cgroup, the kernel creates a new storage only if the map
+does not already contain an entry for the cgroup and attach type pair;
+otherwise, the old storage is reused for the new attachment. If the map is
+attach-type shared, the attach type is simply ignored during comparison.
+Storage is freed only when either the map or the cgroup it is attached to is
+freed. Detaching does not directly free the storage, but it may cause the
+reference count of the map to reach zero and indirectly free all storages in
+the map.
+
+The map is not associated with any BPF program, thus making sharing possible.
+However, the BPF program can still only associate with one map of each type
+(per-CPU and non-per-CPU). A BPF program cannot use more than one
+``BPF_MAP_TYPE_CGROUP_STORAGE`` or more than one
+``BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE``.
+
+In all versions, userspace may use the attach parameters of the cgroup and
+attach type pair in ``struct bpf_cgroup_storage_key`` as the key to the BPF
+map APIs to read or update the storage for a given attachment. For Linux 5.9
+attach-type shared storages, only the first field in the struct, the cgroup
+inode id, is used during comparison, so userspace may just specify a
+``__u64`` directly.
+
+The storage is bound at attach time. Even if the program is attached to the
+parent and triggers in the child, the storage still belongs to the parent.
+
+Userspace cannot create a new entry in the map or delete an existing entry.
+Program test runs always use a temporary storage.
index 2c6f266..64f3670 100644 (file)
@@ -46,7 +46,8 @@ struct bpf_cgroup_storage {
        };
        struct bpf_cgroup_storage_map *map;
        struct bpf_cgroup_storage_key key;
-       struct list_head list;
+       struct list_head list_map;
+       struct list_head list_cg;
        struct rb_node node;
        struct rcu_head rcu;
 };
@@ -78,6 +79,9 @@ struct cgroup_bpf {
        struct list_head progs[MAX_BPF_ATTACH_TYPE];
        u32 flags[MAX_BPF_ATTACH_TYPE];
 
+       /* list of cgroup shared storages */
+       struct list_head storages;
+
        /* temp storage for effective prog array used by prog_attach/detach */
        struct bpf_prog_array *inactive;
 
@@ -161,6 +165,9 @@ static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
                this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
 }
 
+struct bpf_cgroup_storage *
+cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
+                     void *key, bool locked);
 struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
                                        enum bpf_cgroup_storage_type stype);
 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
@@ -169,7 +176,6 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
                             enum bpf_attach_type type);
 void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
 int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);
-void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *map);
 
 int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
@@ -383,8 +389,6 @@ static inline void bpf_cgroup_storage_set(
        struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
                                            struct bpf_map *map) { return 0; }
-static inline void bpf_cgroup_storage_release(struct bpf_prog_aux *aux,
-                                             struct bpf_map *map) {}
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
        struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; }
 static inline void bpf_cgroup_storage_free(
index ac53102..957cce1 100644 (file)
@@ -37,17 +37,34 @@ static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
 }
 
 static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],
-                                    struct bpf_prog *prog)
+                                    struct bpf_cgroup_storage *new_storages[],
+                                    enum bpf_attach_type type,
+                                    struct bpf_prog *prog,
+                                    struct cgroup *cgrp)
 {
        enum bpf_cgroup_storage_type stype;
+       struct bpf_cgroup_storage_key key;
+       struct bpf_map *map;
+
+       key.cgroup_inode_id = cgroup_id(cgrp);
+       key.attach_type = type;
 
        for_each_cgroup_storage_type(stype) {
+               map = prog->aux->cgroup_storage[stype];
+               if (!map)
+                       continue;
+
+               storages[stype] = cgroup_storage_lookup((void *)map, &key, false);
+               if (storages[stype])
+                       continue;
+
                storages[stype] = bpf_cgroup_storage_alloc(prog, stype);
                if (IS_ERR(storages[stype])) {
-                       storages[stype] = NULL;
-                       bpf_cgroup_storages_free(storages);
+                       bpf_cgroup_storages_free(new_storages);
                        return -ENOMEM;
                }
+
+               new_storages[stype] = storages[stype];
        }
 
        return 0;
@@ -63,7 +80,7 @@ static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],
 }
 
 static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
-                                    struct cgroup *cgrp,
+                                    struct cgroup *cgrp,
                                     enum bpf_attach_type attach_type)
 {
        enum bpf_cgroup_storage_type stype;
@@ -72,14 +89,6 @@ static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
                bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);
 }
 
-static void bpf_cgroup_storages_unlink(struct bpf_cgroup_storage *storages[])
-{
-       enum bpf_cgroup_storage_type stype;
-
-       for_each_cgroup_storage_type(stype)
-               bpf_cgroup_storage_unlink(storages[stype]);
-}
-
 /* Called when bpf_cgroup_link is auto-detached from dying cgroup.
  * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
  * doesn't free link memory, which will eventually be done by bpf_link's
@@ -101,22 +110,23 @@ static void cgroup_bpf_release(struct work_struct *work)
        struct cgroup *p, *cgrp = container_of(work, struct cgroup,
                                               bpf.release_work);
        struct bpf_prog_array *old_array;
+       struct list_head *storages = &cgrp->bpf.storages;
+       struct bpf_cgroup_storage *storage, *stmp;
+
        unsigned int type;
 
        mutex_lock(&cgroup_mutex);
 
        for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
                struct list_head *progs = &cgrp->bpf.progs[type];
-               struct bpf_prog_list *pl, *tmp;
+               struct bpf_prog_list *pl, *pltmp;
 
-               list_for_each_entry_safe(pl, tmp, progs, node) {
+               list_for_each_entry_safe(pl, pltmp, progs, node) {
                        list_del(&pl->node);
                        if (pl->prog)
                                bpf_prog_put(pl->prog);
                        if (pl->link)
                                bpf_cgroup_link_auto_detach(pl->link);
-                       bpf_cgroup_storages_unlink(pl->storage);
-                       bpf_cgroup_storages_free(pl->storage);
                        kfree(pl);
                        static_branch_dec(&cgroup_bpf_enabled_key);
                }
@@ -126,6 +136,11 @@ static void cgroup_bpf_release(struct work_struct *work)
                bpf_prog_array_free(old_array);
        }
 
+       list_for_each_entry_safe(storage, stmp, storages, list_cg) {
+               bpf_cgroup_storage_unlink(storage);
+               bpf_cgroup_storage_free(storage);
+       }
+
        mutex_unlock(&cgroup_mutex);
 
        for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
@@ -290,6 +305,8 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
        for (i = 0; i < NR; i++)
                INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
 
+       INIT_LIST_HEAD(&cgrp->bpf.storages);
+
        for (i = 0; i < NR; i++)
                if (compute_effective_progs(cgrp, i, &arrays[i]))
                        goto cleanup;
@@ -422,7 +439,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
        struct list_head *progs = &cgrp->bpf.progs[type];
        struct bpf_prog *old_prog = NULL;
        struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
-       struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+       struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
        struct bpf_prog_list *pl;
        int err;
 
@@ -455,17 +472,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
        if (IS_ERR(pl))
                return PTR_ERR(pl);
 
-       if (bpf_cgroup_storages_alloc(storage, prog ? : link->link.prog))
+       if (bpf_cgroup_storages_alloc(storage, new_storage, type,
+                                     prog ? : link->link.prog, cgrp))
                return -ENOMEM;
 
        if (pl) {
                old_prog = pl->prog;
-               bpf_cgroup_storages_unlink(pl->storage);
-               bpf_cgroup_storages_assign(old_storage, pl->storage);
        } else {
                pl = kmalloc(sizeof(*pl), GFP_KERNEL);
                if (!pl) {
-                       bpf_cgroup_storages_free(storage);
+                       bpf_cgroup_storages_free(new_storage);
                        return -ENOMEM;
                }
                list_add_tail(&pl->node, progs);
@@ -480,12 +496,11 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
        if (err)
                goto cleanup;
 
-       bpf_cgroup_storages_free(old_storage);
        if (old_prog)
                bpf_prog_put(old_prog);
        else
                static_branch_inc(&cgroup_bpf_enabled_key);
-       bpf_cgroup_storages_link(pl->storage, cgrp, type);
+       bpf_cgroup_storages_link(new_storage, cgrp, type);
        return 0;
 
 cleanup:
@@ -493,9 +508,7 @@ cleanup:
                pl->prog = old_prog;
                pl->link = NULL;
        }
-       bpf_cgroup_storages_free(pl->storage);
-       bpf_cgroup_storages_assign(pl->storage, old_storage);
-       bpf_cgroup_storages_link(pl->storage, cgrp, type);
+       bpf_cgroup_storages_free(new_storage);
        if (!old_prog) {
                list_del(&pl->node);
                kfree(pl);
@@ -679,8 +692,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 
        /* now can actually delete it from this cgroup list */
        list_del(&pl->node);
-       bpf_cgroup_storages_unlink(pl->storage);
-       bpf_cgroup_storages_free(pl->storage);
        kfree(pl);
        if (list_empty(progs))
                /* last program was detached, reset flags to zero */
index 7be02e5..bde9334 100644 (file)
@@ -2097,24 +2097,12 @@ int bpf_prog_array_copy_info(struct bpf_prog_array *array,
                                                                     : 0;
 }
 
-static void bpf_free_cgroup_storage(struct bpf_prog_aux *aux)
-{
-       enum bpf_cgroup_storage_type stype;
-
-       for_each_cgroup_storage_type(stype) {
-               if (!aux->cgroup_storage[stype])
-                       continue;
-               bpf_cgroup_storage_release(aux, aux->cgroup_storage[stype]);
-       }
-}
-
 void __bpf_free_used_maps(struct bpf_prog_aux *aux,
                          struct bpf_map **used_maps, u32 len)
 {
        struct bpf_map *map;
        u32 i;
 
-       bpf_free_cgroup_storage(aux);
        for (i = 0; i < len; i++) {
                map = used_maps[i];
                if (map->ops->map_poke_untrack)
index 51bd5a8..3b2c701 100644 (file)
@@ -9,6 +9,8 @@
 #include <linux/slab.h>
 #include <uapi/linux/btf.h>
 
+#include "../cgroup/cgroup-internal.h"
+
 DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 
 #ifdef CONFIG_CGROUP_BPF
@@ -20,7 +22,6 @@ struct bpf_cgroup_storage_map {
        struct bpf_map map;
 
        spinlock_t lock;
-       struct bpf_prog_aux *aux;
        struct rb_root root;
        struct list_head list;
 };
@@ -30,24 +31,41 @@ static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
        return container_of(map, struct bpf_cgroup_storage_map, map);
 }
 
-static int bpf_cgroup_storage_key_cmp(
-       const struct bpf_cgroup_storage_key *key1,
-       const struct bpf_cgroup_storage_key *key2)
+static bool attach_type_isolated(const struct bpf_map *map)
 {
-       if (key1->cgroup_inode_id < key2->cgroup_inode_id)
-               return -1;
-       else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
-               return 1;
-       else if (key1->attach_type < key2->attach_type)
-               return -1;
-       else if (key1->attach_type > key2->attach_type)
-               return 1;
+       return map->key_size == sizeof(struct bpf_cgroup_storage_key);
+}
+
+static int bpf_cgroup_storage_key_cmp(const struct bpf_cgroup_storage_map *map,
+                                     const void *_key1, const void *_key2)
+{
+       if (attach_type_isolated(&map->map)) {
+               const struct bpf_cgroup_storage_key *key1 = _key1;
+               const struct bpf_cgroup_storage_key *key2 = _key2;
+
+               if (key1->cgroup_inode_id < key2->cgroup_inode_id)
+                       return -1;
+               else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
+                       return 1;
+               else if (key1->attach_type < key2->attach_type)
+                       return -1;
+               else if (key1->attach_type > key2->attach_type)
+                       return 1;
+       } else {
+               const __u64 *cgroup_inode_id1 = _key1;
+               const __u64 *cgroup_inode_id2 = _key2;
+
+               if (*cgroup_inode_id1 < *cgroup_inode_id2)
+                       return -1;
+               else if (*cgroup_inode_id1 > *cgroup_inode_id2)
+                       return 1;
+       }
        return 0;
 }
 
-static struct bpf_cgroup_storage *cgroup_storage_lookup(
-       struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
-       bool locked)
+struct bpf_cgroup_storage *
+cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
+                     void *key, bool locked)
 {
        struct rb_root *root = &map->root;
        struct rb_node *node;
@@ -61,7 +79,7 @@ static struct bpf_cgroup_storage *cgroup_storage_lookup(
 
                storage = container_of(node, struct bpf_cgroup_storage, node);
 
-               switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
+               switch (bpf_cgroup_storage_key_cmp(map, key, &storage->key)) {
                case -1:
                        node = node->rb_left;
                        break;
@@ -93,7 +111,7 @@ static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
                this = container_of(*new, struct bpf_cgroup_storage, node);
 
                parent = *new;
-               switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
+               switch (bpf_cgroup_storage_key_cmp(map, &storage->key, &this->key)) {
                case -1:
                        new = &((*new)->rb_left);
                        break;
@@ -111,10 +129,9 @@ static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
        return 0;
 }
 
-static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
+static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *key)
 {
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
-       struct bpf_cgroup_storage_key *key = _key;
        struct bpf_cgroup_storage *storage;
 
        storage = cgroup_storage_lookup(map, key, false);
@@ -124,17 +141,13 @@ static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
        return &READ_ONCE(storage->buf)->data[0];
 }
 
-static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
+static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
                                      void *value, u64 flags)
 {
-       struct bpf_cgroup_storage_key *key = _key;
        struct bpf_cgroup_storage *storage;
        struct bpf_storage_buffer *new;
 
-       if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
-               return -EINVAL;
-
-       if (unlikely(flags & BPF_NOEXIST))
+       if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST)))
                return -EINVAL;
 
        if (unlikely((flags & BPF_F_LOCK) &&
@@ -167,11 +180,10 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
        return 0;
 }
 
-int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key,
                                   void *value)
 {
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
-       struct bpf_cgroup_storage_key *key = _key;
        struct bpf_cgroup_storage *storage;
        int cpu, off = 0;
        u32 size;
@@ -197,11 +209,10 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
        return 0;
 }
 
-int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
+int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key,
                                     void *value, u64 map_flags)
 {
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
-       struct bpf_cgroup_storage_key *key = _key;
        struct bpf_cgroup_storage *storage;
        int cpu, off = 0;
        u32 size;
@@ -232,12 +243,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
        return 0;
 }
 
-static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
+static int cgroup_storage_get_next_key(struct bpf_map *_map, void *key,
                                       void *_next_key)
 {
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
-       struct bpf_cgroup_storage_key *key = _key;
-       struct bpf_cgroup_storage_key *next = _next_key;
        struct bpf_cgroup_storage *storage;
 
        spin_lock_bh(&map->lock);
@@ -250,17 +259,23 @@ static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
                if (!storage)
                        goto enoent;
 
-               storage = list_next_entry(storage, list);
+               storage = list_next_entry(storage, list_map);
                if (!storage)
                        goto enoent;
        } else {
                storage = list_first_entry(&map->list,
-                                        struct bpf_cgroup_storage, list);
+                                        struct bpf_cgroup_storage, list_map);
        }
 
        spin_unlock_bh(&map->lock);
-       next->attach_type = storage->key.attach_type;
-       next->cgroup_inode_id = storage->key.cgroup_inode_id;
+
+       if (attach_type_isolated(&map->map)) {
+               struct bpf_cgroup_storage_key *next = _next_key;
+               *next = storage->key;
+       } else {
+               __u64 *next = _next_key;
+               *next = storage->key.cgroup_inode_id;
+       }
        return 0;
 
 enoent:
@@ -275,7 +290,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
        struct bpf_map_memory mem;
        int ret;
 
-       if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
+       if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
+           attr->key_size != sizeof(__u64))
                return ERR_PTR(-EINVAL);
 
        if (attr->value_size == 0)
@@ -318,6 +334,17 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
 static void cgroup_storage_map_free(struct bpf_map *_map)
 {
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+       struct list_head *storages = &map->list;
+       struct bpf_cgroup_storage *storage, *stmp;
+
+       mutex_lock(&cgroup_mutex);
+
+       list_for_each_entry_safe(storage, stmp, storages, list_map) {
+               bpf_cgroup_storage_unlink(storage);
+               bpf_cgroup_storage_free(storage);
+       }
+
+       mutex_unlock(&cgroup_mutex);
 
        WARN_ON(!RB_EMPTY_ROOT(&map->root));
        WARN_ON(!list_empty(&map->list));
@@ -335,49 +362,63 @@ static int cgroup_storage_check_btf(const struct bpf_map *map,
                                    const struct btf_type *key_type,
                                    const struct btf_type *value_type)
 {
-       struct btf_member *m;
-       u32 offset, size;
-
-       /* Key is expected to be of struct bpf_cgroup_storage_key type,
-        * which is:
-        * struct bpf_cgroup_storage_key {
-        *      __u64   cgroup_inode_id;
-        *      __u32   attach_type;
-        * };
-        */
+       if (attach_type_isolated(map)) {
+               struct btf_member *m;
+               u32 offset, size;
+
+               /* Key is expected to be of struct bpf_cgroup_storage_key type,
+                * which is:
+                * struct bpf_cgroup_storage_key {
+                *      __u64   cgroup_inode_id;
+                *      __u32   attach_type;
+                * };
+                */
+
+               /*
+                * Key_type must be a structure with two fields.
+                */
+               if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
+                   BTF_INFO_VLEN(key_type->info) != 2)
+                       return -EINVAL;
+
+               /*
+                * The first field must be a 64 bit integer at 0 offset.
+                */
+               m = (struct btf_member *)(key_type + 1);
+               size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id);
+               if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
+                       return -EINVAL;
+
+               /*
+                * The second field must be a 32 bit integer at 64 bit offset.
+                */
+               m++;
+               offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
+               size = sizeof_field(struct bpf_cgroup_storage_key, attach_type);
+               if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
+                       return -EINVAL;
+       } else {
+               u32 int_data;
 
-       /*
-        * Key_type must be a structure with two fields.
-        */
-       if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
-           BTF_INFO_VLEN(key_type->info) != 2)
-               return -EINVAL;
+               /*
+                * Key is expected to be u64, which stores the cgroup_inode_id
+                */
 
-       /*
-        * The first field must be a 64 bit integer at 0 offset.
-        */
-       m = (struct btf_member *)(key_type + 1);
-       size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id);
-       if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
-               return -EINVAL;
+               if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+                       return -EINVAL;
 
-       /*
-        * The second field must be a 32 bit integer at 64 bit offset.
-        */
-       m++;
-       offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
-       size = sizeof_field(struct bpf_cgroup_storage_key, attach_type);
-       if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
-               return -EINVAL;
+               int_data = *(u32 *)(key_type + 1);
+               if (BTF_INT_BITS(int_data) != 64 || BTF_INT_OFFSET(int_data))
+                       return -EINVAL;
+       }
 
        return 0;
 }
 
-static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key,
+static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
                                         struct seq_file *m)
 {
        enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
-       struct bpf_cgroup_storage_key *key = _key;
        struct bpf_cgroup_storage *storage;
        int cpu;
 
@@ -426,38 +467,13 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
 int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map)
 {
        enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
-       struct bpf_cgroup_storage_map *map = map_to_storage(_map);
-       int ret = -EBUSY;
-
-       spin_lock_bh(&map->lock);
 
-       if (map->aux && map->aux != aux)
-               goto unlock;
        if (aux->cgroup_storage[stype] &&
            aux->cgroup_storage[stype] != _map)
-               goto unlock;
+               return -EBUSY;
 
-       map->aux = aux;
        aux->cgroup_storage[stype] = _map;
-       ret = 0;
-unlock:
-       spin_unlock_bh(&map->lock);
-
-       return ret;
-}
-
-void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *_map)
-{
-       enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
-       struct bpf_cgroup_storage_map *map = map_to_storage(_map);
-
-       spin_lock_bh(&map->lock);
-       if (map->aux == aux) {
-               WARN_ON(aux->cgroup_storage[stype] != _map);
-               map->aux = NULL;
-               aux->cgroup_storage[stype] = NULL;
-       }
-       spin_unlock_bh(&map->lock);
+       return 0;
 }
 
 static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
@@ -578,7 +594,8 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
 
        spin_lock_bh(&map->lock);
        WARN_ON(cgroup_storage_insert(map, storage));
-       list_add(&storage->list, &map->list);
+       list_add(&storage->list_map, &map->list);
+       list_add(&storage->list_cg, &cgroup->bpf.storages);
        spin_unlock_bh(&map->lock);
 }
 
@@ -596,7 +613,8 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
        root = &map->root;
        rb_erase(&storage->node, root);
 
-       list_del(&storage->list);
+       list_del(&storage->list_map);
+       list_del(&storage->list_cg);
        spin_unlock_bh(&map->lock);
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
new file mode 100644 (file)
index 0000000..c67d8c0
--- /dev/null
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+#include "cg_storage_multi_egress_only.skel.h"
+#include "cg_storage_multi_isolated.skel.h"
+#include "cg_storage_multi_shared.skel.h"
+
+#define PARENT_CGROUP "/cgroup_storage"
+#define CHILD_CGROUP "/cgroup_storage/child"
+
+static int duration;
+
+static bool assert_storage(struct bpf_map *map, const void *key,
+                          struct cgroup_value *expected)
+{
+       struct cgroup_value value;
+       int map_fd;
+
+       map_fd = bpf_map__fd(map);
+
+       if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) < 0,
+                 "map-lookup", "errno %d", errno))
+               return true;
+       if (CHECK(memcmp(&value, expected, sizeof(struct cgroup_value)),
+                 "assert-storage", "storages differ"))
+               return true;
+
+       return false;
+}
+
+static bool assert_storage_noexist(struct bpf_map *map, const void *key)
+{
+       struct cgroup_value value;
+       int map_fd;
+
+       map_fd = bpf_map__fd(map);
+
+       if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) == 0,
+                 "map-lookup", "succeeded, expected ENOENT"))
+               return true;
+       if (CHECK(errno != ENOENT,
+                 "map-lookup", "errno %d, expected ENOENT", errno))
+               return true;
+
+       return false;
+}
+
+static bool connect_send(const char *cgroup_path)
+{
+       bool res = true;
+       int server_fd = -1, client_fd = -1;
+
+       if (join_cgroup(cgroup_path))
+               goto out_clean;
+
+       server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+       if (server_fd < 0)
+               goto out_clean;
+
+       client_fd = connect_to_fd(server_fd, 0);
+       if (client_fd < 0)
+               goto out_clean;
+
+       if (send(client_fd, "message", strlen("message"), 0) < 0)
+               goto out_clean;
+
+       res = false;
+
+out_clean:
+       close(client_fd);
+       close(server_fd);
+       return res;
+}
+
+static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
+{
+       struct cg_storage_multi_egress_only *obj;
+       struct cgroup_value expected_cgroup_value;
+       struct bpf_cgroup_storage_key key;
+       struct bpf_link *parent_link = NULL, *child_link = NULL;
+       bool err;
+
+       key.attach_type = BPF_CGROUP_INET_EGRESS;
+
+       obj = cg_storage_multi_egress_only__open_and_load();
+       if (CHECK(!obj, "skel-load", "errno %d", errno))
+               return;
+
+       /* Attach to parent cgroup, trigger packet from child.
+        * Assert that there is only one run, and that in that run the
+        * storage is the parent cgroup's storage.
+        * Also assert that child cgroup's storage does not exist
+        */
+       parent_link = bpf_program__attach_cgroup(obj->progs.egress,
+                                                parent_cgroup_fd);
+       if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
+                 "err %ld", PTR_ERR(parent_link)))
+               goto close_bpf_object;
+       err = connect_send(CHILD_CGROUP);
+       if (CHECK(err, "first-connect-send", "errno %d", errno))
+               goto close_bpf_object;
+       if (CHECK(obj->bss->invocations != 1,
+                 "first-invoke", "invocations=%d", obj->bss->invocations))
+               goto close_bpf_object;
+       key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+       expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+       key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+       if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+               goto close_bpf_object;
+
+       /* Attach to parent and child cgroup, trigger packet from child.
+        * Assert that there are two additional runs, one with the parent
+        * cgroup's storage and one with the child cgroup's storage.
+        */
+       child_link = bpf_program__attach_cgroup(obj->progs.egress,
+                                               child_cgroup_fd);
+       if (CHECK(IS_ERR(child_link), "child-cg-attach",
+                 "err %ld", PTR_ERR(child_link)))
+               goto close_bpf_object;
+       err = connect_send(CHILD_CGROUP);
+       if (CHECK(err, "second-connect-send", "errno %d", errno))
+               goto close_bpf_object;
+       if (CHECK(obj->bss->invocations != 3,
+                 "second-invoke", "invocations=%d", obj->bss->invocations))
+               goto close_bpf_object;
+       key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+       expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+       key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+       expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(parent_link);
+       bpf_link__destroy(child_link);
+
+       cg_storage_multi_egress_only__destroy(obj);
+}
+
+static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
+{
+       struct cg_storage_multi_isolated *obj;
+       struct cgroup_value expected_cgroup_value;
+       struct bpf_cgroup_storage_key key;
+       struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL;
+       struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL;
+       struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL;
+       bool err;
+
+       obj = cg_storage_multi_isolated__open_and_load();
+       if (CHECK(!obj, "skel-load", "errno %d", errno))
+               return;
+
+       /* Attach to parent cgroup, trigger packet from child.
+        * Assert that there are three runs, two with parent cgroup egress and
+        * one with parent cgroup ingress, stored in separate parent storages.
+        * Also assert that the child cgroup's storages do not exist
+        */
+       parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+                                                        parent_cgroup_fd);
+       if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
+                 "err %ld", PTR_ERR(parent_egress1_link)))
+               goto close_bpf_object;
+       parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+                                                        parent_cgroup_fd);
+       if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
+                 "err %ld", PTR_ERR(parent_egress2_link)))
+               goto close_bpf_object;
+       parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+                                                        parent_cgroup_fd);
+       if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
+                 "err %ld", PTR_ERR(parent_ingress_link)))
+               goto close_bpf_object;
+       err = connect_send(CHILD_CGROUP);
+       if (CHECK(err, "first-connect-send", "errno %d", errno))
+               goto close_bpf_object;
+       if (CHECK(obj->bss->invocations != 3,
+                 "first-invoke", "invocations=%d", obj->bss->invocations))
+               goto close_bpf_object;
+       key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+       key.attach_type = BPF_CGROUP_INET_EGRESS;
+       expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+       key.attach_type = BPF_CGROUP_INET_INGRESS;
+       expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+       key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+       key.attach_type = BPF_CGROUP_INET_EGRESS;
+       if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+               goto close_bpf_object;
+       key.attach_type = BPF_CGROUP_INET_INGRESS;
+       if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+               goto close_bpf_object;
+
+       /* Attach to parent and child cgroup, trigger packet from child.
+        * Assert that there are six additional runs, parent cgroup egresses
+        * and ingress, child cgroup egresses and ingress.
+        * Assert that egress and ingress storages are separate.
+        */
+       child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+                                                       child_cgroup_fd);
+       if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
+                 "err %ld", PTR_ERR(child_egress1_link)))
+               goto close_bpf_object;
+       child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+                                                       child_cgroup_fd);
+       if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
+                 "err %ld", PTR_ERR(child_egress2_link)))
+               goto close_bpf_object;
+       child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+                                                       child_cgroup_fd);
+       if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
+                 "err %ld", PTR_ERR(child_ingress_link)))
+               goto close_bpf_object;
+       err = connect_send(CHILD_CGROUP);
+       if (CHECK(err, "second-connect-send", "errno %d", errno))
+               goto close_bpf_object;
+       if (CHECK(obj->bss->invocations != 9,
+                 "second-invoke", "invocations=%d", obj->bss->invocations))
+               goto close_bpf_object;
+       key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+       key.attach_type = BPF_CGROUP_INET_EGRESS;
+       expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 4 };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+       key.attach_type = BPF_CGROUP_INET_INGRESS;
+       expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 2 };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+       key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+       key.attach_type = BPF_CGROUP_INET_EGRESS;
+       expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+       key.attach_type = BPF_CGROUP_INET_INGRESS;
+       expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(parent_egress1_link);
+       bpf_link__destroy(parent_egress2_link);
+       bpf_link__destroy(parent_ingress_link);
+       bpf_link__destroy(child_egress1_link);
+       bpf_link__destroy(child_egress2_link);
+       bpf_link__destroy(child_ingress_link);
+
+       cg_storage_multi_isolated__destroy(obj);
+}
+
+static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
+{
+       struct cg_storage_multi_shared *obj;
+       struct cgroup_value expected_cgroup_value;
+       __u64 key;
+       struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL;
+       struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL;
+       struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL;
+       bool err;
+
+       obj = cg_storage_multi_shared__open_and_load();
+       if (CHECK(!obj, "skel-load", "errno %d", errno))
+               return;
+
+       /* Attach to parent cgroup, trigger packet from child.
+        * Assert that there are three runs, two with parent cgroup egress and
+        * one with parent cgroup ingress.
+        * Also assert that the child cgroup's storage does not exist
+        */
+       parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+                                                        parent_cgroup_fd);
+       if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
+                 "err %ld", PTR_ERR(parent_egress1_link)))
+               goto close_bpf_object;
+       parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+                                                        parent_cgroup_fd);
+       if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
+                 "err %ld", PTR_ERR(parent_egress2_link)))
+               goto close_bpf_object;
+       parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+                                                        parent_cgroup_fd);
+       if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
+                 "err %ld", PTR_ERR(parent_ingress_link)))
+               goto close_bpf_object;
+       err = connect_send(CHILD_CGROUP);
+       if (CHECK(err, "first-connect-send", "errno %d", errno))
+               goto close_bpf_object;
+       if (CHECK(obj->bss->invocations != 3,
+                 "first-invoke", "invocations=%d", obj->bss->invocations))
+               goto close_bpf_object;
+       key = get_cgroup_id(PARENT_CGROUP);
+       expected_cgroup_value = (struct cgroup_value) {
+               .egress_pkts = 2,
+               .ingress_pkts = 1,
+       };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+       key = get_cgroup_id(CHILD_CGROUP);
+       if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+               goto close_bpf_object;
+
+       /* Attach to parent and child cgroup, trigger packet from child.
+        * Assert that there are six additional runs, parent cgroup egresses
+        * and ingress, child cgroup egresses and ingress.
+        */
+       child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+                                                       child_cgroup_fd);
+       if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
+                 "err %ld", PTR_ERR(child_egress1_link)))
+               goto close_bpf_object;
+       child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+                                                       child_cgroup_fd);
+       if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
+                 "err %ld", PTR_ERR(child_egress2_link)))
+               goto close_bpf_object;
+       child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+                                                       child_cgroup_fd);
+       if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
+                 "err %ld", PTR_ERR(child_ingress_link)))
+               goto close_bpf_object;
+       err = connect_send(CHILD_CGROUP);
+       if (CHECK(err, "second-connect-send", "errno %d", errno))
+               goto close_bpf_object;
+       if (CHECK(obj->bss->invocations != 9,
+                 "second-invoke", "invocations=%d", obj->bss->invocations))
+               goto close_bpf_object;
+       key = get_cgroup_id(PARENT_CGROUP);
+       expected_cgroup_value = (struct cgroup_value) {
+               .egress_pkts = 4,
+               .ingress_pkts = 2,
+       };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+       key = get_cgroup_id(CHILD_CGROUP);
+       expected_cgroup_value = (struct cgroup_value) {
+               .egress_pkts = 2,
+               .ingress_pkts = 1,
+       };
+       if (assert_storage(obj->maps.cgroup_storage,
+                          &key, &expected_cgroup_value))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(parent_egress1_link);
+       bpf_link__destroy(parent_egress2_link);
+       bpf_link__destroy(parent_ingress_link);
+       bpf_link__destroy(child_egress1_link);
+       bpf_link__destroy(child_egress2_link);
+       bpf_link__destroy(child_ingress_link);
+
+       cg_storage_multi_shared__destroy(obj);
+}
+
+void test_cg_storage_multi(void)
+{
+       int parent_cgroup_fd = -1, child_cgroup_fd = -1;
+
+       parent_cgroup_fd = test__join_cgroup(PARENT_CGROUP);
+       if (CHECK(parent_cgroup_fd < 0, "cg-create-parent", "errno %d", errno))
+               goto close_cgroup_fd;
+       child_cgroup_fd = create_and_get_cgroup(CHILD_CGROUP);
+       if (CHECK(child_cgroup_fd < 0, "cg-create-child", "errno %d", errno))
+               goto close_cgroup_fd;
+
+       if (test__start_subtest("egress_only"))
+               test_egress_only(parent_cgroup_fd, child_cgroup_fd);
+
+       if (test__start_subtest("isolated"))
+               test_isolated(parent_cgroup_fd, child_cgroup_fd);
+
+       if (test__start_subtest("shared"))
+               test_shared(parent_cgroup_fd, child_cgroup_fd);
+
+close_cgroup_fd:
+       close(child_cgroup_fd);
+       close(parent_cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi.h b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
new file mode 100644 (file)
index 0000000..a0778fe
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __PROGS_CG_STORAGE_MULTI_H
+#define __PROGS_CG_STORAGE_MULTI_H
+
+#include <asm/types.h>
+
+struct cgroup_value {
+       __u32 egress_pkts;
+       __u32 ingress_pkts;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
new file mode 100644 (file)
index 0000000..44ad46b
--- /dev/null
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+       __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+       __type(key, struct bpf_cgroup_storage_key);
+       __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress")
+int egress(struct __sk_buff *skb)
+{
+       struct cgroup_value *ptr_cg_storage =
+               bpf_get_local_storage(&cgroup_storage, 0);
+
+       __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
new file mode 100644 (file)
index 0000000..a253730
--- /dev/null
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+       __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+       __type(key, struct bpf_cgroup_storage_key);
+       __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress/1")
+int egress1(struct __sk_buff *skb)
+{
+       struct cgroup_value *ptr_cg_storage =
+               bpf_get_local_storage(&cgroup_storage, 0);
+
+       __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup_skb/egress/2")
+int egress2(struct __sk_buff *skb)
+{
+       struct cgroup_value *ptr_cg_storage =
+               bpf_get_local_storage(&cgroup_storage, 0);
+
+       __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress(struct __sk_buff *skb)
+{
+       struct cgroup_value *ptr_cg_storage =
+               bpf_get_local_storage(&cgroup_storage, 0);
+
+       __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
new file mode 100644 (file)
index 0000000..a149f33
--- /dev/null
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+       __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+       __type(key, __u64);
+       __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress/1")
+int egress1(struct __sk_buff *skb)
+{
+       struct cgroup_value *ptr_cg_storage =
+               bpf_get_local_storage(&cgroup_storage, 0);
+
+       __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup_skb/egress/2")
+int egress2(struct __sk_buff *skb)
+{
+       struct cgroup_value *ptr_cg_storage =
+               bpf_get_local_storage(&cgroup_storage, 0);
+
+       __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress(struct __sk_buff *skb)
+{
+       struct cgroup_value *ptr_cg_storage =
+               bpf_get_local_storage(&cgroup_storage, 0);
+
+       __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}