//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <uapi/linux/btf.h>

DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

#ifdef CONFIG_CGROUP_BPF
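/* Cgroup local storage gives each (cgroup, attach_type) pair its own
 * value, shared by all programs of that type attached to the cgroup.
 * BPF_MAP_TYPE_CGROUP_STORAGE keeps one shared buffer per pair;
 * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE keeps a per-CPU buffer instead.
 * The per-CPU pointers defined above are set by the cgroup BPF
 * run-time before a program runs, so the program's storage lookups
 * resolve without taking locks.
 */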
#define LOCAL_STORAGE_CREATE_FLAG_MASK					\
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;		/* protects prog, root and list */
	struct bpf_prog *prog;		/* single program using this map */
	struct rb_root root;		/* storages keyed by (cgroup, type) */
	struct list_head list;		/* all storages, for iteration */
};
static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}
static int bpf_cgroup_storage_key_cmp(
	const struct bpf_cgroup_storage_key *key1,
	const struct bpf_cgroup_storage_key *key2)
{
	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
		return -1;
	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
		return 1;
	else if (key1->attach_type < key2->attach_type)
		return -1;
	else if (key1->attach_type > key2->attach_type)
		return 1;
	return 0;
}
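/* Find the storage for a given key in the map's rbtree. When "locked"
 * is true the caller already holds map->lock (as in get_next_key);
 * otherwise the lock is taken and released here.
 */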
static struct bpf_cgroup_storage *cgroup_storage_lookup(
	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
	bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}
static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}
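/* Syscall-path lookup: returns a pointer into the current shared
 * buffer. READ_ONCE pairs with the xchg() in
 * cgroup_storage_update_elem, which replaces the whole buffer rather
 * than updating it in place.
 */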
static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	return &READ_ONCE(storage->buf)->data[0];
}
static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
				      void *value, u64 flags)
{
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (flags != BPF_ANY && flags != BPF_EXIST)
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;

	/* Allocate a fresh buffer, fill it and atomically swap it in;
	 * the old buffer is freed after an RCU grace period so that
	 * concurrent readers keep a valid pointer.
	 */
	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			   map->value_size,
			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
			   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}
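/* Copy every CPU's slot of a per-cpu storage out to user space. The
 * user buffer is laid out as num_possible_cpus() consecutive slots of
 * round_up(value_size, 8) bytes each. For example (illustrative
 * numbers only): with value_size == 12 and 4 possible CPUs, user
 * space must provide 4 * 16 == 64 bytes.
 */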
int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
				   void *value)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(storage->percpu_buf, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
				     void *value, u64 map_flags)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
		return -EINVAL;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
				value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage_key *next = _next_key;
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		storage = list_next_entry(storage, list);
		if (!storage)
			goto enoent;
	} else {
		storage = list_first_entry(&map->list,
					   struct bpf_cgroup_storage, list);
	}

	spin_unlock_bh(&map->lock);
	next->attach_type = storage->key.attach_type;
	next->cgroup_inode_id = storage->key.cgroup_inode_id;
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}
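/* Map creation: the key must be exactly struct bpf_cgroup_storage_key,
 * the value must be non-empty and fit in one page, and max_entries
 * must be 0 because entries are created implicitly as programs attach
 * to cgroups.
 */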
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
		return ERR_PTR(-EINVAL);

	if (attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	if (attr->value_size > PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
		/* reserved bits should not be used */
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
			   __GFP_ZERO | GFP_USER, numa_node);
	if (!map)
		return ERR_PTR(-ENOMEM);

	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
				  PAGE_SIZE) >> PAGE_SHIFT;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}
static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	kfree(map);
}
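/* Deleting entries through the map syscall is not supported: storages
 * are created and destroyed only as programs attach to and detach
 * from cgroups.
 */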
static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}
static int cgroup_storage_check_btf(const struct bpf_map *map,
				    const struct btf *btf,
				    const struct btf_type *key_type,
				    const struct btf_type *value_type)
{
	struct btf_member *m;
	u32 offset, size;

	/* Key is expected to be of struct bpf_cgroup_storage_key type,
	 * which is:
	 * struct bpf_cgroup_storage_key {
	 *	__u64	cgroup_inode_id;
	 *	__u32	attach_type;
	 * };
	 */

	/*
	 * Key_type must be a structure with two fields.
	 */
	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
	    BTF_INFO_VLEN(key_type->info) != 2)
		return -EINVAL;

	/*
	 * The first field must be a 64 bit integer at 0 offset.
	 */
	m = (struct btf_member *)(key_type + 1);
	size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, cgroup_inode_id);
	if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
		return -EINVAL;

	/*
	 * The second field must be a 32 bit integer at 64 bit offset.
	 */
	m++;
	offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
	size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, attach_type);
	if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
		return -EINVAL;

	return 0;
}
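/* For illustration only: a BPF program could declare a matching map
 * along these lines (hypothetical snippet in the style of the
 * selftests under tools/testing/selftests/bpf; the map name is made
 * up):
 *
 *	struct bpf_map_def SEC("maps") my_cgroup_storage = {
 *		.type = BPF_MAP_TYPE_CGROUP_STORAGE,
 *		.key_size = sizeof(struct bpf_cgroup_storage_key),
 *		.value_size = sizeof(__u64),
 *	};
 *
 * and then obtain its value with
 * bpf_get_local_storage(&my_cgroup_storage, 0).
 */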
static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key,
					 struct seq_file *m)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map_to_storage(map), key, false);
	if (!storage) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		seq_puts(m, ": ");
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  &READ_ONCE(storage->buf)->data[0], m);
		seq_puts(m, "\n");
	} else {
		seq_puts(m, ": {\n");
		for_each_possible_cpu(cpu) {
			seq_printf(m, "\tcpu%d: ", cpu);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  per_cpu_ptr(storage->percpu_buf, cpu),
					  m);
			seq_puts(m, "\n");
		}
		seq_puts(m, "}\n");
	}
	rcu_read_unlock();
}
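/* These ops back both BPF_MAP_TYPE_CGROUP_STORAGE and
 * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE; the two types share the same
 * key format and differ only in how the value buffer is allocated
 * and accessed.
 */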
const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = cgroup_storage_check_btf,
	.map_seq_show_elem = cgroup_storage_seq_show_elem,
};
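/* Called when a program references a cgroup storage map. Enforces the
 * ownership rules of this map type: a map can be used by only one
 * program, and a program can use at most one cgroup storage map of
 * each storage type.
 */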
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	int ret = -EBUSY;

	spin_lock_bh(&map->lock);

	if (map->prog && map->prog != prog)
		goto unlock;
	if (prog->aux->cgroup_storage[stype] &&
	    prog->aux->cgroup_storage[stype] != _map)
		goto unlock;

	map->prog = prog;
	prog->aux->cgroup_storage[stype] = _map;
	ret = 0;
unlock:
	spin_unlock_bh(&map->lock);

	return ret;
}
void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	spin_lock_bh(&map->lock);
	if (map->prog == prog) {
		WARN_ON(prog->aux->cgroup_storage[stype] != _map);
		map->prog = NULL;
		prog->aux->cgroup_storage[stype] = NULL;
	}
	spin_unlock_bh(&map->lock);
}
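/* Compute the allocation size for one storage and the number of pages
 * to charge against the map's memlock limit. Shared storage is a
 * single buffer plus header; per-cpu storage is one 8-byte-aligned
 * slot per possible CPU.
 */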
static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
{
	size_t size;

	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
		size = sizeof(struct bpf_storage_buffer) + map->value_size;
		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
				  PAGE_SIZE) >> PAGE_SHIFT;
	} else {
		size = map->value_size;
		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
				  PAGE_SIZE) >> PAGE_SHIFT;
	}

	return size;
}
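/* Allocate the storage for one (cgroup, attach_type) pair when a
 * program is attached. Returns NULL if the program does not use this
 * storage type, and an ERR_PTR if charging the memlock limit or the
 * allocation itself fails.
 */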
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	gfp_t flags;
	size_t size;
	u32 pages;

	map = prog->aux->cgroup_storage[stype];
	if (!map)
		return NULL;

	size = bpf_cgroup_storage_calculate_size(map, &pages);

	if (bpf_map_charge_memlock(map, pages))
		return ERR_PTR(-EPERM);

	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			       __GFP_ZERO | GFP_USER, map->numa_node);
	if (!storage)
		goto enomem;

	flags = __GFP_ZERO | GFP_USER;

	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		storage->buf = kmalloc_node(size, flags, map->numa_node);
		if (!storage->buf)
			goto enomem;
	} else {
		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
		if (!storage->percpu_buf)
			goto enomem;
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;

enomem:
	bpf_map_uncharge_memlock(map, pages);
	kfree(storage);
	return ERR_PTR(-ENOMEM);
}
static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	kfree(storage->buf);
	kfree(storage);
}
static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	free_percpu(storage->percpu_buf);
	kfree(storage);
}
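/* Uncharge the memlock pages and free the storage. Freeing is
 * deferred through call_rcu() so that programs still running under
 * rcu_read_lock() can finish using the buffers.
 */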
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_map *map;
	u32 pages;

	if (!storage)
		return;

	map = &storage->map->map;

	bpf_cgroup_storage_calculate_size(map, &pages);
	bpf_map_uncharge_memlock(map, pages);

	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
	else
		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
}
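/* Bind a storage to its cgroup and attach type: fill in the key and
 * make it visible in the owning map's rbtree and list. A duplicate
 * key would indicate a bug in the attach logic, hence the WARN_ON.
 */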
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup->kn->id.id;

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list, &map->list);
	spin_unlock_bh(&map->lock);
}
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);

	list_del(&storage->list);
	spin_unlock_bh(&map->lock);
}

#endif