kernel/bpf/syscall.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3  */
   4 #include <linux/bpf.h>
   5 #include <linux/bpf_trace.h>
   6 #include <linux/bpf_lirc.h>
   7 #include <linux/btf.h>
   8 #include <linux/syscalls.h>
   9 #include <linux/slab.h>
  10 #include <linux/sched/signal.h>
  11 #include <linux/vmalloc.h>
  12 #include <linux/mmzone.h>
  13 #include <linux/anon_inodes.h>
  14 #include <linux/fdtable.h>
  15 #include <linux/file.h>
  16 #include <linux/fs.h>
  17 #include <linux/license.h>
  18 #include <linux/filter.h>
  19 #include <linux/version.h>
  20 #include <linux/kernel.h>
  21 #include <linux/idr.h>
  22 #include <linux/cred.h>
  23 #include <linux/timekeeping.h>
  24 #include <linux/ctype.h>
  25 #include <linux/nospec.h>
  26 #include <linux/audit.h>
  27 #include <uapi/linux/btf.h>
  28 #include <linux/bpf_lsm.h>
  29
  30 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
  31                           (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
  32                           (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
  33 #define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
  34 #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
  35 #define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
  36                         IS_FD_HASH(map))
  37
  38 #define BPF_OBJ_FLAG_MASK   (BPF_F_RDONLY | BPF_F_WRONLY)
  39
  40 DEFINE_PER_CPU(int, bpf_prog_active);
  41 static DEFINE_IDR(prog_idr);
  42 static DEFINE_SPINLOCK(prog_idr_lock);
  43 static DEFINE_IDR(map_idr);
  44 static DEFINE_SPINLOCK(map_idr_lock);
  45
  46 int sysctl_unprivileged_bpf_disabled __read_mostly;
  47
  48 static const struct bpf_map_ops * const bpf_map_types[] = {
  49 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
  50 #define BPF_MAP_TYPE(_id, _ops) \
  51         [_id] = &_ops,
  52 #include <linux/bpf_types.h>
  53 #undef BPF_PROG_TYPE
  54 #undef BPF_MAP_TYPE
  55 };
  56
  57 /*
  58  * If we're handed a bigger struct than we know of, ensure all the unknown bits
  59  * are 0 - i.e. new user-space does not rely on any kernel feature extensions
  60  * we don't know about yet.
  61  *
  62  * There is a ToCToU between this function call and the following
  63  * copy_from_user() call. However, this is not a concern since this function is
  64  * meant to be a future-proofing of bits.
  65  */
  66 int bpf_check_uarg_tail_zero(void __user *uaddr,
  67                              size_t expected_size,
  68                              size_t actual_size)
  69 {
  70         unsigned char __user *addr;
  71         unsigned char __user *end;
  72         unsigned char val;
  73         int err;
  74
  75         if (unlikely(actual_size > PAGE_SIZE))  /* silly large */
  76                 return -E2BIG;
  77
  78         if (unlikely(!access_ok(uaddr, actual_size)))
  79                 return -EFAULT;
  80
  81         if (actual_size <= expected_size)
  82                 return 0;
  83
  84         addr = uaddr + expected_size;
  85         end  = uaddr + actual_size;
  86
  87         for (; addr < end; addr++) {
  88                 err = get_user(val, addr);
  89                 if (err)
  90                         return err;
  91                 if (val)
  92                         return -E2BIG;
  93         }
  94
  95         return 0;
  96 }
  97
  98 const struct bpf_map_ops bpf_map_offload_ops = {
  99         .map_alloc = bpf_map_offload_map_alloc,
 100         .map_free = bpf_map_offload_map_free,
 101         .map_check_btf = map_check_no_btf,
 102 };
 103
 104 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
 105 {
 106         const struct bpf_map_ops *ops;
 107         u32 type = attr->map_type;
 108         struct bpf_map *map;
 109         int err;
 110
 111         if (type >= ARRAY_SIZE(bpf_map_types))
 112                 return ERR_PTR(-EINVAL);
 113         type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
 114         ops = bpf_map_types[type];
 115         if (!ops)
 116                 return ERR_PTR(-EINVAL);
 117
 118         if (ops->map_alloc_check) {
 119                 err = ops->map_alloc_check(attr);
 120                 if (err)
 121                         return ERR_PTR(err);
 122         }
 123         if (attr->map_ifindex)
 124                 ops = &bpf_map_offload_ops;
 125         map = ops->map_alloc(attr);
 126         if (IS_ERR(map))
 127                 return map;
 128         map->ops = ops;
 129         map->map_type = type;
 130         return map;
 131 }
 132
 133 static u32 bpf_map_value_size(struct bpf_map *map)
 134 {
 135         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 136             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 137             map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
 138             map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
 139                 return round_up(map->value_size, 8) * num_possible_cpus();
 140         else if (IS_FD_MAP(map))
 141                 return sizeof(u32);
 142         else
 143                 return  map->value_size;
 144 }
 145
 146 static void maybe_wait_bpf_programs(struct bpf_map *map)
 147 {
 148         /* Wait for any running BPF programs to complete so that
 149          * userspace, when we return to it, knows that all programs
 150          * that could be running use the new map value.
 151          */
 152         if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
 153             map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
 154                 synchronize_rcu();
 155 }
 156
 157 static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
 158                                 void *value, __u64 flags)
 159 {
 160         int err;
 161
 162         /* Need to create a kthread, thus must support schedule */
 163         if (bpf_map_is_dev_bound(map)) {
 164                 return bpf_map_offload_update_elem(map, key, value, flags);
 165         } else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
 166                    map->map_type == BPF_MAP_TYPE_SOCKHASH ||
 167                    map->map_type == BPF_MAP_TYPE_SOCKMAP ||
 168                    map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
 169                 return map->ops->map_update_elem(map, key, value, flags);
 170         } else if (IS_FD_PROG_ARRAY(map)) {
 171                 return bpf_fd_array_map_update_elem(map, f.file, key, value,
 172                                                     flags);
 173         }
 174
 175         bpf_disable_instrumentation();
 176         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 177             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 178                 err = bpf_percpu_hash_update(map, key, value, flags);
 179         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 180                 err = bpf_percpu_array_update(map, key, value, flags);
 181         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
 182                 err = bpf_percpu_cgroup_storage_update(map, key, value,
 183                                                        flags);
 184         } else if (IS_FD_ARRAY(map)) {
 185                 rcu_read_lock();
 186                 err = bpf_fd_array_map_update_elem(map, f.file, key, value,
 187                                                    flags);
 188                 rcu_read_unlock();
 189         } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
 190                 rcu_read_lock();
 191                 err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
 192                                                   flags);
 193                 rcu_read_unlock();
 194         } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
 195                 /* rcu_read_lock() is not needed */
 196                 err = bpf_fd_reuseport_array_update_elem(map, key, value,
 197                                                          flags);
 198         } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
 199                    map->map_type == BPF_MAP_TYPE_STACK) {
 200                 err = map->ops->map_push_elem(map, value, flags);
 201         } else {
 202                 rcu_read_lock();
 203                 err = map->ops->map_update_elem(map, key, value, flags);
 204                 rcu_read_unlock();
 205         }
 206         bpf_enable_instrumentation();
 207         maybe_wait_bpf_programs(map);
 208
 209         return err;
 210 }
 211
 212 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 213                               __u64 flags)
 214 {
 215         void *ptr;
 216         int err;
 217
 218         if (bpf_map_is_dev_bound(map))
 219                 return bpf_map_offload_lookup_elem(map, key, value);
 220
 221         bpf_disable_instrumentation();
 222         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 223             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 224                 err = bpf_percpu_hash_copy(map, key, value);
 225         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 226                 err = bpf_percpu_array_copy(map, key, value);
 227         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
 228                 err = bpf_percpu_cgroup_storage_copy(map, key, value);
 229         } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 230                 err = bpf_stackmap_copy(map, key, value);
 231         } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
 232                 err = bpf_fd_array_map_lookup_elem(map, key, value);
 233         } else if (IS_FD_HASH(map)) {
 234                 err = bpf_fd_htab_map_lookup_elem(map, key, value);
 235         } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
 236                 err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
 237         } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
 238                    map->map_type == BPF_MAP_TYPE_STACK) {
 239                 err = map->ops->map_peek_elem(map, value);
 240         } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
 241                 /* struct_ops map requires directly updating "value" */
 242                 err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
 243         } else {
 244                 rcu_read_lock();
 245                 if (map->ops->map_lookup_elem_sys_only)
 246                         ptr = map->ops->map_lookup_elem_sys_only(map, key);
 247                 else
 248                         ptr = map->ops->map_lookup_elem(map, key);
 249                 if (IS_ERR(ptr)) {
 250                         err = PTR_ERR(ptr);
 251                 } else if (!ptr) {
 252                         err = -ENOENT;
 253                 } else {
 254                         err = 0;
 255                         if (flags & BPF_F_LOCK)
 256                                 /* lock 'ptr' and copy everything but lock */
 257                                 copy_map_value_locked(map, value, ptr, true);
 258                         else
 259                                 copy_map_value(map, value, ptr);
 260                         /* mask lock, since value wasn't zero inited */
 261                         check_and_init_map_lock(map, value);
 262                 }
 263                 rcu_read_unlock();
 264         }
 265
 266         bpf_enable_instrumentation();
 267         maybe_wait_bpf_programs(map);
 268
 269         return err;
 270 }
 271
 272 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
 273 {
 274         /* We really just want to fail instead of triggering OOM killer
 275          * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
 276          * which is used for lower order allocation requests.
 277          *
 278          * It has been observed that higher order allocation requests done by
 279          * vmalloc with __GFP_NORETRY being set might fail due to not trying
 280          * to reclaim memory from the page cache, thus we set
 281          * __GFP_RETRY_MAYFAIL to avoid such situations.
 282          */
 283
 284         const gfp_t flags = __GFP_NOWARN | __GFP_ZERO;
 285         void *area;
 286
 287         if (size >= SIZE_MAX)
 288                 return NULL;
 289
 290         /* kmalloc()'ed memory can't be mmap()'ed */
 291         if (!mmapable && size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
 292                 area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
 293                                     numa_node);
 294                 if (area != NULL)
 295                         return area;
 296         }
 297         if (mmapable) {
 298                 BUG_ON(!PAGE_ALIGNED(size));
 299                 return vmalloc_user_node_flags(size, numa_node, GFP_KERNEL |
 300                                                __GFP_RETRY_MAYFAIL | flags);
 301         }
 302         return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_RETRY_MAYFAIL | flags,
 303                               numa_node, __builtin_return_address(0));
 304 }
 305
 306 void *bpf_map_area_alloc(u64 size, int numa_node)
 307 {
 308         return __bpf_map_area_alloc(size, numa_node, false);
 309 }
 310
 311 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
 312 {
 313         return __bpf_map_area_alloc(size, numa_node, true);
 314 }
 315
 316 void bpf_map_area_free(void *area)
 317 {
 318         kvfree(area);
 319 }
 320
 321 static u32 bpf_map_flags_retain_permanent(u32 flags)
 322 {
 323         /* Some map creation flags are not tied to the map object but
 324          * rather to the map fd instead, so they have no meaning upon
 325          * map object inspection since multiple file descriptors with
 326          * different (access) properties can exist here. Thus, given
 327          * this has zero meaning for the map itself, lets clear these
 328          * from here.
 329          */
 330         return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
 331 }
 332
 333 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
 334 {
 335         map->map_type = attr->map_type;
 336         map->key_size = attr->key_size;
 337         map->value_size = attr->value_size;
 338         map->max_entries = attr->max_entries;
 339         map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
 340         map->numa_node = bpf_map_attr_numa_node(attr);
 341 }
 342
 343 static int bpf_charge_memlock(struct user_struct *user, u32 pages)
 344 {
 345         unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 346
 347         if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
 348                 atomic_long_sub(pages, &user->locked_vm);
 349                 return -EPERM;
 350         }
 351         return 0;
 352 }
 353
 354 static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
 355 {
 356         if (user)
 357                 atomic_long_sub(pages, &user->locked_vm);
 358 }
 359
 360 int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
 361 {
 362         u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
 363         struct user_struct *user;
 364         int ret;
 365
 366         if (size >= U32_MAX - PAGE_SIZE)
 367                 return -E2BIG;
 368
 369         user = get_current_user();
 370         ret = bpf_charge_memlock(user, pages);
 371         if (ret) {
 372                 free_uid(user);
 373                 return ret;
 374         }
 375
 376         mem->pages = pages;
 377         mem->user = user;
 378
 379         return 0;
 380 }
 381
 382 void bpf_map_charge_finish(struct bpf_map_memory *mem)
 383 {
 384         bpf_uncharge_memlock(mem->user, mem->pages);
 385         free_uid(mem->user);
 386 }
 387
 388 void bpf_map_charge_move(struct bpf_map_memory *dst,
 389                          struct bpf_map_memory *src)
 390 {
 391         *dst = *src;
 392
 393         /* Make sure src will not be used for the redundant uncharging. */
 394         memset(src, 0, sizeof(struct bpf_map_memory));
 395 }
 396
 397 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
 398 {
 399         int ret;
 400
 401         ret = bpf_charge_memlock(map->memory.user, pages);
 402         if (ret)
 403                 return ret;
 404         map->memory.pages += pages;
 405         return ret;
 406 }
 407
 408 void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
 409 {
 410         bpf_uncharge_memlock(map->memory.user, pages);
 411         map->memory.pages -= pages;
 412 }
 413
 414 static int bpf_map_alloc_id(struct bpf_map *map)
 415 {
 416         int id;
 417
 418         idr_preload(GFP_KERNEL);
 419         spin_lock_bh(&map_idr_lock);
 420         id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
 421         if (id > 0)
 422                 map->id = id;
 423         spin_unlock_bh(&map_idr_lock);
 424         idr_preload_end();
 425
 426         if (WARN_ON_ONCE(!id))
 427                 return -ENOSPC;
 428
 429         return id > 0 ? 0 : id;
 430 }
 431
 432 void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
 433 {
 434         unsigned long flags;
 435
 436         /* Offloaded maps are removed from the IDR store when their device
 437          * disappears - even if someone holds an fd to them they are unusable,
 438          * the memory is gone, all ops will fail; they are simply waiting for
 439          * refcnt to drop to be freed.
 440          */
 441         if (!map->id)
 442                 return;
 443
 444         if (do_idr_lock)
 445                 spin_lock_irqsave(&map_idr_lock, flags);
 446         else
 447                 __acquire(&map_idr_lock);
 448
 449         idr_remove(&map_idr, map->id);
 450         map->id = 0;
 451
 452         if (do_idr_lock)
 453                 spin_unlock_irqrestore(&map_idr_lock, flags);
 454         else
 455                 __release(&map_idr_lock);
 456 }
 457
 458 /* called from workqueue */
 459 static void bpf_map_free_deferred(struct work_struct *work)
 460 {
 461         struct bpf_map *map = container_of(work, struct bpf_map, work);
 462         struct bpf_map_memory mem;
 463
 464         bpf_map_charge_move(&mem, &map->memory);
 465         security_bpf_map_free(map);
 466         /* implementation dependent freeing */
 467         map->ops->map_free(map);
 468         bpf_map_charge_finish(&mem);
 469 }
 470
 471 static void bpf_map_put_uref(struct bpf_map *map)
 472 {
 473         if (atomic64_dec_and_test(&map->usercnt)) {
 474                 if (map->ops->map_release_uref)
 475                         map->ops->map_release_uref(map);
 476         }
 477 }
 478
 479 /* decrement map refcnt and schedule it for freeing via workqueue
 480  * (unrelying map implementation ops->map_free() might sleep)
 481  */
 482 static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
 483 {
 484         if (atomic64_dec_and_test(&map->refcnt)) {
 485                 /* bpf_map_free_id() must be called first */
 486                 bpf_map_free_id(map, do_idr_lock);
 487                 btf_put(map->btf);
 488                 INIT_WORK(&map->work, bpf_map_free_deferred);
 489                 schedule_work(&map->work);
 490         }
 491 }
 492
 493 void bpf_map_put(struct bpf_map *map)
 494 {
 495         __bpf_map_put(map, true);
 496 }
 497 EXPORT_SYMBOL_GPL(bpf_map_put);
 498
 499 void bpf_map_put_with_uref(struct bpf_map *map)
 500 {
 501         bpf_map_put_uref(map);
 502         bpf_map_put(map);
 503 }
 504
 505 static int bpf_map_release(struct inode *inode, struct file *filp)
 506 {
 507         struct bpf_map *map = filp->private_data;
 508
 509         if (map->ops->map_release)
 510                 map->ops->map_release(map, filp);
 511
 512         bpf_map_put_with_uref(map);
 513         return 0;
 514 }
 515
 516 static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
 517 {
 518         fmode_t mode = f.file->f_mode;
 519
 520         /* Our file permissions may have been overridden by global
 521          * map permissions facing syscall side.
 522          */
 523         if (READ_ONCE(map->frozen))
 524                 mode &= ~FMODE_CAN_WRITE;
 525         return mode;
 526 }
 527
 528 #ifdef CONFIG_PROC_FS
 529 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 530 {
 531         const struct bpf_map *map = filp->private_data;
 532         const struct bpf_array *array;
 533         u32 type = 0, jited = 0;
 534
 535         if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
 536                 array = container_of(map, struct bpf_array, map);
 537                 type  = array->aux->type;
 538                 jited = array->aux->jited;
 539         }
 540
 541         seq_printf(m,
 542                    "map_type:\t%u\n"
 543                    "key_size:\t%u\n"
 544                    "value_size:\t%u\n"
 545                    "max_entries:\t%u\n"
 546                    "map_flags:\t%#x\n"
 547                    "memlock:\t%llu\n"
 548                    "map_id:\t%u\n"
 549                    "frozen:\t%u\n",
 550                    map->map_type,
 551                    map->key_size,
 552                    map->value_size,
 553                    map->max_entries,
 554                    map->map_flags,
 555                    map->memory.pages * 1ULL << PAGE_SHIFT,
 556                    map->id,
 557                    READ_ONCE(map->frozen));
 558         if (type) {
 559                 seq_printf(m, "owner_prog_type:\t%u\n", type);
 560                 seq_printf(m, "owner_jited:\t%u\n", jited);
 561         }
 562 }
 563 #endif
 564
 565 static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
 566                               loff_t *ppos)
 567 {
 568         /* We need this handler such that alloc_file() enables
 569          * f_mode with FMODE_CAN_READ.
 570          */
 571         return -EINVAL;
 572 }
 573
 574 static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
 575                                size_t siz, loff_t *ppos)
 576 {
 577         /* We need this handler such that alloc_file() enables
 578          * f_mode with FMODE_CAN_WRITE.
 579          */
 580         return -EINVAL;
 581 }
 582
 583 /* called for any extra memory-mapped regions (except initial) */
 584 static void bpf_map_mmap_open(struct vm_area_struct *vma)
 585 {
 586         struct bpf_map *map = vma->vm_file->private_data;
 587
 588         if (vma->vm_flags & VM_MAYWRITE) {
 589                 mutex_lock(&map->freeze_mutex);
 590                 map->writecnt++;
 591                 mutex_unlock(&map->freeze_mutex);
 592         }
 593 }
 594
 595 /* called for all unmapped memory region (including initial) */
 596 static void bpf_map_mmap_close(struct vm_area_struct *vma)
 597 {
 598         struct bpf_map *map = vma->vm_file->private_data;
 599
 600         if (vma->vm_flags & VM_MAYWRITE) {
 601                 mutex_lock(&map->freeze_mutex);
 602                 map->writecnt--;
 603                 mutex_unlock(&map->freeze_mutex);
 604         }
 605 }
 606
 607 static const struct vm_operations_struct bpf_map_default_vmops = {
 608         .open           = bpf_map_mmap_open,
 609         .close          = bpf_map_mmap_close,
 610 };
 611
 612 static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
 613 {
 614         struct bpf_map *map = filp->private_data;
 615         int err;
 616
 617         if (!map->ops->map_mmap || map_value_has_spin_lock(map))
 618                 return -ENOTSUPP;
 619
 620         if (!(vma->vm_flags & VM_SHARED))
 621                 return -EINVAL;
 622
 623         mutex_lock(&map->freeze_mutex);
 624
 625         if (vma->vm_flags & VM_WRITE) {
 626                 if (map->frozen) {
 627                         err = -EPERM;
 628                         goto out;
 629                 }
 630                 /* map is meant to be read-only, so do not allow mapping as
 631                  * writable, because it's possible to leak a writable page
 632                  * reference and allows user-space to still modify it after
 633                  * freezing, while verifier will assume contents do not change
 634                  */
 635                 if (map->map_flags & BPF_F_RDONLY_PROG) {
 636                         err = -EACCES;
 637                         goto out;
 638                 }
 639         }
 640
 641         /* set default open/close callbacks */
 642         vma->vm_ops = &bpf_map_default_vmops;
 643         vma->vm_private_data = map;
 644         vma->vm_flags &= ~VM_MAYEXEC;
 645         if (!(vma->vm_flags & VM_WRITE))
 646                 /* disallow re-mapping with PROT_WRITE */
 647                 vma->vm_flags &= ~VM_MAYWRITE;
 648
 649         err = map->ops->map_mmap(map, vma);
 650         if (err)
 651                 goto out;
 652
 653         if (vma->vm_flags & VM_MAYWRITE)
 654                 map->writecnt++;
 655 out:
 656         mutex_unlock(&map->freeze_mutex);
 657         return err;
 658 }
 659
 660 const struct file_operations bpf_map_fops = {
 661 #ifdef CONFIG_PROC_FS
 662         .show_fdinfo    = bpf_map_show_fdinfo,
 663 #endif
 664         .release        = bpf_map_release,
 665         .read           = bpf_dummy_read,
 666         .write          = bpf_dummy_write,
 667         .mmap           = bpf_map_mmap,
 668 };
 669
 670 int bpf_map_new_fd(struct bpf_map *map, int flags)
 671 {
 672         int ret;
 673
 674         ret = security_bpf_map(map, OPEN_FMODE(flags));
 675         if (ret < 0)
 676                 return ret;
 677
 678         return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
 679                                 flags | O_CLOEXEC);
 680 }
 681
 682 int bpf_get_file_flag(int flags)
 683 {
 684         if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
 685                 return -EINVAL;
 686         if (flags & BPF_F_RDONLY)
 687                 return O_RDONLY;
 688         if (flags & BPF_F_WRONLY)
 689                 return O_WRONLY;
 690         return O_RDWR;
 691 }
 692
 693 /* helper macro to check that unused fields 'union bpf_attr' are zero */
 694 #define CHECK_ATTR(CMD) \
 695         memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
 696                    sizeof(attr->CMD##_LAST_FIELD), 0, \
 697                    sizeof(*attr) - \
 698                    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
 699                    sizeof(attr->CMD##_LAST_FIELD)) != NULL
 700
 701 /* dst and src must have at least "size" number of bytes.
 702  * Return strlen on success and < 0 on error.
 703  */
 704 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
 705 {
 706         const char *end = src + size;
 707         const char *orig_src = src;
 708
 709         memset(dst, 0, size);
 710         /* Copy all isalnum(), '_' and '.' chars. */
 711         while (src < end && *src) {
 712                 if (!isalnum(*src) &&
 713                     *src != '_' && *src != '.')
 714                         return -EINVAL;
 715                 *dst++ = *src++;
 716         }
 717
 718         /* No '\0' found in "size" number of bytes */
 719         if (src == end)
 720                 return -EINVAL;
 721
 722         return src - orig_src;
 723 }
 724
 725 int map_check_no_btf(const struct bpf_map *map,
 726                      const struct btf *btf,
 727                      const struct btf_type *key_type,
 728                      const struct btf_type *value_type)
 729 {
 730         return -ENOTSUPP;
 731 }
 732
 733 static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 734                          u32 btf_key_id, u32 btf_value_id)
 735 {
 736         const struct btf_type *key_type, *value_type;
 737         u32 key_size, value_size;
 738         int ret = 0;
 739
 740         /* Some maps allow key to be unspecified. */
 741         if (btf_key_id) {
 742                 key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
 743                 if (!key_type || key_size != map->key_size)
 744                         return -EINVAL;
 745         } else {
 746                 key_type = btf_type_by_id(btf, 0);
 747                 if (!map->ops->map_check_btf)
 748                         return -EINVAL;
 749         }
 750
 751         value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
 752         if (!value_type || value_size != map->value_size)
 753                 return -EINVAL;
 754
 755         map->spin_lock_off = btf_find_spin_lock(btf, value_type);
 756
 757         if (map_value_has_spin_lock(map)) {
 758                 if (map->map_flags & BPF_F_RDONLY_PROG)
 759                         return -EACCES;
 760                 if (map->map_type != BPF_MAP_TYPE_HASH &&
 761                     map->map_type != BPF_MAP_TYPE_ARRAY &&
 762                     map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
 763                     map->map_type != BPF_MAP_TYPE_SK_STORAGE)
 764                         return -ENOTSUPP;
 765                 if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
 766                     map->value_size) {
 767                         WARN_ONCE(1,
 768                                   "verifier bug spin_lock_off %d value_size %d\n",
 769                                   map->spin_lock_off, map->value_size);
 770                         return -EFAULT;
 771                 }
 772         }
 773
 774         if (map->ops->map_check_btf)
 775                 ret = map->ops->map_check_btf(map, btf, key_type, value_type);
 776
 777         return ret;
 778 }
 779
 780 #define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id
 781 /* called via syscall */
 782 static int map_create(union bpf_attr *attr)
 783 {
 784         int numa_node = bpf_map_attr_numa_node(attr);
 785         struct bpf_map_memory mem;
 786         struct bpf_map *map;
 787         int f_flags;
 788         int err;
 789
 790         err = CHECK_ATTR(BPF_MAP_CREATE);
 791         if (err)
 792                 return -EINVAL;
 793
 794         if (attr->btf_vmlinux_value_type_id) {
 795                 if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
 796                     attr->btf_key_type_id || attr->btf_value_type_id)
 797                         return -EINVAL;
 798         } else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
 799                 return -EINVAL;
 800         }
 801
 802         f_flags = bpf_get_file_flag(attr->map_flags);
 803         if (f_flags < 0)
 804                 return f_flags;
 805
 806         if (numa_node != NUMA_NO_NODE &&
 807             ((unsigned int)numa_node >= nr_node_ids ||
 808              !node_online(numa_node)))
 809                 return -EINVAL;
 810
 811         /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
 812         map = find_and_alloc_map(attr);
 813         if (IS_ERR(map))
 814                 return PTR_ERR(map);
 815
 816         err = bpf_obj_name_cpy(map->name, attr->map_name,
 817                                sizeof(attr->map_name));
 818         if (err < 0)
 819                 goto free_map;
 820
 821         atomic64_set(&map->refcnt, 1);
 822         atomic64_set(&map->usercnt, 1);
 823         mutex_init(&map->freeze_mutex);
 824
 825         map->spin_lock_off = -EINVAL;
 826         if (attr->btf_key_type_id || attr->btf_value_type_id ||
 827             /* Even the map's value is a kernel's struct,
 828              * the bpf_prog.o must have BTF to begin with
 829              * to figure out the corresponding kernel's
 830              * counter part.  Thus, attr->btf_fd has
 831              * to be valid also.
 832              */
 833             attr->btf_vmlinux_value_type_id) {
 834                 struct btf *btf;
 835
 836                 btf = btf_get_by_fd(attr->btf_fd);
 837                 if (IS_ERR(btf)) {
 838                         err = PTR_ERR(btf);
 839                         goto free_map;
 840                 }
 841                 map->btf = btf;
 842
 843                 if (attr->btf_value_type_id) {
 844                         err = map_check_btf(map, btf, attr->btf_key_type_id,
 845                                             attr->btf_value_type_id);
 846                         if (err)
 847                                 goto free_map;
 848                 }
 849
 850                 map->btf_key_type_id = attr->btf_key_type_id;
 851                 map->btf_value_type_id = attr->btf_value_type_id;
 852                 map->btf_vmlinux_value_type_id =
 853                         attr->btf_vmlinux_value_type_id;
 854         }
 855
 856         err = security_bpf_map_alloc(map);
 857         if (err)
 858                 goto free_map;
 859
 860         err = bpf_map_alloc_id(map);
 861         if (err)
 862                 goto free_map_sec;
 863
 864         err = bpf_map_new_fd(map, f_flags);
 865         if (err < 0) {
 866                 /* failed to allocate fd.
 867                  * bpf_map_put_with_uref() is needed because the above
 868                  * bpf_map_alloc_id() has published the map
 869                  * to the userspace and the userspace may
 870                  * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
 871                  */
 872                 bpf_map_put_with_uref(map);
 873                 return err;
 874         }
 875
 876         return err;
 877
 878 free_map_sec:
 879         security_bpf_map_free(map);
 880 free_map:
 881         btf_put(map->btf);
 882         bpf_map_charge_move(&mem, &map->memory);
 883         map->ops->map_free(map);
 884         bpf_map_charge_finish(&mem);
 885         return err;
 886 }
 887
 888 /* if error is returned, fd is released.
 889  * On success caller should complete fd access with matching fdput()
 890  */
 891 struct bpf_map *__bpf_map_get(struct fd f)
 892 {
 893         if (!f.file)
 894                 return ERR_PTR(-EBADF);
 895         if (f.file->f_op != &bpf_map_fops) {
 896                 fdput(f);
 897                 return ERR_PTR(-EINVAL);
 898         }
 899
 900         return f.file->private_data;
 901 }
 902
 903 void bpf_map_inc(struct bpf_map *map)
 904 {
 905         atomic64_inc(&map->refcnt);
 906 }
 907 EXPORT_SYMBOL_GPL(bpf_map_inc);
 908
 909 void bpf_map_inc_with_uref(struct bpf_map *map)
 910 {
 911         atomic64_inc(&map->refcnt);
 912         atomic64_inc(&map->usercnt);
 913 }
 914 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);
 915
 916 struct bpf_map *bpf_map_get(u32 ufd)
 917 {
 918         struct fd f = fdget(ufd);
 919         struct bpf_map *map;
 920
 921         map = __bpf_map_get(f);
 922         if (IS_ERR(map))
 923                 return map;
 924
 925         bpf_map_inc(map);
 926         fdput(f);
 927
 928         return map;
 929 }
 930
 931 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 932 {
 933         struct fd f = fdget(ufd);
 934         struct bpf_map *map;
 935
 936         map = __bpf_map_get(f);
 937         if (IS_ERR(map))
 938                 return map;
 939
 940         bpf_map_inc_with_uref(map);
 941         fdput(f);
 942
 943         return map;
 944 }
 945
 946 /* map_idr_lock should have been held */
 947 static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
 948 {
 949         int refold;
 950
 951         refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
 952         if (!refold)
 953                 return ERR_PTR(-ENOENT);
 954         if (uref)
 955                 atomic64_inc(&map->usercnt);
 956
 957         return map;
 958 }
 959
 960 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
 961 {
 962         spin_lock_bh(&map_idr_lock);
 963         map = __bpf_map_inc_not_zero(map, false);
 964         spin_unlock_bh(&map_idr_lock);
 965
 966         return map;
 967 }
 968 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
 969
 970 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 971 {
 972         return -ENOTSUPP;
 973 }
 974
 975 static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 976 {
 977         if (key_size)
 978                 return memdup_user(ukey, key_size);
 979
 980         if (ukey)
 981                 return ERR_PTR(-EINVAL);
 982
 983         return NULL;
 984 }
 985
 986 /* last field in 'union bpf_attr' used by this command */
 987 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
 988
 989 static int map_lookup_elem(union bpf_attr *attr)
 990 {
 991         void __user *ukey = u64_to_user_ptr(attr->key);
 992         void __user *uvalue = u64_to_user_ptr(attr->value);
 993         int ufd = attr->map_fd;
 994         struct bpf_map *map;
 995         void *key, *value;
 996         u32 value_size;
 997         struct fd f;
 998         int err;
 999
1000         if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
1001                 return -EINVAL;
1002
1003         if (attr->flags & ~BPF_F_LOCK)
1004                 return -EINVAL;
1005
1006         f = fdget(ufd);
1007         map = __bpf_map_get(f);
1008         if (IS_ERR(map))
1009                 return PTR_ERR(map);
1010         if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
1011                 err = -EPERM;
1012                 goto err_put;
1013         }
1014
1015         if ((attr->flags & BPF_F_LOCK) &&
1016             !map_value_has_spin_lock(map)) {
1017                 err = -EINVAL;
1018                 goto err_put;
1019         }
1020
1021         key = __bpf_copy_key(ukey, map->key_size);
1022         if (IS_ERR(key)) {
1023                 err = PTR_ERR(key);
1024                 goto err_put;
1025         }
1026
1027         value_size = bpf_map_value_size(map);
1028
1029         err = -ENOMEM;
1030         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1031         if (!value)
1032                 goto free_key;
1033
1034         err = bpf_map_copy_value(map, key, value, attr->flags);
1035         if (err)
1036                 goto free_value;
1037
1038         err = -EFAULT;
1039         if (copy_to_user(uvalue, value, value_size) != 0)
1040                 goto free_value;
1041
1042         err = 0;
1043
1044 free_value:
1045         kfree(value);
1046 free_key:
1047         kfree(key);
1048 err_put:
1049         fdput(f);
1050         return err;
1051 }
1052
1053
1054 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
1055
1056 static int map_update_elem(union bpf_attr *attr)
1057 {
1058         void __user *ukey = u64_to_user_ptr(attr->key);
1059         void __user *uvalue = u64_to_user_ptr(attr->value);
1060         int ufd = attr->map_fd;
1061         struct bpf_map *map;
1062         void *key, *value;
1063         u32 value_size;
1064         struct fd f;
1065         int err;
1066
1067         if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
1068                 return -EINVAL;
1069
1070         f = fdget(ufd);
1071         map = __bpf_map_get(f);
1072         if (IS_ERR(map))
1073                 return PTR_ERR(map);
1074         if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1075                 err = -EPERM;
1076                 goto err_put;
1077         }
1078
1079         if ((attr->flags & BPF_F_LOCK) &&
1080             !map_value_has_spin_lock(map)) {
1081                 err = -EINVAL;
1082                 goto err_put;
1083         }
1084
1085         key = __bpf_copy_key(ukey, map->key_size);
1086         if (IS_ERR(key)) {
1087                 err = PTR_ERR(key);
1088                 goto err_put;
1089         }
1090
1091         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
1092             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
1093             map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
1094             map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
1095                 value_size = round_up(map->value_size, 8) * num_possible_cpus();
1096         else
1097                 value_size = map->value_size;
1098
1099         err = -ENOMEM;
1100         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1101         if (!value)
1102                 goto free_key;
1103
1104         err = -EFAULT;
1105         if (copy_from_user(value, uvalue, value_size) != 0)
1106                 goto free_value;
1107
1108         err = bpf_map_update_value(map, f, key, value, attr->flags);
1109
1110 free_value:
1111         kfree(value);
1112 free_key:
1113         kfree(key);
1114 err_put:
1115         fdput(f);
1116         return err;
1117 }
1118
1119 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
1120
1121 static int map_delete_elem(union bpf_attr *attr)
1122 {
1123         void __user *ukey = u64_to_user_ptr(attr->key);
1124         int ufd = attr->map_fd;
1125         struct bpf_map *map;
1126         struct fd f;
1127         void *key;
1128         int err;
1129
1130         if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
1131                 return -EINVAL;
1132
1133         f = fdget(ufd);
1134         map = __bpf_map_get(f);
1135         if (IS_ERR(map))
1136                 return PTR_ERR(map);
1137         if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1138                 err = -EPERM;
1139                 goto err_put;
1140         }
1141
1142         key = __bpf_copy_key(ukey, map->key_size);
1143         if (IS_ERR(key)) {
1144                 err = PTR_ERR(key);
1145                 goto err_put;
1146         }
1147
1148         if (bpf_map_is_dev_bound(map)) {
1149                 err = bpf_map_offload_delete_elem(map, key);
1150                 goto out;
1151         } else if (IS_FD_PROG_ARRAY(map) ||
1152                    map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
1153                 /* These maps require sleepable context */
1154                 err = map->ops->map_delete_elem(map, key);
1155                 goto out;
1156         }
1157
1158         bpf_disable_instrumentation();
1159         rcu_read_lock();
1160         err = map->ops->map_delete_elem(map, key);
1161         rcu_read_unlock();
1162         bpf_enable_instrumentation();
1163         maybe_wait_bpf_programs(map);
1164 out:
1165         kfree(key);
1166 err_put:
1167         fdput(f);
1168         return err;
1169 }
1170
1171 /* last field in 'union bpf_attr' used by this command */
1172 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
1173
1174 static int map_get_next_key(union bpf_attr *attr)
1175 {
1176         void __user *ukey = u64_to_user_ptr(attr->key);
1177         void __user *unext_key = u64_to_user_ptr(attr->next_key);
1178         int ufd = attr->map_fd;
1179         struct bpf_map *map;
1180         void *key, *next_key;
1181         struct fd f;
1182         int err;
1183
1184         if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
1185                 return -EINVAL;
1186
1187         f = fdget(ufd);
1188         map = __bpf_map_get(f);
1189         if (IS_ERR(map))
1190                 return PTR_ERR(map);
1191         if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
1192                 err = -EPERM;
1193                 goto err_put;
1194         }
1195
1196         if (ukey) {
1197                 key = __bpf_copy_key(ukey, map->key_size);
1198                 if (IS_ERR(key)) {
1199                         err = PTR_ERR(key);
1200                         goto err_put;
1201                 }
1202         } else {
1203                 key = NULL;
1204         }
1205
1206         err = -ENOMEM;
1207         next_key = kmalloc(map->key_size, GFP_USER);
1208         if (!next_key)
1209                 goto free_key;
1210
1211         if (bpf_map_is_dev_bound(map)) {
1212                 err = bpf_map_offload_get_next_key(map, key, next_key);
1213                 goto out;
1214         }
1215
1216         rcu_read_lock();
1217         err = map->ops->map_get_next_key(map, key, next_key);
1218         rcu_read_unlock();
1219 out:
1220         if (err)
1221                 goto free_next_key;
1222
1223         err = -EFAULT;
1224         if (copy_to_user(unext_key, next_key, map->key_size) != 0)
1225                 goto free_next_key;
1226
1227         err = 0;
1228
1229 free_next_key:
1230         kfree(next_key);
1231 free_key:
1232         kfree(key);
1233 err_put:
1234         fdput(f);
1235         return err;
1236 }
1237
1238 int generic_map_delete_batch(struct bpf_map *map,
1239                              const union bpf_attr *attr,
1240                              union bpf_attr __user *uattr)
1241 {
1242         void __user *keys = u64_to_user_ptr(attr->batch.keys);
1243         u32 cp, max_count;
1244         int err = 0;
1245         void *key;
1246
1247         if (attr->batch.elem_flags & ~BPF_F_LOCK)
1248                 return -EINVAL;
1249
1250         if ((attr->batch.elem_flags & BPF_F_LOCK) &&
1251             !map_value_has_spin_lock(map)) {
1252                 return -EINVAL;
1253         }
1254
1255         max_count = attr->batch.count;
1256         if (!max_count)
1257                 return 0;
1258
1259         key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1260         if (!key)
1261                 return -ENOMEM;
1262
1263         for (cp = 0; cp < max_count; cp++) {
1264                 err = -EFAULT;
1265                 if (copy_from_user(key, keys + cp * map->key_size,
1266                                    map->key_size))
1267                         break;
1268
1269                 if (bpf_map_is_dev_bound(map)) {
1270                         err = bpf_map_offload_delete_elem(map, key);
1271                         break;
1272                 }
1273
1274                 bpf_disable_instrumentation();
1275                 rcu_read_lock();
1276                 err = map->ops->map_delete_elem(map, key);
1277                 rcu_read_unlock();
1278                 bpf_enable_instrumentation();
1279                 maybe_wait_bpf_programs(map);
1280                 if (err)
1281                         break;
1282         }
1283         if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
1284                 err = -EFAULT;
1285
1286         kfree(key);
1287         return err;
1288 }
1289
1290 int generic_map_update_batch(struct bpf_map *map,
1291                              const union bpf_attr *attr,
1292                              union bpf_attr __user *uattr)
1293 {
1294         void __user *values = u64_to_user_ptr(attr->batch.values);
1295         void __user *keys = u64_to_user_ptr(attr->batch.keys);
1296         u32 value_size, cp, max_count;
1297         int ufd = attr->map_fd;
1298         void *key, *value;
1299         struct fd f;
1300         int err = 0;
1301
1302         f = fdget(ufd);
1303         if (attr->batch.elem_flags & ~BPF_F_LOCK)
1304                 return -EINVAL;
1305
1306         if ((attr->batch.elem_flags & BPF_F_LOCK) &&
1307             !map_value_has_spin_lock(map)) {
1308                 return -EINVAL;
1309         }
1310
1311         value_size = bpf_map_value_size(map);
1312
1313         max_count = attr->batch.count;
1314         if (!max_count)
1315                 return 0;
1316
1317         key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1318         if (!key)
1319                 return -ENOMEM;
1320
1321         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1322         if (!value) {
1323                 kfree(key);
1324                 return -ENOMEM;
1325         }
1326
1327         for (cp = 0; cp < max_count; cp++) {
1328                 err = -EFAULT;
1329                 if (copy_from_user(key, keys + cp * map->key_size,
1330                     map->key_size) ||
1331                     copy_from_user(value, values + cp * value_size, value_size))
1332                         break;
1333
1334                 err = bpf_map_update_value(map, f, key, value,
1335                                            attr->batch.elem_flags);
1336
1337                 if (err)
1338                         break;
1339         }
1340
1341         if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
1342                 err = -EFAULT;
1343
1344         kfree(value);
1345         kfree(key);
1346         return err;
1347 }
1348
1349 #define MAP_LOOKUP_RETRIES 3
1350
1351 int generic_map_lookup_batch(struct bpf_map *map,
1352                                     const union bpf_attr *attr,
1353                                     union bpf_attr __user *uattr)
1354 {
1355         void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
1356         void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
1357         void __user *values = u64_to_user_ptr(attr->batch.values);
1358         void __user *keys = u64_to_user_ptr(attr->batch.keys);
1359         void *buf, *buf_prevkey, *prev_key, *key, *value;
1360         int err, retry = MAP_LOOKUP_RETRIES;
1361         u32 value_size, cp, max_count;
1362
1363         if (attr->batch.elem_flags & ~BPF_F_LOCK)
1364                 return -EINVAL;
1365
1366         if ((attr->batch.elem_flags & BPF_F_LOCK) &&
1367             !map_value_has_spin_lock(map))
1368                 return -EINVAL;
1369
1370         value_size = bpf_map_value_size(map);
1371
1372         max_count = attr->batch.count;
1373         if (!max_count)
1374                 return 0;
1375
1376         if (put_user(0, &uattr->batch.count))
1377                 return -EFAULT;
1378
1379         buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1380         if (!buf_prevkey)
1381                 return -ENOMEM;
1382
1383         buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
1384         if (!buf) {
1385                 kvfree(buf_prevkey);
1386                 return -ENOMEM;
1387         }
1388
1389         err = -EFAULT;
1390         prev_key = NULL;
1391         if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
1392                 goto free_buf;
1393         key = buf;
1394         value = key + map->key_size;
1395         if (ubatch)
1396                 prev_key = buf_prevkey;
1397
1398         for (cp = 0; cp < max_count;) {
1399                 rcu_read_lock();
1400                 err = map->ops->map_get_next_key(map, prev_key, key);
1401                 rcu_read_unlock();
1402                 if (err)
1403                         break;
1404                 err = bpf_map_copy_value(map, key, value,
1405                                          attr->batch.elem_flags);
1406
1407                 if (err == -ENOENT) {
1408                         if (retry) {
1409                                 retry--;
1410                                 continue;
1411                         }
1412                         err = -EINTR;
1413                         break;
1414                 }
1415
1416                 if (err)
1417                         goto free_buf;
1418
1419                 if (copy_to_user(keys + cp * map->key_size, key,
1420                                  map->key_size)) {
1421                         err = -EFAULT;
1422                         goto free_buf;
1423                 }
1424                 if (copy_to_user(values + cp * value_size, value, value_size)) {
1425                         err = -EFAULT;
1426                         goto free_buf;
1427                 }
1428
1429                 if (!prev_key)
1430                         prev_key = buf_prevkey;
1431
1432                 swap(prev_key, key);
1433                 retry = MAP_LOOKUP_RETRIES;
1434                 cp++;
1435         }
1436
1437         if (err == -EFAULT)
1438                 goto free_buf;
1439
1440         if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
1441                     (cp && copy_to_user(uobatch, prev_key, map->key_size))))
1442                 err = -EFAULT;
1443
1444 free_buf:
1445         kfree(buf_prevkey);
1446         kfree(buf);
1447         return err;
1448 }
1449
1450 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
1451
1452 static int map_lookup_and_delete_elem(union bpf_attr *attr)
1453 {
1454         void __user *ukey = u64_to_user_ptr(attr->key);
1455         void __user *uvalue = u64_to_user_ptr(attr->value);
1456         int ufd = attr->map_fd;
1457         struct bpf_map *map;
1458         void *key, *value;
1459         u32 value_size;
1460         struct fd f;
1461         int err;
1462
1463         if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
1464                 return -EINVAL;
1465
1466         f = fdget(ufd);
1467         map = __bpf_map_get(f);
1468         if (IS_ERR(map))
1469                 return PTR_ERR(map);
1470         if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1471                 err = -EPERM;
1472                 goto err_put;
1473         }
1474
1475         key = __bpf_copy_key(ukey, map->key_size);
1476         if (IS_ERR(key)) {
1477                 err = PTR_ERR(key);
1478                 goto err_put;
1479         }
1480
1481         value_size = map->value_size;
1482
1483         err = -ENOMEM;
1484         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1485         if (!value)
1486                 goto free_key;
1487
1488         if (map->map_type == BPF_MAP_TYPE_QUEUE ||
1489             map->map_type == BPF_MAP_TYPE_STACK) {
1490                 err = map->ops->map_pop_elem(map, value);
1491         } else {
1492                 err = -ENOTSUPP;
1493         }
1494
1495         if (err)
1496                 goto free_value;
1497
1498         if (copy_to_user(uvalue, value, value_size) != 0) {
1499                 err = -EFAULT;
1500                 goto free_value;
1501         }
1502
1503         err = 0;
1504
1505 free_value:
1506         kfree(value);
1507 free_key:
1508         kfree(key);
1509 err_put:
1510         fdput(f);
1511         return err;
1512 }
1513
1514 #define BPF_MAP_FREEZE_LAST_FIELD map_fd
1515
1516 static int map_freeze(const union bpf_attr *attr)
1517 {
1518         int err = 0, ufd = attr->map_fd;
1519         struct bpf_map *map;
1520         struct fd f;
1521
1522         if (CHECK_ATTR(BPF_MAP_FREEZE))
1523                 return -EINVAL;
1524
1525         f = fdget(ufd);
1526         map = __bpf_map_get(f);
1527         if (IS_ERR(map))
1528                 return PTR_ERR(map);
1529
1530         if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
1531                 fdput(f);
1532                 return -ENOTSUPP;
1533         }
1534
1535         mutex_lock(&map->freeze_mutex);
1536
1537         if (map->writecnt) {
1538                 err = -EBUSY;
1539                 goto err_put;
1540         }
1541         if (READ_ONCE(map->frozen)) {
1542                 err = -EBUSY;
1543                 goto err_put;
1544         }
1545         if (!capable(CAP_SYS_ADMIN)) {
1546                 err = -EPERM;
1547                 goto err_put;
1548         }
1549
1550         WRITE_ONCE(map->frozen, true);
1551 err_put:
1552         mutex_unlock(&map->freeze_mutex);
1553         fdput(f);
1554         return err;
1555 }
1556
1557 static const struct bpf_prog_ops * const bpf_prog_types[] = {
1558 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
1559         [_id] = & _name ## _prog_ops,
1560 #define BPF_MAP_TYPE(_id, _ops)
1561 #include <linux/bpf_types.h>
1562 #undef BPF_PROG_TYPE
1563 #undef BPF_MAP_TYPE
1564 };
1565
1566 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
1567 {
1568         const struct bpf_prog_ops *ops;
1569
1570         if (type >= ARRAY_SIZE(bpf_prog_types))
1571                 return -EINVAL;
1572         type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
1573         ops = bpf_prog_types[type];
1574         if (!ops)
1575                 return -EINVAL;
1576
1577         if (!bpf_prog_is_dev_bound(prog->aux))
1578                 prog->aux->ops = ops;
1579         else
1580                 prog->aux->ops = &bpf_offload_prog_ops;
1581         prog->type = type;
1582         return 0;
1583 }
1584
/* Operations reported by bpf_audit_prog(); the values index
 * bpf_audit_str[].
 */
enum bpf_audit {
	BPF_AUDIT_LOAD = 0,
	BPF_AUDIT_UNLOAD,
	BPF_AUDIT_MAX,		/* number of operations, not an operation */
};
1590
1591 static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
1592         [BPF_AUDIT_LOAD]   = "LOAD",
1593         [BPF_AUDIT_UNLOAD] = "UNLOAD",
1594 };
1595
1596 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
1597 {
1598         struct audit_context *ctx = NULL;
1599         struct audit_buffer *ab;
1600
1601         if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX))
1602                 return;
1603         if (audit_enabled == AUDIT_OFF)
1604                 return;
1605         if (op == BPF_AUDIT_LOAD)
1606                 ctx = audit_context();
1607         ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
1608         if (unlikely(!ab))
1609                 return;
1610         audit_log_format(ab, "prog-id=%u op=%s",
1611                          prog->aux->id, bpf_audit_str[op]);
1612         audit_log_end(ab);
1613 }
1614
1615 int __bpf_prog_charge(struct user_struct *user, u32 pages)
1616 {
1617         unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1618         unsigned long user_bufs;
1619
1620         if (user) {
1621                 user_bufs = atomic_long_add_return(pages, &user->locked_vm);
1622                 if (user_bufs > memlock_limit) {
1623                         atomic_long_sub(pages, &user->locked_vm);
1624                         return -EPERM;
1625                 }
1626         }
1627
1628         return 0;
1629 }
1630
1631 void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
1632 {
1633         if (user)
1634                 atomic_long_sub(pages, &user->locked_vm);
1635 }
1636
1637 static int bpf_prog_charge_memlock(struct bpf_prog *prog)
1638 {
1639         struct user_struct *user = get_current_user();
1640         int ret;
1641
1642         ret = __bpf_prog_charge(user, prog->pages);
1643         if (ret) {
1644                 free_uid(user);
1645                 return ret;
1646         }
1647
1648         prog->aux->user = user;
1649         return 0;
1650 }
1651
1652 static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
1653 {
1654         struct user_struct *user = prog->aux->user;
1655
1656         __bpf_prog_uncharge(user, prog->pages);
1657         free_uid(user);
1658 }
1659
1660 static int bpf_prog_alloc_id(struct bpf_prog *prog)
1661 {
1662         int id;
1663
1664         idr_preload(GFP_KERNEL);
1665         spin_lock_bh(&prog_idr_lock);
1666         id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
1667         if (id > 0)
1668                 prog->aux->id = id;
1669         spin_unlock_bh(&prog_idr_lock);
1670         idr_preload_end();
1671
1672         /* id is in [1, INT_MAX) */
1673         if (WARN_ON_ONCE(!id))
1674                 return -ENOSPC;
1675
1676         return id > 0 ? 0 : id;
1677 }
1678
1679 void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
1680 {
1681         /* cBPF to eBPF migrations are currently not in the idr store.
1682          * Offloaded programs are removed from the store when their device
1683          * disappears - even if someone grabs an fd to them they are unusable,
1684          * simply waiting for refcnt to drop to be freed.
1685          */
1686         if (!prog->aux->id)
1687                 return;
1688
1689         if (do_idr_lock)
1690                 spin_lock_bh(&prog_idr_lock);
1691         else
1692                 __acquire(&prog_idr_lock);
1693
1694         idr_remove(&prog_idr, prog->aux->id);
1695         prog->aux->id = 0;
1696
1697         if (do_idr_lock)
1698                 spin_unlock_bh(&prog_idr_lock);
1699         else
1700                 __release(&prog_idr_lock);
1701 }
1702
1703 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
1704 {
1705         struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
1706
1707         kvfree(aux->func_info);
1708         kfree(aux->func_info_aux);
1709         bpf_prog_uncharge_memlock(aux->prog);
1710         security_bpf_prog_free(aux);
1711         bpf_prog_free(aux->prog);
1712 }
1713
1714 static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
1715 {
1716         bpf_prog_kallsyms_del_all(prog);
1717         btf_put(prog->aux->btf);
1718         bpf_prog_free_linfo(prog);
1719
1720         if (deferred)
1721                 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
1722         else
1723                 __bpf_prog_put_rcu(&prog->aux->rcu);
1724 }
1725
1726 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
1727 {
1728         if (atomic64_dec_and_test(&prog->aux->refcnt)) {
1729                 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
1730                 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
1731                 /* bpf_prog_free_id() must be called first */
1732                 bpf_prog_free_id(prog, do_idr_lock);
1733                 __bpf_prog_put_noref(prog, true);
1734         }
1735 }
1736
1737 void bpf_prog_put(struct bpf_prog *prog)
1738 {
1739         __bpf_prog_put(prog, true);
1740 }
1741 EXPORT_SYMBOL_GPL(bpf_prog_put);
1742
1743 static int bpf_prog_release(struct inode *inode, struct file *filp)
1744 {
1745         struct bpf_prog *prog = filp->private_data;
1746
1747         bpf_prog_put(prog);
1748         return 0;
1749 }
1750
1751 static void bpf_prog_get_stats(const struct bpf_prog *prog,
1752                                struct bpf_prog_stats *stats)
1753 {
1754         u64 nsecs = 0, cnt = 0;
1755         int cpu;
1756
1757         for_each_possible_cpu(cpu) {
1758                 const struct bpf_prog_stats *st;
1759                 unsigned int start;
1760                 u64 tnsecs, tcnt;
1761
1762                 st = per_cpu_ptr(prog->aux->stats, cpu);
1763                 do {
1764                         start = u64_stats_fetch_begin_irq(&st->syncp);
1765                         tnsecs = st->nsecs;
1766                         tcnt = st->cnt;
1767                 } while (u64_stats_fetch_retry_irq(&st->syncp, start));
1768                 nsecs += tnsecs;
1769                 cnt += tcnt;
1770         }
1771         stats->nsecs = nsecs;
1772         stats->cnt = cnt;
1773 }
1774
#ifdef CONFIG_PROC_FS
/* ->show_fdinfo() of bpf_prog_fops: print type, jited flag, hex tag,
 * memlock size in bytes, id and accumulated run statistics of the
 * program behind @filp.
 */
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
	struct bpf_prog_stats stats;
	unsigned long long memlock_bytes;

	bpf_prog_get_stats(prog, &stats);
	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	memlock_bytes = prog->pages * 1ULL << PAGE_SHIFT;

	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n"
		   "prog_id:\t%u\n"
		   "run_time_ns:\t%llu\n"
		   "run_cnt:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   memlock_bytes,
		   prog->aux->id,
		   stats.nsecs,
		   stats.cnt);
}
#endif
1801
1802 const struct file_operations bpf_prog_fops = {
1803 #ifdef CONFIG_PROC_FS
1804         .show_fdinfo    = bpf_prog_show_fdinfo,
1805 #endif
1806         .release        = bpf_prog_release,
1807         .read           = bpf_dummy_read,
1808         .write          = bpf_dummy_write,
1809 };
1810
1811 int bpf_prog_new_fd(struct bpf_prog *prog)
1812 {
1813         int ret;
1814
1815         ret = security_bpf_prog(prog);
1816         if (ret < 0)
1817                 return ret;
1818
1819         return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
1820                                 O_RDWR | O_CLOEXEC);
1821 }
1822
1823 static struct bpf_prog *____bpf_prog_get(struct fd f)
1824 {
1825         if (!f.file)
1826                 return ERR_PTR(-EBADF);
1827         if (f.file->f_op != &bpf_prog_fops) {
1828                 fdput(f);
1829                 return ERR_PTR(-EINVAL);
1830         }
1831
1832         return f.file->private_data;
1833 }
1834
1835 void bpf_prog_add(struct bpf_prog *prog, int i)
1836 {
1837         atomic64_add(i, &prog->aux->refcnt);
1838 }
1839 EXPORT_SYMBOL_GPL(bpf_prog_add);
1840
1841 void bpf_prog_sub(struct bpf_prog *prog, int i)
1842 {
1843         /* Only to be used for undoing previous bpf_prog_add() in some
1844          * error path. We still know that another entity in our call
1845          * path holds a reference to the program, thus atomic_sub() can
1846          * be safely used in such cases!
1847          */
1848         WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
1849 }
1850 EXPORT_SYMBOL_GPL(bpf_prog_sub);
1851
1852 void bpf_prog_inc(struct bpf_prog *prog)
1853 {
1854         atomic64_inc(&prog->aux->refcnt);
1855 }
1856 EXPORT_SYMBOL_GPL(bpf_prog_inc);
1857
1858 /* prog_idr_lock should have been held */
1859 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
1860 {
1861         int refold;
1862
1863         refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);
1864
1865         if (!refold)
1866                 return ERR_PTR(-ENOENT);
1867
1868         return prog;
1869 }
1870 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
1871
1872 bool bpf_prog_get_ok(struct bpf_prog *prog,
1873                             enum bpf_prog_type *attach_type, bool attach_drv)
1874 {
1875         /* not an attachment, just a refcount inc, always allow */
1876         if (!attach_type)
1877                 return true;
1878
1879         if (prog->type != *attach_type)
1880                 return false;
1881         if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv)
1882                 return false;
1883
1884         return true;
1885 }
1886
1887 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
1888                                        bool attach_drv)
1889 {
1890         struct fd f = fdget(ufd);
1891         struct bpf_prog *prog;
1892
1893         prog = ____bpf_prog_get(f);
1894         if (IS_ERR(prog))
1895                 return prog;
1896         if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
1897                 prog = ERR_PTR(-EINVAL);
1898                 goto out;
1899         }
1900
1901         bpf_prog_inc(prog);
1902 out:
1903         fdput(f);
1904         return prog;
1905 }
1906
1907 struct bpf_prog *bpf_prog_get(u32 ufd)
1908 {
1909         return __bpf_prog_get(ufd, NULL, false);
1910 }
1911
1912 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
1913                                        bool attach_drv)
1914 {
1915         return __bpf_prog_get(ufd, &type, attach_drv);
1916 }
1917 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
1918
1919 /* Initially all BPF programs could be loaded w/o specifying
1920  * expected_attach_type. Later for some of them specifying expected_attach_type
1921  * at load time became required so that program could be validated properly.
1922  * Programs of types that are allowed to be loaded both w/ and w/o (for
1923  * backward compatibility) expected_attach_type, should have the default attach
1924  * type assigned to expected_attach_type for the latter case, so that it can be
1925  * validated later at attach time.
1926  *
1927  * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
1928  * prog type requires it but has some attach types that have to be backward
1929  * compatible.
1930  */
1931 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
1932 {
1933         switch (attr->prog_type) {
1934         case BPF_PROG_TYPE_CGROUP_SOCK:
1935                 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
1936                  * exist so checking for non-zero is the way to go here.
1937                  */
1938                 if (!attr->expected_attach_type)
1939                         attr->expected_attach_type =
1940                                 BPF_CGROUP_INET_SOCK_CREATE;
1941                 break;
1942         }
1943 }
1944
1945 static int
1946 bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
1947                            enum bpf_attach_type expected_attach_type,
1948                            u32 btf_id, u32 prog_fd)
1949 {
1950         if (btf_id) {
1951                 if (btf_id > BTF_MAX_TYPE)
1952                         return -EINVAL;
1953
1954                 switch (prog_type) {
1955                 case BPF_PROG_TYPE_TRACING:
1956                 case BPF_PROG_TYPE_LSM:
1957                 case BPF_PROG_TYPE_STRUCT_OPS:
1958                 case BPF_PROG_TYPE_EXT:
1959                         break;
1960                 default:
1961                         return -EINVAL;
1962                 }
1963         }
1964
1965         if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING &&
1966             prog_type != BPF_PROG_TYPE_EXT)
1967                 return -EINVAL;
1968
1969         switch (prog_type) {
1970         case BPF_PROG_TYPE_CGROUP_SOCK:
1971                 switch (expected_attach_type) {
1972                 case BPF_CGROUP_INET_SOCK_CREATE:
1973                 case BPF_CGROUP_INET4_POST_BIND:
1974                 case BPF_CGROUP_INET6_POST_BIND:
1975                         return 0;
1976                 default:
1977                         return -EINVAL;
1978                 }
1979         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
1980                 switch (expected_attach_type) {
1981                 case BPF_CGROUP_INET4_BIND:
1982                 case BPF_CGROUP_INET6_BIND:
1983                 case BPF_CGROUP_INET4_CONNECT:
1984                 case BPF_CGROUP_INET6_CONNECT:
1985                 case BPF_CGROUP_UDP4_SENDMSG:
1986                 case BPF_CGROUP_UDP6_SENDMSG:
1987                 case BPF_CGROUP_UDP4_RECVMSG:
1988                 case BPF_CGROUP_UDP6_RECVMSG:
1989                         return 0;
1990                 default:
1991                         return -EINVAL;
1992                 }
1993         case BPF_PROG_TYPE_CGROUP_SKB:
1994                 switch (expected_attach_type) {
1995                 case BPF_CGROUP_INET_INGRESS:
1996                 case BPF_CGROUP_INET_EGRESS:
1997                         return 0;
1998                 default:
1999                         return -EINVAL;
2000                 }
2001         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2002                 switch (expected_attach_type) {
2003                 case BPF_CGROUP_SETSOCKOPT:
2004                 case BPF_CGROUP_GETSOCKOPT:
2005                         return 0;
2006                 default:
2007                         return -EINVAL;
2008                 }
2009         case BPF_PROG_TYPE_EXT:
2010                 if (expected_attach_type)
2011                         return -EINVAL;
2012                 /* fallthrough */
2013         default:
2014                 return 0;
2015         }
2016 }
2017
2018 /* last field in 'union bpf_attr' used by this command */
2019 #define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
2020
     /* Handle the BPF_PROG_LOAD command.
      *
      * Validates @attr, allocates a program, copies the instructions from user
      * space, runs the verifier, selects the runtime, allocates a program id and
      * finally creates a file descriptor for the program.
      *
      * @attr:  kernel copy of the user supplied attributes
      * @uattr: user space pointer to the same attributes, passed on to
      *         bpf_check()
      *
      * Returns the result of bpf_prog_new_fd() on success, or a negative errno.
      */
2021 static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
2022 {
2023         enum bpf_prog_type type = attr->prog_type;
2024         struct bpf_prog *prog;
2025         int err;
2026         char license[128];
2027         bool is_gpl;
2028
2029         if (CHECK_ATTR(BPF_PROG_LOAD))
2030                 return -EINVAL;
2031
             /* reject any flag bit other than the four accepted below */
2032         if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
2033                                  BPF_F_ANY_ALIGNMENT |
2034                                  BPF_F_TEST_STATE_FREQ |
2035                                  BPF_F_TEST_RND_HI32))
2036                 return -EINVAL;
2037
             /* without CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, asking for
              * BPF_F_ANY_ALIGNMENT requires CAP_SYS_ADMIN
              */
2038         if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
2039             (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
2040             !capable(CAP_SYS_ADMIN))
2041                 return -EPERM;
2042
2043         /* copy eBPF program license from user space */
2044         if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
2045                               sizeof(license) - 1) < 0)
2046                 return -EFAULT;
             /* the copy is bounded to sizeof - 1, so terminate explicitly */
2047         license[sizeof(license) - 1] = 0;
2048
2049         /* eBPF programs must be GPL compatible to use GPL-ed functions */
2050         is_gpl = license_is_gpl_compatible(license);
2051
             /* the instruction limit depends on CAP_SYS_ADMIN */
2052         if (attr->insn_cnt == 0 ||
2053             attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
2054                 return -E2BIG;
             /* only socket filter and cgroup skb programs may be loaded
              * without CAP_SYS_ADMIN
              */
2055         if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
2056             type != BPF_PROG_TYPE_CGROUP_SKB &&
2057             !capable(CAP_SYS_ADMIN))
2058                 return -EPERM;
2059
             /* fill in the default attach type first, then validate it */
2060         bpf_prog_load_fixup_attach_type(attr);
2061         if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
2062                                        attr->attach_btf_id,
2063                                        attr->attach_prog_fd))
2064                 return -EINVAL;
2065
2066         /* plain bpf_prog allocation */
2067         prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
2068         if (!prog)
2069                 return -ENOMEM;
2070
2071         prog->expected_attach_type = attr->expected_attach_type;
2072         prog->aux->attach_btf_id = attr->attach_btf_id;
2073         if (attr->attach_prog_fd) {
2074                 struct bpf_prog *tgt_prog;
2075
                     /* bpf_prog_get() takes a reference on the target program;
                      * NOTE(review): the matching put is not in this function,
                      * presumably done when prog is freed - confirm
                      */
2076                 tgt_prog = bpf_prog_get(attr->attach_prog_fd);
2077                 if (IS_ERR(tgt_prog)) {
2078                         err = PTR_ERR(tgt_prog);
2079                         goto free_prog_nouncharge;
2080                 }
2081                 prog->aux->linked_prog = tgt_prog;
2082         }
2083
             /* a non-zero prog_ifindex marks the program as offload requested */
2084         prog->aux->offload_requested = !!attr->prog_ifindex;
2085
2086         err = security_bpf_prog_alloc(prog->aux);
2087         if (err)
2088                 goto free_prog_nouncharge;
2089
2090         err = bpf_prog_charge_memlock(prog);
2091         if (err)
2092                 goto free_prog_sec;
2093
2094         prog->len = attr->insn_cnt;
2095
             /* preset the error for the copy_from_user() failure path */
2096         err = -EFAULT;
2097         if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
2098                            bpf_prog_insn_size(prog)) != 0)
2099                 goto free_prog;
2100
2101         prog->orig_prog = NULL;
2102         prog->jited = 0;
2103
             /* the single reference held by this function */
2104         atomic64_set(&prog->aux->refcnt, 1);
2105         prog->gpl_compatible = is_gpl ? 1 : 0;
2106
2107         if (bpf_prog_is_dev_bound(prog->aux)) {
2108                 err = bpf_prog_offload_init(prog, attr);
2109                 if (err)
2110                         goto free_prog;
2111         }
2112
2113         /* find program type: socket_filter vs tracing_filter */
2114         err = find_prog_type(type, prog);
2115         if (err < 0)
2116                 goto free_prog;
2117
2118         prog->aux->load_time = ktime_get_boottime_ns();
2119         err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
2120                                sizeof(attr->prog_name));
2121         if (err < 0)
2122                 goto free_prog;
2123
2124         /* run eBPF verifier */
             /* takes &prog, so prog may point elsewhere after the call */
2125         err = bpf_check(&prog, attr, uattr);
2126         if (err < 0)
2127                 goto free_used_maps;
2128
2129         prog = bpf_prog_select_runtime(prog, &err);
2130         if (err < 0)
2131                 goto free_used_maps;
2132
2133         err = bpf_prog_alloc_id(prog);
2134         if (err)
2135                 goto free_used_maps;
2136
2137         /* Upon success of bpf_prog_alloc_id(), the BPF prog is
2138          * effectively publicly exposed. However, retrieving via
2139          * bpf_prog_get_fd_by_id() will take another reference,
2140          * therefore it cannot be gone underneath us.
2141          *
2142          * Only for the time /after/ successful bpf_prog_new_fd()
2143          * and before returning to userspace, we might just hold
2144          * one reference and any parallel close on that fd could
2145          * rip everything out. Hence, below notifications must
2146          * happen before bpf_prog_new_fd().
2147          *
2148          * Also, any failure handling from this point onwards must
2149          * be using bpf_prog_put() given the program is exposed.
2150          */
2151         bpf_prog_kallsyms_add(prog);
2152         perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
2153         bpf_audit_prog(prog, BPF_AUDIT_LOAD);
2154
2155         err = bpf_prog_new_fd(prog);
2156         if (err < 0)
2157                 bpf_prog_put(prog);
             /* on success err holds the new fd */
2158         return err;
2159
             /* Error unwinding: free_used_maps is a terminal path of its own;
              * the three labels after it fall through into each other, each
              * undoing one earlier setup step.
              */
2160 free_used_maps:
2161         /* In case we have subprogs, we need to wait for a grace
2162          * period before we can tear down JIT memory since symbols
2163          * are already exposed under kallsyms.
2164          */
2165         __bpf_prog_put_noref(prog, prog->aux->func_cnt);
2166         return err;
2167 free_prog:
2168         bpf_prog_uncharge_memlock(prog);
2169 free_prog_sec:
2170         security_bpf_prog_free(prog->aux);
2171 free_prog_nouncharge:
2172         bpf_prog_free(prog);
2173         return err;
2174 }
2175
2176 #define BPF_OBJ_LAST_FIELD file_flags
2177
2178 static int bpf_obj_pin(const union bpf_attr *attr)
2179 {
2180         if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
2181                 return -EINVAL;
2182
2183         return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
2184 }
2185
2186 static int bpf_obj_get(const union bpf_attr *attr)
2187 {
2188         if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
2189             attr->file_flags & ~BPF_OBJ_FLAG_MASK)
2190                 return -EINVAL;
2191
2192         return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
2193                                 attr->file_flags);
2194 }
2195
2196 void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
2197                    struct bpf_prog *prog)
2198 {
2199         atomic64_set(&link->refcnt, 1);
2200         link->ops = ops;
2201         link->prog = prog;
2202 }
2203
2204 /* Clean up bpf_link and corresponding anon_inode file and FD. After
2205  * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
2206  * anon_inode's release() call. This helper manages marking bpf_link as
2207  * defunct, releases anon_inode file and puts reserved FD.
2208  */
2209 void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
2210                       int link_fd)
2211 {
2212         link->prog = NULL;
2213         fput(link_file);
2214         put_unused_fd(link_fd);
2215 }
2216
2217 void bpf_link_inc(struct bpf_link *link)
2218 {
2219         atomic64_inc(&link->refcnt);
2220 }
2221
2222 /* bpf_link_free is guaranteed to be called from process context */
2223 static void bpf_link_free(struct bpf_link *link)
2224 {
2225         if (link->prog) {
2226                 /* detach BPF program, clean up used resources */
2227                 link->ops->release(link);
2228                 bpf_prog_put(link->prog);
2229         }
2230         /* free bpf_link and its containing memory */
2231         link->ops->dealloc(link);
2232 }
2233
2234 static void bpf_link_put_deferred(struct work_struct *work)
2235 {
2236         struct bpf_link *link = container_of(work, struct bpf_link, work);
2237
2238         bpf_link_free(link);
2239 }
2240
2241 /* bpf_link_put can be called from atomic context, but ensures that resources
2242  * are freed from process context
2243  */
2244 void bpf_link_put(struct bpf_link *link)
2245 {
2246         if (!atomic64_dec_and_test(&link->refcnt))
2247                 return;
2248
2249         if (in_atomic()) {
2250                 INIT_WORK(&link->work, bpf_link_put_deferred);
2251                 schedule_work(&link->work);
2252         } else {
2253                 bpf_link_free(link);
2254         }
2255 }
2256
2257 static int bpf_link_release(struct inode *inode, struct file *filp)
2258 {
2259         struct bpf_link *link = filp->private_data;
2260
2261         bpf_link_put(link);
2262         return 0;
2263 }
2264
2265 #ifdef CONFIG_PROC_FS
2266 static const struct bpf_link_ops bpf_raw_tp_lops;
2267 static const struct bpf_link_ops bpf_tracing_link_lops;
2268
2269 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
2270 {
2271         const struct bpf_link *link = filp->private_data;
2272         const struct bpf_prog *prog = link->prog;
2273         char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
2274         const char *link_type;
2275
2276         if (link->ops == &bpf_raw_tp_lops)
2277                 link_type = "raw_tracepoint";
2278         else if (link->ops == &bpf_tracing_link_lops)
2279                 link_type = "tracing";
2280 #ifdef CONFIG_CGROUP_BPF
2281         else if (link->ops == &bpf_cgroup_link_lops)
2282                 link_type = "cgroup";
2283 #endif
2284         else
2285                 link_type = "unknown";
2286
2287         bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
2288         seq_printf(m,
2289                    "link_type:\t%s\n"
2290                    "prog_tag:\t%s\n"
2291                    "prog_id:\t%u\n",
2292                    link_type,
2293                    prog_tag,
2294                    prog->aux->id);
2295 }
2296 #endif
2297
2298 static const struct file_operations bpf_link_fops = {
2299 #ifdef CONFIG_PROC_FS
2300         .show_fdinfo    = bpf_link_show_fdinfo,
2301 #endif
2302         .release        = bpf_link_release,
2303         .read           = bpf_dummy_read,
2304         .write          = bpf_dummy_write,
2305 };
2306
2307 int bpf_link_new_fd(struct bpf_link *link)
2308 {
2309         return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
2310 }
2311
2312 /* Similar to bpf_link_new_fd, create anon_inode for given bpf_link, but
2313  * instead of immediately installing fd in fdtable, just reserve it and
2314  * return. Caller then need to either install it with fd_install(fd, file) or
2315  * release with put_unused_fd(fd).
2316  * This is useful for cases when bpf_link attachment/detachment are
2317  * complicated and expensive operations and should be delayed until all the fd
2318  * reservation and anon_inode creation succeeds.
2319  */
2320 struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd)
2321 {
2322         struct file *file;
2323         int fd;
2324
2325         fd = get_unused_fd_flags(O_CLOEXEC);
2326         if (fd < 0)
2327                 return ERR_PTR(fd);
2328
2329         file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
2330         if (IS_ERR(file)) {
2331                 put_unused_fd(fd);
2332                 return file;
2333         }
2334
2335         *reserved_fd = fd;
2336         return file;
2337 }
2338
2339 struct bpf_link *bpf_link_get_from_fd(u32 ufd)
2340 {
2341         struct fd f = fdget(ufd);
2342         struct bpf_link *link;
2343
2344         if (!f.file)
2345                 return ERR_PTR(-EBADF);
2346         if (f.file->f_op != &bpf_link_fops) {
2347                 fdput(f);
2348                 return ERR_PTR(-EINVAL);
2349         }
2350
2351         link = f.file->private_data;
2352         bpf_link_inc(link);
2353         fdput(f);
2354
2355         return link;
2356 }
2357
2358 struct bpf_tracing_link {
2359         struct bpf_link link; /* embedded generic link; container_of() recovers the outer struct */
2360 };
2361
2362 static void bpf_tracing_link_release(struct bpf_link *link)
2363 {
2364         WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
2365 }
2366
2367 static void bpf_tracing_link_dealloc(struct bpf_link *link)
2368 {
2369         struct bpf_tracing_link *tr_link =
2370                 container_of(link, struct bpf_tracing_link, link);
2371
2372         kfree(tr_link);
2373 }
2374
2375 static const struct bpf_link_ops bpf_tracing_link_lops = {
2376         .release = bpf_tracing_link_release,
2377         .dealloc = bpf_tracing_link_dealloc,
2378 };
2379
2380 static int bpf_tracing_prog_attach(struct bpf_prog *prog)
2381 {
2382         struct bpf_tracing_link *link;
2383         struct file *link_file;
2384         int link_fd, err;
2385
2386         switch (prog->type) {
2387         case BPF_PROG_TYPE_TRACING:
2388                 if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
2389                     prog->expected_attach_type != BPF_TRACE_FEXIT &&
2390                     prog->expected_attach_type != BPF_MODIFY_RETURN) {
2391                         err = -EINVAL;
2392                         goto out_put_prog;
2393                 }
2394                 break;
2395         case BPF_PROG_TYPE_EXT:
2396                 if (prog->expected_attach_type != 0) {
2397                         err = -EINVAL;
2398                         goto out_put_prog;
2399                 }
2400                 break;
2401         case BPF_PROG_TYPE_LSM:
2402                 if (prog->expected_attach_type != BPF_LSM_MAC) {
2403                         err = -EINVAL;
2404                         goto out_put_prog;
2405                 }
2406                 break;
2407         default:
2408                 err = -EINVAL;
2409                 goto out_put_prog;
2410         }
2411
2412         link = kzalloc(sizeof(*link), GFP_USER);
2413         if (!link) {
2414                 err = -ENOMEM;
2415                 goto out_put_prog;
2416         }
2417         bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
2418
2419         link_file = bpf_link_new_file(&link->link, &link_fd);
2420         if (IS_ERR(link_file)) {
2421                 kfree(link);
2422                 err = PTR_ERR(link_file);
2423                 goto out_put_prog;
2424         }
2425
2426         err = bpf_trampoline_link_prog(prog);
2427         if (err) {
2428                 bpf_link_cleanup(&link->link, link_file, link_fd);
2429                 goto out_put_prog;
2430         }
2431
2432         fd_install(link_fd, link_file);
2433         return link_fd;
2434
2435 out_put_prog:
2436         bpf_prog_put(prog);
2437         return err;
2438 }
2439
2440 struct bpf_raw_tp_link {
2441         struct bpf_link link; /* embedded generic link; container_of() recovers the outer struct */
2442         struct bpf_raw_event_map *btp; /* raw tracepoint the program is registered on */
2443 };
2444
2445 static void bpf_raw_tp_link_release(struct bpf_link *link)
2446 {
2447         struct bpf_raw_tp_link *raw_tp =
2448                 container_of(link, struct bpf_raw_tp_link, link);
2449
2450         bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
2451         bpf_put_raw_tracepoint(raw_tp->btp);
2452 }
2453
2454 static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
2455 {
2456         struct bpf_raw_tp_link *raw_tp =
2457                 container_of(link, struct bpf_raw_tp_link, link);
2458
2459         kfree(raw_tp);
2460 }
2461
2462 static const struct bpf_link_ops bpf_raw_tp_lops = {
2463         .release = bpf_raw_tp_link_release,
2464         .dealloc = bpf_raw_tp_link_dealloc,
2465 };
2466
2467 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
2468
     /* Handle the BPF_RAW_TRACEPOINT_OPEN command.
      *
      * Takes a reference on the program behind raw_tracepoint.prog_fd and
      * attaches it:
      *  - TRACING/EXT/LSM programs must not pass a tracepoint name; unless the
      *    program is a TRACING program expecting BPF_TRACE_RAW_TP, they are
      *    handed to bpf_tracing_prog_attach();
      *  - otherwise the program is registered on the raw tracepoint found by
      *    name and wrapped in a new link.
      *
      * Returns a link fd on success or a negative errno.
      */
2469 static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
2470 {
2471         struct bpf_raw_tp_link *link;
2472         struct bpf_raw_event_map *btp;
2473         struct file *link_file;
2474         struct bpf_prog *prog;
2475         const char *tp_name;
2476         char buf[128];
2477         int link_fd, err;
2478
2479         if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
2480                 return -EINVAL;
2481
2482         prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
2483         if (IS_ERR(prog))
2484                 return PTR_ERR(prog);
2485
2486         switch (prog->type) {
2487         case BPF_PROG_TYPE_TRACING:
2488         case BPF_PROG_TYPE_EXT:
2489         case BPF_PROG_TYPE_LSM:
2490                 if (attr->raw_tracepoint.name) {
2491                         /* The attach point for this category of programs
2492                          * should be specified via btf_id during program load.
2493                          */
2494                         err = -EINVAL;
2495                         goto out_put_prog;
2496                 }
2497                 if (prog->type == BPF_PROG_TYPE_TRACING &&
2498                     prog->expected_attach_type == BPF_TRACE_RAW_TP) {
                             /* tracepoint name comes from the program itself */
2499                         tp_name = prog->aux->attach_func_name;
2500                         break;
2501                 }
                     /* hands over our prog reference: bpf_tracing_prog_attach()
                      * puts it itself on failure
                      */
2502                 return bpf_tracing_prog_attach(prog);
2503         case BPF_PROG_TYPE_RAW_TRACEPOINT:
2504         case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
                     /* tracepoint name comes from user space */
2505                 if (strncpy_from_user(buf,
2506                                       u64_to_user_ptr(attr->raw_tracepoint.name),
2507                                       sizeof(buf) - 1) < 0) {
2508                         err = -EFAULT;
2509                         goto out_put_prog;
2510                 }
                     /* the copy is bounded to sizeof - 1, so terminate explicitly */
2511                 buf[sizeof(buf) - 1] = 0;
2512                 tp_name = buf;
2513                 break;
2514         default:
2515                 err = -EINVAL;
2516                 goto out_put_prog;
2517         }
2518
2519         btp = bpf_get_raw_tracepoint(tp_name);
2520         if (!btp) {
2521                 err = -ENOENT;
2522                 goto out_put_prog;
2523         }
2524
2525         link = kzalloc(sizeof(*link), GFP_USER);
2526         if (!link) {
2527                 err = -ENOMEM;
2528                 goto out_put_btp;
2529         }
2530         bpf_link_init(&link->link, &bpf_raw_tp_lops, prog);
2531         link->btp = btp;
2532
2533         link_file = bpf_link_new_file(&link->link, &link_fd);
2534         if (IS_ERR(link_file)) {
                     /* no file owns the link yet, so free it directly */
2535                 kfree(link);
2536                 err = PTR_ERR(link_file);
2537                 goto out_put_btp;
2538         }
2539
2540         err = bpf_probe_register(link->btp, prog);
2541         if (err) {
                     /* bpf_link_cleanup() clears link->prog, so bpf_link_free()
                      * skips the release callback; btp and prog are put below
                      */
2542                 bpf_link_cleanup(&link->link, link_file, link_fd);
2543                 goto out_put_btp;
2544         }
2545
2546         fd_install(link_fd, link_file);
2547         return link_fd;
2548
2549 out_put_btp:
2550         bpf_put_raw_tracepoint(btp);
2551 out_put_prog:
2552         bpf_prog_put(prog);
2553         return err;
2554 }
2555
2556 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
2557                                              enum bpf_attach_type attach_type)
2558 {
2559         switch (prog->type) {
2560         case BPF_PROG_TYPE_CGROUP_SOCK:
2561         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2562         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2563                 return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
2564         case BPF_PROG_TYPE_CGROUP_SKB:
2565                 return prog->enforce_expected_attach_type &&
2566                         prog->expected_attach_type != attach_type ?
2567                         -EINVAL : 0;
2568         default:
2569                 return 0;
2570         }
2571 }
2572
2573 static enum bpf_prog_type
2574 attach_type_to_prog_type(enum bpf_attach_type attach_type)
2575 {
2576         switch (attach_type) {
2577         case BPF_CGROUP_INET_INGRESS:
2578         case BPF_CGROUP_INET_EGRESS:
2579                 return BPF_PROG_TYPE_CGROUP_SKB;
2580                 break;
2581         case BPF_CGROUP_INET_SOCK_CREATE:
2582         case BPF_CGROUP_INET4_POST_BIND:
2583         case BPF_CGROUP_INET6_POST_BIND:
2584                 return BPF_PROG_TYPE_CGROUP_SOCK;
2585         case BPF_CGROUP_INET4_BIND:
2586         case BPF_CGROUP_INET6_BIND:
2587         case BPF_CGROUP_INET4_CONNECT:
2588         case BPF_CGROUP_INET6_CONNECT:
2589         case BPF_CGROUP_UDP4_SENDMSG:
2590         case BPF_CGROUP_UDP6_SENDMSG:
2591         case BPF_CGROUP_UDP4_RECVMSG:
2592         case BPF_CGROUP_UDP6_RECVMSG:
2593                 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
2594         case BPF_CGROUP_SOCK_OPS:
2595                 return BPF_PROG_TYPE_SOCK_OPS;
2596         case BPF_CGROUP_DEVICE:
2597                 return BPF_PROG_TYPE_CGROUP_DEVICE;
2598         case BPF_SK_MSG_VERDICT:
2599                 return BPF_PROG_TYPE_SK_MSG;
2600         case BPF_SK_SKB_STREAM_PARSER:
2601         case BPF_SK_SKB_STREAM_VERDICT:
2602                 return BPF_PROG_TYPE_SK_SKB;
2603         case BPF_LIRC_MODE2:
2604                 return BPF_PROG_TYPE_LIRC_MODE2;
2605         case BPF_FLOW_DISSECTOR:
2606                 return BPF_PROG_TYPE_FLOW_DISSECTOR;
2607         case BPF_CGROUP_SYSCTL:
2608                 return BPF_PROG_TYPE_CGROUP_SYSCTL;
2609         case BPF_CGROUP_GETSOCKOPT:
2610         case BPF_CGROUP_SETSOCKOPT:
2611                 return BPF_PROG_TYPE_CGROUP_SOCKOPT;
2612         default:
2613                 return BPF_PROG_TYPE_UNSPEC;
2614         }
2615 }
2616
2617 #define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
2618
2619 #define BPF_F_ATTACH_MASK \
2620         (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
2621
2622 static int bpf_prog_attach(const union bpf_attr *attr)
2623 {
2624         enum bpf_prog_type ptype;
2625         struct bpf_prog *prog;
2626         int ret;
2627
2628         if (!capable(CAP_NET_ADMIN))
2629                 return -EPERM;
2630
2631         if (CHECK_ATTR(BPF_PROG_ATTACH))
2632                 return -EINVAL;
2633
2634         if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
2635                 return -EINVAL;
2636
2637         ptype = attach_type_to_prog_type(attr->attach_type);
2638         if (ptype == BPF_PROG_TYPE_UNSPEC)
2639                 return -EINVAL;
2640
2641         prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
2642         if (IS_ERR(prog))
2643                 return PTR_ERR(prog);
2644
2645         if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
2646                 bpf_prog_put(prog);
2647                 return -EINVAL;
2648         }
2649
2650         switch (ptype) {
2651         case BPF_PROG_TYPE_SK_SKB:
2652         case BPF_PROG_TYPE_SK_MSG:
2653                 ret = sock_map_get_from_fd(attr, prog);
2654                 break;
2655         case BPF_PROG_TYPE_LIRC_MODE2:
2656                 ret = lirc_prog_attach(attr, prog);
2657                 break;
2658         case BPF_PROG_TYPE_FLOW_DISSECTOR:
2659                 ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
2660                 break;
2661         case BPF_PROG_TYPE_CGROUP_DEVICE:
2662         case BPF_PROG_TYPE_CGROUP_SKB:
2663         case BPF_PROG_TYPE_CGROUP_SOCK:
2664         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2665         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2666         case BPF_PROG_TYPE_CGROUP_SYSCTL:
2667         case BPF_PROG_TYPE_SOCK_OPS:
2668                 ret = cgroup_bpf_prog_attach(attr, ptype, prog);
2669                 break;
2670         default:
2671                 ret = -EINVAL;
2672         }
2673
2674         if (ret)
2675                 bpf_prog_put(prog);
2676         return ret;
2677 }
2678
2679 #define BPF_PROG_DETACH_LAST_FIELD attach_type
2680
2681 static int bpf_prog_detach(const union bpf_attr *attr)
2682 {
2683         enum bpf_prog_type ptype;
2684
2685         if (!capable(CAP_NET_ADMIN))
2686                 return -EPERM;
2687
2688         if (CHECK_ATTR(BPF_PROG_DETACH))
2689                 return -EINVAL;
2690
2691         ptype = attach_type_to_prog_type(attr->attach_type);
2692
2693         switch (ptype) {
2694         case BPF_PROG_TYPE_SK_MSG:
2695         case BPF_PROG_TYPE_SK_SKB:
2696                 return sock_map_get_from_fd(attr, NULL);
2697         case BPF_PROG_TYPE_LIRC_MODE2:
2698                 return lirc_prog_detach(attr);
2699         case BPF_PROG_TYPE_FLOW_DISSECTOR:
2700                 return skb_flow_dissector_bpf_prog_detach(attr);
2701         case BPF_PROG_TYPE_CGROUP_DEVICE:
2702         case BPF_PROG_TYPE_CGROUP_SKB:
2703         case BPF_PROG_TYPE_CGROUP_SOCK:
2704         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2705         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2706         case BPF_PROG_TYPE_CGROUP_SYSCTL:
2707         case BPF_PROG_TYPE_SOCK_OPS:
2708                 return cgroup_bpf_prog_detach(attr, ptype);
2709         default:
2710                 return -EINVAL;
2711         }
2712 }
2713
2714 #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
2715
2716 static int bpf_prog_query(const union bpf_attr *attr,
2717                           union bpf_attr __user *uattr)
2718 {
2719         if (!capable(CAP_NET_ADMIN))
2720                 return -EPERM;
2721         if (CHECK_ATTR(BPF_PROG_QUERY))
2722                 return -EINVAL;
2723         if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
2724                 return -EINVAL;
2725
2726         switch (attr->query.attach_type) {
2727         case BPF_CGROUP_INET_INGRESS:
2728         case BPF_CGROUP_INET_EGRESS:
2729         case BPF_CGROUP_INET_SOCK_CREATE:
2730         case BPF_CGROUP_INET4_BIND:
2731         case BPF_CGROUP_INET6_BIND:
2732         case BPF_CGROUP_INET4_POST_BIND:
2733         case BPF_CGROUP_INET6_POST_BIND:
2734         case BPF_CGROUP_INET4_CONNECT:
2735         case BPF_CGROUP_INET6_CONNECT:
2736         case BPF_CGROUP_UDP4_SENDMSG:
2737         case BPF_CGROUP_UDP6_SENDMSG:
2738         case BPF_CGROUP_UDP4_RECVMSG:
2739         case BPF_CGROUP_UDP6_RECVMSG:
2740         case BPF_CGROUP_SOCK_OPS:
2741         case BPF_CGROUP_DEVICE:
2742         case BPF_CGROUP_SYSCTL:
2743         case BPF_CGROUP_GETSOCKOPT:
2744         case BPF_CGROUP_SETSOCKOPT:
2745                 return cgroup_bpf_prog_query(attr, uattr);
2746         case BPF_LIRC_MODE2:
2747                 return lirc_prog_query(attr, uattr);
2748         case BPF_FLOW_DISSECTOR:
2749                 return skb_flow_dissector_prog_query(attr, uattr);
2750         default:
2751                 return -EINVAL;
2752         }
2753 }
2754
2755 #define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
2756
2757 static int bpf_prog_test_run(const union bpf_attr *attr,
2758                              union bpf_attr __user *uattr)
2759 {
2760         struct bpf_prog *prog;
2761         int ret = -ENOTSUPP;
2762
2763         if (!capable(CAP_SYS_ADMIN))
2764                 return -EPERM;
2765         if (CHECK_ATTR(BPF_PROG_TEST_RUN))
2766                 return -EINVAL;
2767
2768         if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
2769             (!attr->test.ctx_size_in && attr->test.ctx_in))
2770                 return -EINVAL;
2771
2772         if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
2773             (!attr->test.ctx_size_out && attr->test.ctx_out))
2774                 return -EINVAL;
2775
2776         prog = bpf_prog_get(attr->test.prog_fd);
2777         if (IS_ERR(prog))
2778                 return PTR_ERR(prog);
2779
2780         if (prog->aux->ops->test_run)
2781                 ret = prog->aux->ops->test_run(prog, attr, uattr);
2782
2783         bpf_prog_put(prog);
2784         return ret;
2785 }
2786
2787 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
2788
2789 static int bpf_obj_get_next_id(const union bpf_attr *attr,
2790                                union bpf_attr __user *uattr,
2791                                struct idr *idr,
2792                                spinlock_t *lock)
2793 {
2794         u32 next_id = attr->start_id;
2795         int err = 0;
2796
2797         if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
2798                 return -EINVAL;
2799
2800         if (!capable(CAP_SYS_ADMIN))
2801                 return -EPERM;
2802
2803         next_id++;
2804         spin_lock_bh(lock);
2805         if (!idr_get_next(idr, &next_id))
2806                 err = -ENOENT;
2807         spin_unlock_bh(lock);
2808
2809         if (!err)
2810                 err = put_user(next_id, &uattr->next_id);
2811
2812         return err;
2813 }
2814
2815 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
2816
2817 struct bpf_prog *bpf_prog_by_id(u32 id)
2818 {
2819         struct bpf_prog *prog;
2820
2821         if (!id)
2822                 return ERR_PTR(-ENOENT);
2823
2824         spin_lock_bh(&prog_idr_lock);
2825         prog = idr_find(&prog_idr, id);
2826         if (prog)
2827                 prog = bpf_prog_inc_not_zero(prog);
2828         else
2829                 prog = ERR_PTR(-ENOENT);
2830         spin_unlock_bh(&prog_idr_lock);
2831         return prog;
2832 }
2833
2834 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
2835 {
2836         struct bpf_prog *prog;
2837         u32 id = attr->prog_id;
2838         int fd;
2839
2840         if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
2841                 return -EINVAL;
2842
2843         if (!capable(CAP_SYS_ADMIN))
2844                 return -EPERM;
2845
2846         prog = bpf_prog_by_id(id);
2847         if (IS_ERR(prog))
2848                 return PTR_ERR(prog);
2849
2850         fd = bpf_prog_new_fd(prog);
2851         if (fd < 0)
2852                 bpf_prog_put(prog);
2853
2854         return fd;
2855 }
2856
2857 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags
2858
2859 static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
2860 {
2861         struct bpf_map *map;
2862         u32 id = attr->map_id;
2863         int f_flags;
2864         int fd;
2865
2866         if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
2867             attr->open_flags & ~BPF_OBJ_FLAG_MASK)
2868                 return -EINVAL;
2869
2870         if (!capable(CAP_SYS_ADMIN))
2871                 return -EPERM;
2872
2873         f_flags = bpf_get_file_flag(attr->open_flags);
2874         if (f_flags < 0)
2875                 return f_flags;
2876
2877         spin_lock_bh(&map_idr_lock);
2878         map = idr_find(&map_idr, id);
2879         if (map)
2880                 map = __bpf_map_inc_not_zero(map, true);
2881         else
2882                 map = ERR_PTR(-ENOENT);
2883         spin_unlock_bh(&map_idr_lock);
2884
2885         if (IS_ERR(map))
2886                 return PTR_ERR(map);
2887
2888         fd = bpf_map_new_fd(map, f_flags);
2889         if (fd < 0)
2890                 bpf_map_put_with_uref(map);
2891
2892         return fd;
2893 }
2894
2895 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
2896                                               unsigned long addr, u32 *off,
2897                                               u32 *type)
2898 {
2899         const struct bpf_map *map;
2900         int i;
2901
2902         for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
2903                 map = prog->aux->used_maps[i];
2904                 if (map == (void *)addr) {
2905                         *type = BPF_PSEUDO_MAP_FD;
2906                         return map;
2907                 }
2908                 if (!map->ops->map_direct_value_meta)
2909                         continue;
2910                 if (!map->ops->map_direct_value_meta(map, addr, off)) {
2911                         *type = BPF_PSEUDO_MAP_VALUE;
2912                         return map;
2913                 }
2914         }
2915
2916         return NULL;
2917 }
2918
2919 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
2920 {
2921         const struct bpf_map *map;
2922         struct bpf_insn *insns;
2923         u32 off, type;
2924         u64 imm;
2925         int i;
2926
2927         insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
2928                         GFP_USER);
2929         if (!insns)
2930                 return insns;
2931
2932         for (i = 0; i < prog->len; i++) {
2933                 if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) {
2934                         insns[i].code = BPF_JMP | BPF_CALL;
2935                         insns[i].imm = BPF_FUNC_tail_call;
2936                         /* fall-through */
2937                 }
2938                 if (insns[i].code == (BPF_JMP | BPF_CALL) ||
2939                     insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) {
2940                         if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS))
2941                                 insns[i].code = BPF_JMP | BPF_CALL;
2942                         if (!bpf_dump_raw_ok())
2943                                 insns[i].imm = 0;
2944                         continue;
2945                 }
2946
2947                 if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW))
2948                         continue;
2949
2950                 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
2951                 map = bpf_map_from_imm(prog, imm, &off, &type);
2952                 if (map) {
2953                         insns[i].src_reg = type;
2954                         insns[i].imm = map->id;
2955                         insns[i + 1].imm = off;
2956                         continue;
2957                 }
2958         }
2959
2960         return insns;
2961 }
2962
2963 static int set_info_rec_size(struct bpf_prog_info *info)
2964 {
2965         /*
2966          * Ensure info.*_rec_size is the same as kernel expected size
2967          *
2968          * or
2969          *
2970          * Only allow zero *_rec_size if both _rec_size and _cnt are
2971          * zero.  In this case, the kernel will set the expected
2972          * _rec_size back to the info.
2973          */
2974
2975         if ((info->nr_func_info || info->func_info_rec_size) &&
2976             info->func_info_rec_size != sizeof(struct bpf_func_info))
2977                 return -EINVAL;
2978
2979         if ((info->nr_line_info || info->line_info_rec_size) &&
2980             info->line_info_rec_size != sizeof(struct bpf_line_info))
2981                 return -EINVAL;
2982
2983         if ((info->nr_jited_line_info || info->jited_line_info_rec_size) &&
2984             info->jited_line_info_rec_size != sizeof(__u64))
2985                 return -EINVAL;
2986
2987         info->func_info_rec_size = sizeof(struct bpf_func_info);
2988         info->line_info_rec_size = sizeof(struct bpf_line_info);
2989         info->jited_line_info_rec_size = sizeof(__u64);
2990
2991         return 0;
2992 }
2993
/*
 * Fill a struct bpf_prog_info describing @prog and copy it to the user
 * buffer named by attr->info.info.
 *
 * The user's struct is both input and output.  For every optional array
 * (map ids, xlated/jited instructions, ksyms, function lengths, func info,
 * line info, jited line info, program tags) the caller passes a capacity
 * in the count/length field and a destination pointer.  The function
 * replaces the capacity with the kernel's real count and copies at most
 * min(capacity, real count) elements to the destination.
 *
 * Returns 0 on success, -EFAULT when a user access fails, -ENOMEM when
 * the instruction dump cannot be allocated, or the error reported by
 * bpf_check_uarg_tail_zero(), set_info_rec_size() or
 * bpf_prog_offload_info_fill().
 */
2994 static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
2995                                    const union bpf_attr *attr,
2996                                    union bpf_attr __user *uattr)
2997 {
2998         struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
2999         struct bpf_prog_info info;
3000         u32 info_len = attr->info.info_len;
3001         struct bpf_prog_stats stats;
3002         char __user *uinsns;
3003         u32 ulen;
3004         int err;
3005
3006         err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
3007         if (err)
3008                 return err;
3009         info_len = min_t(u32, sizeof(info), info_len);
3010
             /*
              * Read the caller's struct: its count fields and pointers are
              * consumed below.  Bytes beyond info_len stay zero.
              */
3011         memset(&info, 0, sizeof(info));
3012         if (copy_from_user(&info, uinfo, info_len))
3013                 return -EFAULT;
3014
3015         info.type = prog->type;
3016         info.id = prog->aux->id;
3017         info.load_time = prog->aux->load_time;
3018         info.created_by_uid = from_kuid_munged(current_user_ns(),
3019                                                prog->aux->user->uid);
3020         info.gpl_compatible = prog->gpl_compatible;
3021
3022         memcpy(info.tag, prog->tag, sizeof(prog->tag));
3023         memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
3024
             /*
              * ulen remembers the caller's capacity before the field is
              * replaced by the real count; the same pattern repeats for
              * every array below.
              */
3025         ulen = info.nr_map_ids;
3026         info.nr_map_ids = prog->aux->used_map_cnt;
3027         ulen = min_t(u32, info.nr_map_ids, ulen);
3028         if (ulen) {
3029                 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
3030                 u32 i;
3031
3032                 for (i = 0; i < ulen; i++)
3033                         if (put_user(prog->aux->used_maps[i]->id,
3034                                      &user_map_ids[i]))
3035                                 return -EFAULT;
3036         }
3037
3038         err = set_info_rec_size(&info);
3039         if (err)
3040                 return err;
3041
3042         bpf_prog_get_stats(prog, &stats);
3043         info.run_time_ns = stats.nsecs;
3044         info.run_cnt = stats.cnt;
3045
             /*
              * Without CAP_SYS_ADMIN only the fields filled in above are
              * reported; the remaining counts are returned as zero.
              */
3046         if (!capable(CAP_SYS_ADMIN)) {
3047                 info.jited_prog_len = 0;
3048                 info.xlated_prog_len = 0;
3049                 info.nr_jited_ksyms = 0;
3050                 info.nr_jited_func_lens = 0;
3051                 info.nr_func_info = 0;
3052                 info.nr_line_info = 0;
3053                 info.nr_jited_line_info = 0;
3054                 goto done;
3055         }
3056
3057         ulen = info.xlated_prog_len;
3058         info.xlated_prog_len = bpf_prog_insn_size(prog);
3059         if (info.xlated_prog_len && ulen) {
3060                 struct bpf_insn *insns_sanitized;
3061                 bool fault;
3062
                     /*
                      * Blinded program and raw dumps not allowed: clear the
                      * user pointer and skip every remaining field.
                      */
3063                 if (prog->blinded && !bpf_dump_raw_ok()) {
3064                         info.xlated_prog_insns = 0;
3065                         goto done;
3066                 }
3067                 insns_sanitized = bpf_insn_prepare_dump(prog);
3068                 if (!insns_sanitized)
3069                         return -ENOMEM;
3070                 uinsns = u64_to_user_ptr(info.xlated_prog_insns);
3071                 ulen = min_t(u32, info.xlated_prog_len, ulen);
3072                 fault = copy_to_user(uinsns, insns_sanitized, ulen);
3073                 kfree(insns_sanitized);
3074                 if (fault)
3075                         return -EFAULT;
3076         }
3077
3078         if (bpf_prog_is_dev_bound(prog->aux)) {
3079                 err = bpf_prog_offload_info_fill(&info, prog);
3080                 if (err)
3081                         return err;
3082                 goto done;
3083         }
3084
3085         /* NOTE: the following code is supposed to be skipped for offload.
3086          * bpf_prog_offload_info_fill() is the place to fill similar fields
3087          * for offload.
3088          */
3089         ulen = info.jited_prog_len;
3090         if (prog->aux->func_cnt) {
3091                 u32 i;
3092
3093                 info.jited_prog_len = 0;
3094                 for (i = 0; i < prog->aux->func_cnt; i++)
3095                         info.jited_prog_len += prog->aux->func[i]->jited_len;
3096         } else {
3097                 info.jited_prog_len = prog->jited_len;
3098         }
3099
3100         if (info.jited_prog_len && ulen) {
3101                 if (bpf_dump_raw_ok()) {
3102                         uinsns = u64_to_user_ptr(info.jited_prog_insns);
3103                         ulen = min_t(u32, info.jited_prog_len, ulen);
3104
3105                         /* for multi-function programs, copy the JITed
3106                          * instructions for all the functions
3107                          */
3108                         if (prog->aux->func_cnt) {
3109                                 u32 len, free, i;
3110                                 u8 *img;
3111
                                     /* free = bytes of the user buffer not yet used */
3112                                 free = ulen;
3113                                 for (i = 0; i < prog->aux->func_cnt; i++) {
3114                                         len = prog->aux->func[i]->jited_len;
3115                                         len = min_t(u32, len, free);
3116                                         img = (u8 *) prog->aux->func[i]->bpf_func;
3117                                         if (copy_to_user(uinsns, img, len))
3118                                                 return -EFAULT;
3119                                         uinsns += len;
3120                                         free -= len;
3121                                         if (!free)
3122                                                 break;
3123                                 }
3124                         } else {
3125                                 if (copy_to_user(uinsns, prog->bpf_func, ulen))
3126                                         return -EFAULT;
3127                         }
3128                 } else {
3129                         info.jited_prog_insns = 0;
3130                 }
3131         }
3132
3133         ulen = info.nr_jited_ksyms;
3134         info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
3135         if (ulen) {
3136                 if (bpf_dump_raw_ok()) {
3137                         unsigned long ksym_addr;
3138                         u64 __user *user_ksyms;
3139                         u32 i;
3140
3141                         /* copy the address of the kernel symbol
3142                          * corresponding to each function
3143                          */
3144                         ulen = min_t(u32, info.nr_jited_ksyms, ulen);
3145                         user_ksyms = u64_to_user_ptr(info.jited_ksyms);
3146                         if (prog->aux->func_cnt) {
3147                                 for (i = 0; i < ulen; i++) {
3148                                         ksym_addr = (unsigned long)
3149                                                 prog->aux->func[i]->bpf_func;
3150                                         if (put_user((u64) ksym_addr,
3151                                                      &user_ksyms[i]))
3152                                                 return -EFAULT;
3153                                 }
3154                         } else {
3155                                 ksym_addr = (unsigned long) prog->bpf_func;
3156                                 if (put_user((u64) ksym_addr, &user_ksyms[0]))
3157                                         return -EFAULT;
3158                         }
3159                 } else {
3160                         info.jited_ksyms = 0;
3161                 }
3162         }
3163
3164         ulen = info.nr_jited_func_lens;
3165         info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
3166         if (ulen) {
3167                 if (bpf_dump_raw_ok()) {
3168                         u32 __user *user_lens;
3169                         u32 func_len, i;
3170
3171                         /* copy the JITed image lengths for each function */
3172                         ulen = min_t(u32, info.nr_jited_func_lens, ulen);
3173                         user_lens = u64_to_user_ptr(info.jited_func_lens);
3174                         if (prog->aux->func_cnt) {
3175                                 for (i = 0; i < ulen; i++) {
3176                                         func_len =
3177                                                 prog->aux->func[i]->jited_len;
3178                                         if (put_user(func_len, &user_lens[i]))
3179                                                 return -EFAULT;
3180                                 }
3181                         } else {
3182                                 func_len = prog->jited_len;
3183                                 if (put_user(func_len, &user_lens[0]))
3184                                         return -EFAULT;
3185                         }
3186                 } else {
3187                         info.jited_func_lens = 0;
3188                 }
3189         }
3190
3191         if (prog->aux->btf)
3192                 info.btf_id = btf_id(prog->aux->btf);
3193
3194         ulen = info.nr_func_info;
3195         info.nr_func_info = prog->aux->func_info_cnt;
3196         if (info.nr_func_info && ulen) {
3197                 char __user *user_finfo;
3198
3199                 user_finfo = u64_to_user_ptr(info.func_info);
3200                 ulen = min_t(u32, info.nr_func_info, ulen);
3201                 if (copy_to_user(user_finfo, prog->aux->func_info,
3202                                  info.func_info_rec_size * ulen))
3203                         return -EFAULT;
3204         }
3205
3206         ulen = info.nr_line_info;
3207         info.nr_line_info = prog->aux->nr_linfo;
3208         if (info.nr_line_info && ulen) {
3209                 __u8 __user *user_linfo;
3210
3211                 user_linfo = u64_to_user_ptr(info.line_info);
3212                 ulen = min_t(u32, info.nr_line_info, ulen);
3213                 if (copy_to_user(user_linfo, prog->aux->linfo,
3214                                  info.line_info_rec_size * ulen))
3215                         return -EFAULT;
3216         }
3217
3218         ulen = info.nr_jited_line_info;
3219         if (prog->aux->jited_linfo)
3220                 info.nr_jited_line_info = prog->aux->nr_linfo;
3221         else
3222                 info.nr_jited_line_info = 0;
3223         if (info.nr_jited_line_info && ulen) {
3224                 if (bpf_dump_raw_ok()) {
3225                         __u64 __user *user_linfo;
3226                         u32 i;
3227
3228                         user_linfo = u64_to_user_ptr(info.jited_line_info);
3229                         ulen = min_t(u32, info.nr_jited_line_info, ulen);
3230                         for (i = 0; i < ulen; i++) {
3231                                 if (put_user((__u64)(long)prog->aux->jited_linfo[i],
3232                                              &user_linfo[i]))
3233                                         return -EFAULT;
3234                         }
3235                 } else {
3236                         info.jited_line_info = 0;
3237                 }
3238         }
3239
3240         ulen = info.nr_prog_tags;
3241         info.nr_prog_tags = prog->aux->func_cnt ? : 1;
3242         if (ulen) {
3243                 __u8 __user (*user_prog_tags)[BPF_TAG_SIZE];
3244                 u32 i;
3245
3246                 user_prog_tags = u64_to_user_ptr(info.prog_tags);
3247                 ulen = min_t(u32, info.nr_prog_tags, ulen);
3248                 if (prog->aux->func_cnt) {
3249                         for (i = 0; i < ulen; i++) {
3250                                 if (copy_to_user(user_prog_tags[i],
3251                                                  prog->aux->func[i]->tag,
3252                                                  BPF_TAG_SIZE))
3253                                         return -EFAULT;
3254                         }
3255                 } else {
3256                         if (copy_to_user(user_prog_tags[0],
3257                                          prog->tag, BPF_TAG_SIZE))
3258                                 return -EFAULT;
3259                 }
3260         }
3261
     /*
      * Write back at most info_len bytes (already clamped to sizeof(info))
      * and tell the caller how many bytes were written.
      */
3262 done:
3263         if (copy_to_user(uinfo, &info, info_len) ||
3264             put_user(info_len, &uattr->info.info_len))
3265                 return -EFAULT;
3266
3267         return 0;
3268 }
3269
3270 static int bpf_map_get_info_by_fd(struct bpf_map *map,
3271                                   const union bpf_attr *attr,
3272                                   union bpf_attr __user *uattr)
3273 {
3274         struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3275         struct bpf_map_info info;
3276         u32 info_len = attr->info.info_len;
3277         int err;
3278
3279         err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
3280         if (err)
3281                 return err;
3282         info_len = min_t(u32, sizeof(info), info_len);
3283
3284         memset(&info, 0, sizeof(info));
3285         info.type = map->map_type;
3286         info.id = map->id;
3287         info.key_size = map->key_size;
3288         info.value_size = map->value_size;
3289         info.max_entries = map->max_entries;
3290         info.map_flags = map->map_flags;
3291         memcpy(info.name, map->name, sizeof(map->name));
3292
3293         if (map->btf) {
3294                 info.btf_id = btf_id(map->btf);
3295                 info.btf_key_type_id = map->btf_key_type_id;
3296                 info.btf_value_type_id = map->btf_value_type_id;
3297         }
3298         info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
3299
3300         if (bpf_map_is_dev_bound(map)) {
3301                 err = bpf_map_offload_info_fill(&info, map);
3302                 if (err)
3303                         return err;
3304         }
3305
3306         if (copy_to_user(uinfo, &info, info_len) ||
3307             put_user(info_len, &uattr->info.info_len))
3308                 return -EFAULT;
3309
3310         return 0;
3311 }
3312
3313 static int bpf_btf_get_info_by_fd(struct btf *btf,
3314                                   const union bpf_attr *attr,
3315                                   union bpf_attr __user *uattr)
3316 {
3317         struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3318         u32 info_len = attr->info.info_len;
3319         int err;
3320
3321         err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
3322         if (err)
3323                 return err;
3324
3325         return btf_get_info_by_fd(btf, attr, uattr);
3326 }
3327
3328 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
3329
3330 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
3331                                   union bpf_attr __user *uattr)
3332 {
3333         int ufd = attr->info.bpf_fd;
3334         struct fd f;
3335         int err;
3336
3337         if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
3338                 return -EINVAL;
3339
3340         f = fdget(ufd);
3341         if (!f.file)
3342                 return -EBADFD;
3343
3344         if (f.file->f_op == &bpf_prog_fops)
3345                 err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
3346                                               uattr);
3347         else if (f.file->f_op == &bpf_map_fops)
3348                 err = bpf_map_get_info_by_fd(f.file->private_data, attr,
3349                                              uattr);
3350         else if (f.file->f_op == &btf_fops)
3351                 err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr);
3352         else
3353                 err = -EINVAL;
3354
3355         fdput(f);
3356         return err;
3357 }
3358
3359 #define BPF_BTF_LOAD_LAST_FIELD btf_log_level
3360
3361 static int bpf_btf_load(const union bpf_attr *attr)
3362 {
3363         if (CHECK_ATTR(BPF_BTF_LOAD))
3364                 return -EINVAL;
3365
3366         if (!capable(CAP_SYS_ADMIN))
3367                 return -EPERM;
3368
3369         return btf_new_fd(attr);
3370 }
3371
3372 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
3373
3374 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
3375 {
3376         if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
3377                 return -EINVAL;
3378
3379         if (!capable(CAP_SYS_ADMIN))
3380                 return -EPERM;
3381
3382         return btf_get_fd_by_id(attr->btf_id);
3383 }
3384
3385 static int bpf_task_fd_query_copy(const union bpf_attr *attr,
3386                                     union bpf_attr __user *uattr,
3387                                     u32 prog_id, u32 fd_type,
3388                                     const char *buf, u64 probe_offset,
3389                                     u64 probe_addr)
3390 {
3391         char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
3392         u32 len = buf ? strlen(buf) : 0, input_len;
3393         int err = 0;
3394
3395         if (put_user(len, &uattr->task_fd_query.buf_len))
3396                 return -EFAULT;
3397         input_len = attr->task_fd_query.buf_len;
3398         if (input_len && ubuf) {
3399                 if (!len) {
3400                         /* nothing to copy, just make ubuf NULL terminated */
3401                         char zero = '\0';
3402
3403                         if (put_user(zero, ubuf))
3404                                 return -EFAULT;
3405                 } else if (input_len >= len + 1) {
3406                         /* ubuf can hold the string with NULL terminator */
3407                         if (copy_to_user(ubuf, buf, len + 1))
3408                                 return -EFAULT;
3409                 } else {
3410                         /* ubuf cannot hold the string with NULL terminator,
3411                          * do a partial copy with NULL terminator.
3412                          */
3413                         char zero = '\0';
3414
3415                         err = -ENOSPC;
3416                         if (copy_to_user(ubuf, buf, input_len - 1))
3417                                 return -EFAULT;
3418                         if (put_user(zero, ubuf + input_len - 1))
3419                                 return -EFAULT;
3420                 }
3421         }
3422
3423         if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
3424             put_user(fd_type, &uattr->task_fd_query.fd_type) ||
3425             put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
3426             put_user(probe_addr, &uattr->task_fd_query.probe_addr))
3427                 return -EFAULT;
3428
3429         return err;
3430 }
3431
3432 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
3433
/*
 * Handler for the BPF_TASK_FD_QUERY command.
 *
 * Looks up descriptor attr->task_fd_query.fd in the file table of the task
 * whose pid is attr->task_fd_query.pid and reports what is behind it via
 * bpf_task_fd_query_copy().  Two kinds of file are recognised:
 *  - a BPF link whose ops are bpf_raw_tp_lops, reported as
 *    BPF_FD_TYPE_RAW_TRACEPOINT with the tracepoint name and the id of
 *    the linked program;
 *  - a perf event, described by bpf_get_perf_event_info().
 *
 * Returns -EINVAL if the attribute fails CHECK_ATTR() or flags is
 * non-zero, -EPERM without CAP_SYS_ADMIN, -ENOENT if the task or its
 * file table cannot be obtained, -EBADF if the descriptor is not open in
 * that task, -ENOTSUPP for any other kind of file, otherwise the result of
 * bpf_get_perf_event_info() / bpf_task_fd_query_copy().
 */
3434 static int bpf_task_fd_query(const union bpf_attr *attr,
3435                              union bpf_attr __user *uattr)
3436 {
3437         pid_t pid = attr->task_fd_query.pid;
3438         u32 fd = attr->task_fd_query.fd;
3439         const struct perf_event *event;
3440         struct files_struct *files;
3441         struct task_struct *task;
3442         struct file *file;
3443         int err;
3444
3445         if (CHECK_ATTR(BPF_TASK_FD_QUERY))
3446                 return -EINVAL;
3447
3448         if (!capable(CAP_SYS_ADMIN))
3449                 return -EPERM;
3450
3451         if (attr->task_fd_query.flags != 0)
3452                 return -EINVAL;
3453
3454         task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
3455         if (!task)
3456                 return -ENOENT;
3457
             /* The task is only needed to get at its file table. */
3458         files = get_files_struct(task);
3459         put_task_struct(task);
3460         if (!files)
3461                 return -ENOENT;
3462
             /*
              * Take our own reference on the file while file_lock is held;
              * the file table itself is released right afterwards.
              */
3463         err = 0;
3464         spin_lock(&files->file_lock);
3465         file = fcheck_files(files, fd);
3466         if (!file)
3467                 err = -EBADF;
3468         else
3469                 get_file(file);
3470         spin_unlock(&files->file_lock);
3471         put_files_struct(files);
3472
3473         if (err)
3474                 goto out;
3475
3476         if (file->f_op == &bpf_link_fops) {
3477                 struct bpf_link *link = file->private_data;
3478
3479                 if (link->ops == &bpf_raw_tp_lops) {
3480                         struct bpf_raw_tp_link *raw_tp =
3481                                 container_of(link, struct bpf_raw_tp_link, link);
3482                         struct bpf_raw_event_map *btp = raw_tp->btp;
3483
3484                         err = bpf_task_fd_query_copy(attr, uattr,
3485                                                      raw_tp->link.prog->aux->id,
3486                                                      BPF_FD_TYPE_RAW_TRACEPOINT,
3487                                                      btp->tp->name, 0, 0);
3488                         goto put_file;
3489                 }
                     /* Any other kind of link is not reported. */
3490                 goto out_not_supp;
3491         }
3492
3493         event = perf_get_event(file);
3494         if (!IS_ERR(event)) {
3495                 u64 probe_offset, probe_addr;
3496                 u32 prog_id, fd_type;
3497                 const char *buf;
3498
3499                 err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
3500                                               &buf, &probe_offset,
3501                                               &probe_addr);
3502                 if (!err)
3503                         err = bpf_task_fd_query_copy(attr, uattr, prog_id,
3504                                                      fd_type, buf,
3505                                                      probe_offset,
3506                                                      probe_addr);
3507                 goto put_file;
3508         }
3509
3510 out_not_supp:
3511         err = -ENOTSUPP;
3512 put_file:
             /* Drop the reference taken with get_file() above. */
3513         fput(file);
3514 out:
3515         return err;
3516 }
3517
3518 #define BPF_MAP_BATCH_LAST_FIELD batch.flags
3519
3520 #define BPF_DO_BATCH(fn)                        \
3521         do {                                    \
3522                 if (!fn) {                      \
3523                         err = -ENOTSUPP;        \
3524                         goto err_put;           \
3525                 }                               \
3526                 err = fn(map, attr, uattr);     \
3527         } while (0)
3528
3529 static int bpf_map_do_batch(const union bpf_attr *attr,
3530                             union bpf_attr __user *uattr,
3531                             int cmd)
3532 {
3533         struct bpf_map *map;
3534         int err, ufd;
3535         struct fd f;
3536
3537         if (CHECK_ATTR(BPF_MAP_BATCH))
3538                 return -EINVAL;
3539
3540         ufd = attr->batch.map_fd;
3541         f = fdget(ufd);
3542         map = __bpf_map_get(f);
3543         if (IS_ERR(map))
3544                 return PTR_ERR(map);
3545
3546         if ((cmd == BPF_MAP_LOOKUP_BATCH ||
3547              cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
3548             !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
3549                 err = -EPERM;
3550                 goto err_put;
3551         }
3552
3553         if (cmd != BPF_MAP_LOOKUP_BATCH &&
3554             !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
3555                 err = -EPERM;
3556                 goto err_put;
3557         }
3558
3559         if (cmd == BPF_MAP_LOOKUP_BATCH)
3560                 BPF_DO_BATCH(map->ops->map_lookup_batch);
3561         else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
3562                 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
3563         else if (cmd == BPF_MAP_UPDATE_BATCH)
3564                 BPF_DO_BATCH(map->ops->map_update_batch);
3565         else
3566                 BPF_DO_BATCH(map->ops->map_delete_batch);
3567
3568 err_put:
3569         fdput(f);
3570         return err;
3571 }
3572
3573 #define BPF_LINK_CREATE_LAST_FIELD link_create.flags
3574 static int link_create(union bpf_attr *attr)
3575 {
3576         enum bpf_prog_type ptype;
3577         struct bpf_prog *prog;
3578         int ret;
3579
3580         if (!capable(CAP_NET_ADMIN))
3581                 return -EPERM;
3582
3583         if (CHECK_ATTR(BPF_LINK_CREATE))
3584                 return -EINVAL;
3585
3586         ptype = attach_type_to_prog_type(attr->link_create.attach_type);
3587         if (ptype == BPF_PROG_TYPE_UNSPEC)
3588                 return -EINVAL;
3589
3590         prog = bpf_prog_get_type(attr->link_create.prog_fd, ptype);
3591         if (IS_ERR(prog))
3592                 return PTR_ERR(prog);
3593
3594         ret = bpf_prog_attach_check_attach_type(prog,
3595                                                 attr->link_create.attach_type);
3596         if (ret)
3597                 goto err_out;
3598
3599         switch (ptype) {
3600         case BPF_PROG_TYPE_CGROUP_SKB:
3601         case BPF_PROG_TYPE_CGROUP_SOCK:
3602         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3603         case BPF_PROG_TYPE_SOCK_OPS:
3604         case BPF_PROG_TYPE_CGROUP_DEVICE:
3605         case BPF_PROG_TYPE_CGROUP_SYSCTL:
3606         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3607                 ret = cgroup_bpf_link_attach(attr, prog);
3608                 break;
3609         default:
3610                 ret = -EINVAL;
3611         }
3612
3613 err_out:
3614         if (ret < 0)
3615                 bpf_prog_put(prog);
3616         return ret;
3617 }
3618
3619 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
3620
3621 static int link_update(union bpf_attr *attr)
3622 {
3623         struct bpf_prog *old_prog = NULL, *new_prog;
3624         struct bpf_link *link;
3625         u32 flags;
3626         int ret;
3627
3628         if (!capable(CAP_NET_ADMIN))
3629                 return -EPERM;
3630
3631         if (CHECK_ATTR(BPF_LINK_UPDATE))
3632                 return -EINVAL;
3633
3634         flags = attr->link_update.flags;
3635         if (flags & ~BPF_F_REPLACE)
3636                 return -EINVAL;
3637
3638         link = bpf_link_get_from_fd(attr->link_update.link_fd);
3639         if (IS_ERR(link))
3640                 return PTR_ERR(link);
3641
3642         new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
3643         if (IS_ERR(new_prog)) {
3644                 ret = PTR_ERR(new_prog);
3645                 goto out_put_link;
3646         }
3647
3648         if (flags & BPF_F_REPLACE) {
3649                 old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
3650                 if (IS_ERR(old_prog)) {
3651                         ret = PTR_ERR(old_prog);
3652                         old_prog = NULL;
3653                         goto out_put_progs;
3654                 }
3655         } else if (attr->link_update.old_prog_fd) {
3656                 ret = -EINVAL;
3657                 goto out_put_progs;
3658         }
3659
3660 #ifdef CONFIG_CGROUP_BPF
3661         if (link->ops == &bpf_cgroup_link_lops) {
3662                 ret = cgroup_bpf_replace(link, old_prog, new_prog);
3663                 goto out_put_progs;
3664         }
3665 #endif
3666         ret = -EINVAL;
3667
3668 out_put_progs:
3669         if (old_prog)
3670                 bpf_prog_put(old_prog);
3671         if (ret)
3672                 bpf_prog_put(new_prog);
3673 out_put_link:
3674         bpf_link_put(link);
3675         return ret;
3676 }
3677
3678 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
3679 {
3680         union bpf_attr attr;
3681         int err;
3682
3683         if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
3684                 return -EPERM;
3685
3686         err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
3687         if (err)
3688                 return err;
3689         size = min_t(u32, size, sizeof(attr));
3690
3691         /* copy attributes from user space, may be less than sizeof(bpf_attr) */
3692         memset(&attr, 0, sizeof(attr));
3693         if (copy_from_user(&attr, uattr, size) != 0)
3694                 return -EFAULT;
3695
3696         err = security_bpf(cmd, &attr, size);
3697         if (err < 0)
3698                 return err;
3699
3700         switch (cmd) {
3701         case BPF_MAP_CREATE:
3702                 err = map_create(&attr);
3703                 break;
3704         case BPF_MAP_LOOKUP_ELEM:
3705                 err = map_lookup_elem(&attr);
3706                 break;
3707         case BPF_MAP_UPDATE_ELEM:
3708                 err = map_update_elem(&attr);
3709                 break;
3710         case BPF_MAP_DELETE_ELEM:
3711                 err = map_delete_elem(&attr);
3712                 break;
3713         case BPF_MAP_GET_NEXT_KEY:
3714                 err = map_get_next_key(&attr);
3715                 break;
3716         case BPF_MAP_FREEZE:
3717                 err = map_freeze(&attr);
3718                 break;
3719         case BPF_PROG_LOAD:
3720                 err = bpf_prog_load(&attr, uattr);
3721                 break;
3722         case BPF_OBJ_PIN:
3723                 err = bpf_obj_pin(&attr);
3724                 break;
3725         case BPF_OBJ_GET:
3726                 err = bpf_obj_get(&attr);
3727                 break;
3728         case BPF_PROG_ATTACH:
3729                 err = bpf_prog_attach(&attr);
3730                 break;
3731         case BPF_PROG_DETACH:
3732                 err = bpf_prog_detach(&attr);
3733                 break;
3734         case BPF_PROG_QUERY:
3735                 err = bpf_prog_query(&attr, uattr);
3736                 break;
3737         case BPF_PROG_TEST_RUN:
3738                 err = bpf_prog_test_run(&attr, uattr);
3739                 break;
3740         case BPF_PROG_GET_NEXT_ID:
3741                 err = bpf_obj_get_next_id(&attr, uattr,
3742                                           &prog_idr, &prog_idr_lock);
3743                 break;
3744         case BPF_MAP_GET_NEXT_ID:
3745                 err = bpf_obj_get_next_id(&attr, uattr,
3746                                           &map_idr, &map_idr_lock);
3747                 break;
3748         case BPF_BTF_GET_NEXT_ID:
3749                 err = bpf_obj_get_next_id(&attr, uattr,
3750                                           &btf_idr, &btf_idr_lock);
3751                 break;
3752         case BPF_PROG_GET_FD_BY_ID:
3753                 err = bpf_prog_get_fd_by_id(&attr);
3754                 break;
3755         case BPF_MAP_GET_FD_BY_ID:
3756                 err = bpf_map_get_fd_by_id(&attr);
3757                 break;
3758         case BPF_OBJ_GET_INFO_BY_FD:
3759                 err = bpf_obj_get_info_by_fd(&attr, uattr);
3760                 break;
3761         case BPF_RAW_TRACEPOINT_OPEN:
3762                 err = bpf_raw_tracepoint_open(&attr);
3763                 break;
3764         case BPF_BTF_LOAD:
3765                 err = bpf_btf_load(&attr);
3766                 break;
3767         case BPF_BTF_GET_FD_BY_ID:
3768                 err = bpf_btf_get_fd_by_id(&attr);
3769                 break;
3770         case BPF_TASK_FD_QUERY:
3771                 err = bpf_task_fd_query(&attr, uattr);
3772                 break;
3773         case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
3774                 err = map_lookup_and_delete_elem(&attr);
3775                 break;
3776         case BPF_MAP_LOOKUP_BATCH:
3777                 err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH);
3778                 break;
3779         case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
3780                 err = bpf_map_do_batch(&attr, uattr,
3781                                        BPF_MAP_LOOKUP_AND_DELETE_BATCH);
3782                 break;
3783         case BPF_MAP_UPDATE_BATCH:
3784                 err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
3785                 break;
3786         case BPF_MAP_DELETE_BATCH:
3787                 err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
3788                 break;
3789         case BPF_LINK_CREATE:
3790                 err = link_create(&attr);
3791                 break;
3792         case BPF_LINK_UPDATE:
3793                 err = link_update(&attr);
3794                 break;
3795         default:
3796                 err = -EINVAL;
3797                 break;
3798         }
3799
3800         return err;
3801 }